# Random Forest Regressor

In [42]:
import seaborn as sns 
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

In [43]:
# Load the 'diamonds' sample dataset through seaborn
data = sns.load_dataset("diamonds")

In [44]:
# Preview the first rows to check the columns and their raw (string) categories
data.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [45]:
# Convert categorical variables to numerical codes.
# Each label is replaced by its integer position in the column's category list
# (in the displayed output 'Ideal' becomes 0 and 'Premium' becomes 1).
for column in ('cut', 'color', 'clarity'):
    data[column] = data[column].astype('category').cat.codes

In [46]:
# The target is the price column; every other column is used as a feature
y = data['price']
X = data.drop(columns='price')

In [47]:
# Display the feature matrix ('price' has been dropped, categoricals are now integer codes)
X

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z
0,0.23,0,1,6,61.5,55.0,3.95,3.98,2.43
1,0.21,1,1,5,59.8,61.0,3.89,3.84,2.31
2,0.23,3,1,3,56.9,65.0,4.05,4.07,2.31
3,0.29,1,5,4,62.4,58.0,4.20,4.23,2.63
4,0.31,3,6,6,63.3,58.0,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...
53935,0.72,0,0,5,60.8,57.0,5.75,5.76,3.50
53936,0.72,3,0,5,63.1,55.0,5.69,5.75,3.61
53937,0.70,2,0,5,62.8,60.0,5.66,5.68,3.56
53938,0.86,1,4,6,61.0,58.0,6.15,6.12,3.74


In [54]:
# Display the regression target (the 'price' column)
y

0         326
1         326
2         327
3         334
4         335
         ... 
53935    2757
53936    2757
53937    2757
53938    2757
53939    2757
Name: price, Length: 53940, dtype: int64

In [48]:
# Split data into training and testing sets: 30% of the rows are held out,
# and the fixed random_state keeps the split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)


In [49]:
# Create a random forest regressor with 100 trees and a fixed seed
rf = RandomForestRegressor(
    n_estimators=100,
    random_state=1,
)


In [50]:
# Display the training features (row labels are shuffled by the split)
X_train

Unnamed: 0,carat,cut,color,clarity,depth,table,x,y,z
30083,0.32,0,5,2,61.5,56.0,4.42,4.40,2.71
21864,1.54,1,2,5,62.8,58.0,7.35,7.32,4.61
3761,0.71,0,2,3,62.1,56.0,5.69,5.75,3.55
21262,1.03,0,2,2,62.0,56.0,6.48,6.45,4.01
15460,1.33,0,6,3,62.4,55.0,7.02,7.05,4.39
...,...,...,...,...,...,...,...,...,...
50057,0.70,2,5,4,62.8,59.0,5.56,5.61,3.51
32511,0.40,3,1,6,63.9,57.0,4.71,4.65,2.99
5192,0.91,2,3,4,62.7,63.0,6.05,6.00,3.78
12172,1.18,0,6,3,61.7,56.0,6.79,6.82,4.20


In [51]:
# Fit the regressor on the training split
rf.fit(X=X_train, y=y_train)

In [52]:
# Predict prices for the held-out test features
y_pred = rf.predict(X=X_test)

In [58]:
# Evaluate the model on the test split: coefficient of determination and mean squared error
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(
    f"r2_score:{r2:.2f}",
    f"mean_squared_error:{MSE:.2f}",
    sep="\n",
)

r2_score:0.98
mean_squared_error:287042.20


# Random Forest Classifier

In [73]:
import seaborn as sns 
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score


In [60]:
# Reload the 'diamonds' dataset so this section starts from the un-encoded data
data = sns.load_dataset("diamonds")

In [61]:
# Preview the first rows of the freshly loaded data
data.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [62]:
# Convert categorial variables to numerical 
# Replace each label with its integer position in the column's category list
for column in ('cut', 'color', 'clarity'):
    data[column] = data[column].astype('category').cat.codes

In [63]:
# The target is the encoded 'cut' class; every other column (including price) is a feature
y = data['cut']
X = data.drop(columns='cut')

In [65]:
# Display the feature matrix ('cut' has been dropped)
X

Unnamed: 0,carat,color,clarity,depth,table,price,x,y,z
0,0.23,1,6,61.5,55.0,326,3.95,3.98,2.43
1,0.21,1,5,59.8,61.0,326,3.89,3.84,2.31
2,0.23,1,3,56.9,65.0,327,4.05,4.07,2.31
3,0.29,5,4,62.4,58.0,334,4.20,4.23,2.63
4,0.31,6,6,63.3,58.0,335,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...
53935,0.72,0,5,60.8,57.0,2757,5.75,5.76,3.50
53936,0.72,0,5,63.1,55.0,2757,5.69,5.75,3.61
53937,0.70,0,5,62.8,60.0,2757,5.66,5.68,3.56
53938,0.86,4,6,61.0,58.0,2757,6.15,6.12,3.74


In [66]:
# Display the classification target (integer codes of 'cut')
y

0        0
1        1
2        3
3        1
4        3
        ..
53935    0
53936    3
53937    2
53938    1
53939    0
Name: cut, Length: 53940, dtype: int8

In [67]:
# Split data into training and testing sets: 30% of the rows are held out,
# and the fixed random_state keeps the split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)


In [69]:
# Create a random forest classifier with 100 trees and a fixed seed
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=1,
)


In [70]:
# Fit the classifier on the training split
rf.fit(X=X_train, y=y_train)

In [71]:
# Predict the 'cut' class code for the held-out test features
y_pred = rf.predict(X=X_test)

In [75]:
# Evaluate the model on the test split.
# NOTE(review): y holds integer category codes for 'cut', so r2_score and
# mean_squared_error treat the class labels as numeric magnitudes; accuracy is
# the figure that directly measures how often the predicted class is correct.
Acc = accuracy_score(y_true=y_test, y_pred=y_pred)
MSE = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(
    f"r2_score:{r2:.2f}",
    f"mean_squared_error:{MSE:.2f}",
    f"Accuracy:{Acc:.2f}",
    sep="\n",
)



r2_score:0.64
mean_squared_error:0.45
Accuracy:0.79


### Random Forest Classifier on Iris Dataset

In [12]:
import pandas as pd 
import numpy as np
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Load the iris dataset that ships with scikit-learn.
# The returned object exposes .data, .target and .feature_names, which the next cell reads.
from sklearn.datasets import load_iris
data = load_iris()

In [3]:
# Wrap the raw arrays in pandas objects: the features keep their column names
# and the class labels become a Series named 'Targets'
y_data = pd.Series(data.target, name='Targets')
X_data = pd.DataFrame(data=data.data, columns=data.feature_names)

In [5]:
# Preview the first rows of the feature table
X_data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
# Preview the first class labels
y_data.head()

0    0
1    0
2    0
3    0
4    0
Name: Targets, dtype: int32

In [6]:
# Check the feature table's dimensions as (rows, columns)
X_data.shape

(150, 4)

In [8]:
# Split the data into training and testing sets: 20% of the samples are held
# out, and the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_data, y_data, test_size=0.2, random_state=0
)

In [9]:
# Define the model.
# A fixed random_state makes the forest's internal randomness repeatable, so
# re-running the notebook on a fresh kernel rebuilds the same model and the
# same saved artifact. The value matches the seed used for the train/test split.
model = RandomForestClassifier(random_state=0)

In [10]:
# Fit the classifier on the training split
model.fit(X=X_train, y=y_train)

In [11]:
# Predict the class label for each held-out test sample
y_pred = model.predict(X=X_test)

In [14]:
# Fraction of test samples whose predicted class equals the true class
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy:{accuracy:.2f}")

Accuracy:1.00


In [17]:
# Persist the fitted classifier so it can be loaded outside this notebook.
# The destination can be overridden with the MODEL_PATH environment variable,
# which lets the notebook run on machines other than the one it was written on.
# When the variable is unset, the original location is used, so existing
# consumers of the saved file are unaffected.
import os
from pathlib import Path

from joblib import dump

model_path = Path(os.environ.get(
    'MODEL_PATH',
    'C:\\Users\\DilawarComputer\\Desktop\\DjangoMLDeployment\\savedModels\\model.joblib',
))
# Make sure the target folder exists before writing, instead of failing on a missing directory
model_path.parent.mkdir(parents=True, exist_ok=True)
dump(model, str(model_path))

['C:\\Users\\DilawarComputer\\Desktop\\DjangoMLDeployment\\savedModels\\model.joblib']