In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('Copper2.csv')

In [3]:
data.head()

Unnamed: 0,item_date,quantity tons,country,status,item type,application,thickness,width,product_ref,delivery date,selling_price
0,247,4.04,28.0,1,5,10.0,0.65,1500.0,1670798778,2021,9.41
1,247,6.78,25.0,1,5,41.0,-0.23,1210.0,1668701718,2021,9.79
2,247,6.06,30.0,1,6,28.0,-1.06,952.0,628377,2021,8.88
3,247,5.39,32.0,1,3,59.0,0.77,1317.0,1668701718,2021,9.21
4,247,6.8,28.0,1,5,10.0,1.23,1986.0,640665,2021,8.68


Train test splitting the data for Model building

In [4]:
X = data.drop(['selling_price'],axis = 1)
y = data['selling_price']

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from xgboost import XGBRegressor
from sklearn import metrics

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 1. Random Forest Regression Model

In [7]:
rf = RandomForestRegressor(n_estimators=150)
rf.fit(X_train.values, y_train.values)

In [8]:
rf_train_predictions = rf.predict(X_train.values)
rf_test_predictions = rf.predict(X_test.values)

In [9]:
print("RANDOM FOREST REGRESSION")
print("--------------------------")
print("Evaluating Train Data")
print("--------------------------")

print('R2 score:',metrics.r2_score(y_train,rf_train_predictions))
print("MAE :",metrics.mean_absolute_error(y_train,rf_train_predictions))
print("MSE :",metrics.mean_squared_error(y_train,rf_train_predictions))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_train,rf_train_predictions)))

print('---------------------------------------------------------------')

print("Evaluating Test Data")
print("-----------------------")
print('R2 score:',metrics.r2_score(y_test,rf_test_predictions))
print("MAE :",metrics.mean_absolute_error(y_test,rf_test_predictions))
print("MSE :",metrics.mean_squared_error(y_test,rf_test_predictions))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_test,rf_test_predictions)))

RANDOM FOREST REGRESSION
--------------------------
Evaluating Train Data
--------------------------
R2 score: 0.9926359632470931
MAE : 0.02327728281482795
MSE : 0.0014645664897347845
RMSE : 0.038269654946638657
---------------------------------------------------------------
Evaluating Test Data
-----------------------
R2 score: 0.9517115281774261
MAE : 0.06127165802098365
MSE : 0.009558209198222522
RMSE : 0.09776609431813527


### 2. Decsion Tree Regression Model

In [10]:
dtree = DecisionTreeRegressor()
dtree.fit(X_train,y_train)

In [11]:
dt_train_predictions = dtree.predict(X_train)
dt_test_predictions = dtree.predict(X_test)

In [12]:
print("DECISION TREE REGRESSION")
print("----------------------------")
print("Evaluating Train Data")
print("----------------------------")

print('R2 score:',metrics.r2_score(y_train,dt_train_predictions))
print("MAE :",metrics.mean_absolute_error(y_train,dt_train_predictions))
print("MSE :",metrics.mean_squared_error(y_train,dt_train_predictions))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_train,dt_train_predictions)))

print('---------------------------------------------------------------')

print("Evaluating Test Data")
print("-----------------------")
print('R2 score:',metrics.r2_score(y_test,dt_test_predictions))
print("MAE :",metrics.mean_absolute_error(y_test,dt_test_predictions))
print("MSE :",metrics.mean_squared_error(y_test,dt_test_predictions))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_test,dt_test_predictions)))

DECISION TREE REGRESSION
----------------------------
Evaluating Train Data
----------------------------
R2 score: 0.9990722885309434
MAE : 0.001448000384023419
MSE : 0.00018450412121945921
RMSE : 0.013583229410543695
---------------------------------------------------------------
Evaluating Test Data
-----------------------
R2 score: 0.9146293670781378
MAE : 0.07560520234465792
MSE : 0.016898243784769385
RMSE : 0.12999324515054383


### 3. Extra Trees Regression Model

In [13]:
ext = ExtraTreesRegressor(n_estimators=150,max_depth=5)
ext.fit(X_train,y_train)

In [14]:
ex_train_pred = ext.predict(X_train)
ex_test_pred = ext.predict(X_test)

In [15]:
print("EXTRA TREE REGRESSION")
print("----------------------------")
print("Evaluating Train Data")
print("----------------------------")

print('R2 score:',metrics.r2_score(y_train,ex_train_pred ))
print("MAE :",metrics.mean_absolute_error(y_train,ex_train_pred ))
print("MSE :",metrics.mean_squared_error(y_train,ex_train_pred ))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_train,ex_train_pred )))

print('---------------------------------------------------------------')

print("Evaluating Test Data")
print("-----------------------")
print('R2 score:',metrics.r2_score(y_test,ex_test_pred))
print("MAE :",metrics.mean_absolute_error(y_test,ex_test_pred))
print("MSE :",metrics.mean_squared_error(y_test,ex_test_pred))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_test,ex_test_pred)))

EXTRA TREE REGRESSION
----------------------------
Evaluating Train Data
----------------------------
R2 score: 0.7234998185388575
MAE : 0.18124043160193976
MSE : 0.054990613675811345
RMSE : 0.23450077542688713
---------------------------------------------------------------
Evaluating Test Data
-----------------------
R2 score: 0.7201088849984318
MAE : 0.18211270127832663
MSE : 0.05540158404139254
RMSE : 0.23537541086823946


### 4. XGBoost Regression Model

In [16]:
xgb  = XGBRegressor()
xgb.fit(X_train,y_train)

In [17]:
xg_train_pred = xgb.predict(X_train)
xg_test_pred = xgb.predict(X_test)

In [18]:
print("XGBoost REGRESSION")
print("----------------------------")
print("Evaluating Train Data")
print("----------------------------")

print('R2 score:',metrics.r2_score(y_train,xg_train_pred ))
print("MAE :",metrics.mean_absolute_error(y_train,xg_train_pred ))
print("MSE :",metrics.mean_squared_error(y_train,xg_train_pred))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_train,xg_train_pred)))

print('---------------------------------------------------------------')

print("Evaluating Test Data")
print("-----------------------")
print('R2 score:',metrics.r2_score(y_test,xg_test_pred))
print("MAE :",metrics.mean_absolute_error(y_test,xg_test_pred))
print("MSE :",metrics.mean_squared_error(y_test,xg_test_pred))
print('RMSE :', np.sqrt(metrics.mean_squared_error(y_test,xg_test_pred)))


XGBoost REGRESSION
----------------------------
Evaluating Train Data
----------------------------
R2 score: 0.9397511287301181
MAE : 0.07561932630821361
MSE : 0.011982351645839218
RMSE : 0.10946392851455322
---------------------------------------------------------------
Evaluating Test Data
-----------------------
R2 score: 0.9325313156052516
MAE : 0.07985164476747697
MSE : 0.013354736139576637
RMSE : 0.11556269354586989


* From the above metrics The Random forest Regressor has the 95% r2 score with minimum errors. Thus the model explains or predicts 95% of the relationship between the dependent and independent variables. Hence choosing the Ransdom Forest Regression Model

### Predicting the Price with the model

In [49]:
import numpy as np
from scipy.special import inv_boxcox
ip = [[109,5.41,26.0,1,3,15.0,0.65,1250.0,164141591,2021]]
predict = rf.predict(np.array(ip))
selling_price = inv_boxcox(predict[0].round(2),0.09343054475928997)
print(selling_price)

716.8471318874424


##### Saving the model as pickle file

In [50]:
import pickle
with open('rf_reg.pkl','wb') as file:
    pickle.dump(rf,file)