In [11]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import GridSearchCV

In [12]:
df_analysis = pd.read_csv('D:/backup/Work_Extra/nawabkhan_fyp/time_generated.csv')

In [13]:
df_001 = df_analysis.copy()

In [14]:
df_001['arrival_hour']=df_001['arrival'].apply(lambda x: int(str(x).split(':')[0])+1)

In [16]:
df_001.drop(columns=['arrival','Unnamed: 0', 'Date', 'parking_entry', 'kiosk', 'department_entrance', 'department_exit', 'parking_exit', 'departure'], inplace=True)

In [17]:
x_train, x_test, y_train, y_test = train_test_split(df_001.drop('Rating', axis=1), df_001['Rating'], train_size=0.8)

In [18]:
scaler= StandardScaler()

In [19]:
scaler.fit(x_train)

StandardScaler()

In [20]:
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [21]:
dec_class = DecisionTreeClassifier(max_depth=4, max_leaf_nodes=15)
dec_class.fit(x_train_scaled, y_train)
dec_preds =  dec_class.predict(x_test_scaled)

In [22]:
print(accuracy_score(y_test, dec_preds))
print(precision_score(y_test, dec_preds, average='micro'))
print(recall_score(y_test, dec_preds, average='micro'))

0.78
0.78
0.78


In [23]:
ran_for = RandomForestClassifier(max_depth=10,max_leaf_nodes=20)
ran_for.fit(x_train_scaled, y_train)
ran_preds =  ran_for.predict(x_test_scaled)

In [24]:
print(accuracy_score(y_test, ran_preds))
print(precision_score(y_test, ran_preds, average='micro'))
print(recall_score(y_test, ran_preds, average='micro'))

0.795
0.795
0.795


In [25]:
svc_param_dict = {'kernel':('linear', 'rbf'), 'C':[1.1, 1.2, 1.3, 1.4]}

In [26]:
svc = SVC()

In [27]:
grid_s = GridSearchCV(svc, svc_param_dict)

In [28]:
grid_s.fit(x_train_scaled, y_train)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [1.1, 1.2, 1.3, 1.4],
                         'kernel': ('linear', 'rbf')})

In [29]:
gs_cv_results = pd.DataFrame(grid_s.cv_results_)
gs_cv_results.where(gs_cv_results['rank_test_score']==1).dropna()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
2,0.018414,0.000969,0.001475,0.000471,1.2,linear,"{'C': 1.2, 'kernel': 'linear'}",0.9625,0.971875,0.96875,0.946875,0.95625,0.96125,0.00897,1.0


In [30]:
grid_s.score(x_test_scaled, y_test)

0.9625

In [31]:
dect_param_dict = {'max_depth':[4,5,6,8,10], 'min_samples_split':[0.001,0.01,0.05,0.1]}

In [32]:
dect = DecisionTreeClassifier()

In [33]:
grid_d = GridSearchCV(dect, dect_param_dict)

In [34]:
grid_d.fit(x_train_scaled, y_train)

GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'max_depth': [4, 5, 6, 8, 10],
                         'min_samples_split': [0.001, 0.01, 0.05, 0.1]})

In [35]:
gs_dect_results = pd.DataFrame(grid_d.cv_results_)
gs_dect_results.where(gs_dect_results['rank_test_score']==1).dropna()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_samples_split,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
12,0.002846,0.00029,0.000222,0.000392,8,0.001,"{'max_depth': 8, 'min_samples_split': 0.001}",0.809375,0.8,0.775,0.771875,0.784375,0.788125,0.014443,1.0


In [36]:
grid_d.score(x_test_scaled, y_test)

0.795

In [37]:
ranf_param_dict = {'n_estimators':[10,20,35,60], 'max_depth':[4,5,6,8,10], 'min_samples_split':[0.001,0.01,0.05,0.1]}

In [38]:
ranf= RandomForestClassifier()

In [39]:
grid_rf = GridSearchCV(ranf, ranf_param_dict)

In [40]:
grid_rf.fit(x_train_scaled, y_train)

GridSearchCV(estimator=RandomForestClassifier(),
             param_grid={'max_depth': [4, 5, 6, 8, 10],
                         'min_samples_split': [0.001, 0.01, 0.05, 0.1],
                         'n_estimators': [10, 20, 35, 60]})

In [41]:
gs_rf_results = pd.DataFrame(grid_rf.cv_results_)
gs_rf_results.where(gs_rf_results['rank_test_score']==1).dropna()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_max_depth,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
67,0.112202,0.005509,0.007731,0.00044,10,0.001,60,"{'max_depth': 10, 'min_samples_split': 0.001, ...",0.88125,0.85,0.8625,0.821875,0.85,0.853125,0.019365,1.0


In [42]:
grid_rf.score(x_test_scaled, y_test)

0.8725

In [43]:
grabo_param_dict = {'learning_rate':[0.05,0.1,0.15],'n_estimators':[30, 35, 40], 'max_depth':[3,4,5]}

In [44]:
grabo = GradientBoostingClassifier()

In [45]:
grid_gb = GridSearchCV(grabo, grabo_param_dict)

In [46]:
grid_gb.fit(x_train_scaled, y_train)

GridSearchCV(estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.05, 0.1, 0.15],
                         'max_depth': [3, 4, 5], 'n_estimators': [30, 35, 40]})

In [47]:
gs_gb_results = pd.DataFrame(grid_gb.cv_results_)
gs_gb_results.where(gs_gb_results['rank_test_score']==1).dropna()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
26,0.892526,0.008229,0.003851,0.000371,0.15,5,40,"{'learning_rate': 0.15, 'max_depth': 5, 'n_est...",0.878125,0.88125,0.859375,0.85,0.85625,0.865,0.012406,1.0


In [48]:
grid_gb.score(x_test_scaled, y_test)

0.8675