## Model Creation & Evaluation

In [1]:
%%capture
#Load feature_selection-engineering file
%run feature_selection-engineering.ipynb

### Train Test Split

In [2]:
#Get dependent and independent variables
X = traffic_data[["Year", "Month", "DayOfWeek", "HourOfDay", "Junction"]]
#Select the target with single brackets so y is a 1-D Series: scikit-learn
#regressors expect y of shape (n_samples,), and a one-column DataFrame makes
#RandomForestRegressor raise a DataConversionWarning before flattening it
y = traffic_data["Vehicles"]

#Split dataset into train and test (20% held out); the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [3]:
#Model evaluation function
def model_eval(model_name, train_X=None, test_X=None, train_y=None, test_y=None):
    """Fit an estimator and print its RMSE, R-squared and MAE on a hold-out set.

    Parameters
    ----------
    model_name : estimator
        Despite the name, this is an estimator *instance* exposing ``fit`` and
        ``predict`` (e.g. ``XGBRegressor()``), not a string. It is fitted in place.
    train_X, test_X, train_y, test_y : optional
        Training/test features and targets. Any argument left as ``None``
        falls back to the notebook-level ``X_train``, ``X_test``, ``y_train``
        or ``y_test``, so ``model_eval(model)`` behaves as before while other
        splits can be evaluated without copying this function.

    Returns
    -------
    None
        The metrics are printed, not returned.
    """
    #Explicit arguments win; otherwise use the notebook-level split
    train_X = X_train if train_X is None else train_X
    test_X = X_test if test_X is None else test_X
    train_y = y_train if train_y is None else train_y
    test_y = y_test if test_y is None else test_y

    model_name.fit(train_X, train_y) #Fit the model

    #Make predictions on the test set
    y_pred = model_name.predict(test_X)

    #Different metrics
    rmse = np.sqrt(mean_squared_error(test_y, y_pred)) #Calculate the RMSE
    r2 = r2_score(test_y, y_pred) #Calculate the R-squared
    mae = mean_absolute_error(test_y, y_pred) #Calculate MAE

    #Print the results
    print("RMSE: ", rmse)
    print("R-squared: ", r2)
    print("MAE: ", mae)

#### XGBoost Regressor

In [4]:
#XGBoost: fit an XGBRegressor built with no arguments on the training split
#and print RMSE, R-squared and MAE for the test split
model_eval(XGBRegressor())

RMSE:  5.046367826810308
R-squared:  0.9316728843005203
MAE:  3.36985913373724


#### Random Forest Regressor

In [5]:
#RandomForest: fit a RandomForestRegressor built with no arguments on the
#training split and print RMSE, R-squared and MAE for the test split
#NOTE(review): no random_state is passed, so the printed scores presumably
#vary slightly between runs - confirm and consider fixing a seed
model_eval(RandomForestRegressor())

RMSE:  4.547099964859625
R-squared:  0.9445241089559638
MAE:  2.965587559563816


### With dummy variables

In [6]:
#Hold out 20% of the dummy-variable data (new_X / new_y), using the same
#seed as the split of X / y
new_X_train, new_X_test, new_y_train, new_y_test = train_test_split(
    new_X,
    new_y,
    test_size=0.2,
    random_state=42,
)

In [7]:
#Evaluation helper for the dummy-variable split
def new_model_eval(model_name):
    """Fit an estimator on the dummy-variable split and print hold-out metrics.

    ``model_name`` is an estimator instance (not a string) exposing ``fit``
    and ``predict``. It is fitted in place on the notebook-level
    ``new_X_train`` / ``new_y_train`` and scored against ``new_X_test`` /
    ``new_y_test``. RMSE, R-squared and MAE are printed; nothing is returned.
    """
    estimator = model_name
    estimator.fit(new_X_train, new_y_train)
    predictions = estimator.predict(new_X_test)

    #Compute the three scores in reporting order, keyed by their printed label
    report = {
        "RMSE: ": np.sqrt(mean_squared_error(new_y_test, predictions)),
        "R-squared: ": r2_score(new_y_test, predictions),
        "MAE: ": mean_absolute_error(new_y_test, predictions),
    }

    for label, score in report.items():
        print(label, score)

#### XGBoost Regressor

In [8]:
#XGBoost on the dummy-variable split: fit an XGBRegressor built with no
#arguments and print RMSE, R-squared and MAE for new_X_test / new_y_test
new_model_eval(XGBRegressor())

RMSE:  5.5934440588926435
R-squared:  0.916055184190329
MAE:  3.764442518738149


#### Random Forest Regressor

In [9]:
#RandomForest on the dummy-variable split: fit a RandomForestRegressor built
#with no arguments and print RMSE, R-squared and MAE for new_X_test / new_y_test
#NOTE(review): no random_state is passed, so the printed scores presumably
#vary slightly between runs - confirm and consider fixing a seed
new_model_eval(RandomForestRegressor())

RMSE:  4.939851720086754
R-squared:  0.9345268692365941
MAE:  3.2263316743928967


### Cross Validation 

In [10]:
#Cross-validation splitter: 10 shuffled folds with a fixed seed
kfold = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)
def cv_model_eval(model):
    """Cross-validate ``model`` on ``X`` / ``y`` and print RMSE, R-squared and MAE.

    All three metrics are scored in a single ``cross_validate`` pass over the
    notebook-level ``kfold`` splitter. The model is therefore fitted once per
    fold (rather than once per fold *per metric*), and every metric is
    computed from the same fitted models.

    Parameters
    ----------
    model : estimator
        scikit-learn compatible regressor instance, e.g. ``XGBRegressor()``.

    Returns
    -------
    None
        The scores are printed, not returned.
    """
    #Imported locally: this notebook obtains its names through %run, so there
    #is no import cell here to extend
    from sklearn.model_selection import cross_validate

    #Use cross-validation to evaluate the model on every metric at once
    cv_results = cross_validate(
        model, X, y, cv = kfold,
        scoring = ("neg_mean_squared_error", "r2", "neg_mean_absolute_error"),
    )

    #Calculate the RMSE, R-squared, and MAE
    #RMSE is the square root of the mean fold MSE; the scorers return negated
    #errors, hence the sign flips
    rmse = np.sqrt(-cv_results["test_neg_mean_squared_error"].mean())
    r2 = cv_results["test_r2"].mean()
    mae = cv_results["test_neg_mean_absolute_error"].mean()

    #Print the results
    print("RMSE: ", rmse)
    print("R-squared: ", r2)
    print("MAE: ", -mae)

#### XGBRegressor

In [11]:
#XGBoost: cross-validate an XGBRegressor built with no arguments on X / y
#with the kfold splitter and print the averaged RMSE, R-squared and MAE
cv_model_eval(XGBRegressor())

RMSE:  5.113697453557049
R-squared:  0.9331755953442382
MAE:  3.386168236494238


#### Random Forest Regressor

In [12]:
#RandomForest: cross-validate a RandomForestRegressor built with no arguments
#on X / y with the kfold splitter and print the averaged RMSE, R-squared and MAE
#NOTE(review): no random_state is passed, so the printed scores presumably
#vary slightly between runs - confirm and consider fixing a seed
cv_model_eval(RandomForestRegressor())

RMSE:  4.547490007850059
R-squared:  0.9476054755763281
MAE:  2.925487404355202


### With dummy variables

#### XGBRegressor

In [13]:
#Cross-validation splitter for the dummy-variable data: the same settings as
#the earlier kfold (10 shuffled folds, seed 42)
kfold = KFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)
def cv_model_eval_d(model):
    """Cross-validate ``model`` on ``new_X`` / ``new_y`` and print RMSE, R-squared and MAE.

    All three metrics are scored in a single ``cross_validate`` pass over the
    notebook-level ``kfold`` splitter. The model is therefore fitted once per
    fold (rather than once per fold *per metric*), and every metric is
    computed from the same fitted models.

    Parameters
    ----------
    model : estimator
        scikit-learn compatible regressor instance, e.g. ``XGBRegressor()``.

    Returns
    -------
    None
        The scores are printed, not returned.
    """
    #Imported locally: this notebook obtains its names through %run, so there
    #is no import cell here to extend
    from sklearn.model_selection import cross_validate

    #Use cross-validation to evaluate the model on every metric at once
    cv_results = cross_validate(
        model, new_X, new_y, cv = kfold,
        scoring = ("neg_mean_squared_error", "r2", "neg_mean_absolute_error"),
    )

    #Calculate the RMSE, R-squared, and MAE
    #RMSE is the square root of the mean fold MSE; the scorers return negated
    #errors, hence the sign flips
    rmse = np.sqrt(-cv_results["test_neg_mean_squared_error"].mean())
    r2 = cv_results["test_r2"].mean()
    mae = cv_results["test_neg_mean_absolute_error"].mean()

    #Print the results
    print("RMSE: ", rmse)
    print("R-squared: ", r2)
    print("MAE: ", -mae)

In [14]:
#XGBoost: cross-validate an XGBRegressor built with no arguments on
#new_X / new_y with the kfold splitter and print the averaged RMSE, R-squared and MAE
cv_model_eval_d(XGBRegressor())

RMSE:  5.72827796050781
R-squared:  0.9161441948318643
MAE:  3.8506377245752335


#### Random Forest Regressor

In [15]:
#RandomForest: cross-validate a RandomForestRegressor built with no arguments
#on new_X / new_y with the kfold splitter and print the averaged RMSE, R-squared and MAE
#NOTE(review): no random_state is passed, so the printed scores presumably
#vary slightly between runs - confirm and consider fixing a seed
cv_model_eval_d(RandomForestRegressor())

RMSE:  4.975028790492222
R-squared:  0.9367603799076083
MAE:  3.210741678523763
