In [75]:
import pandas as pd
import numpy as np

In [76]:
data=pd.read_csv("final_data.csv")

In [77]:
data

Unnamed: 0,online_order,book_table,rate,votes,location,rest_type,cuisines,cost_for_two_people,type
0,Yes,Yes,4.100000,775,Banashankari,Casual Dining,others,800.000000,Buffet
1,Yes,No,4.100000,787,Banashankari,Casual Dining,others,800.000000,Buffet
2,Yes,No,3.800000,918,Banashankari,others,others,800.000000,Buffet
3,No,No,3.700000,88,Banashankari,Quick Bites,"South Indian, North Indian",300.000000,Buffet
4,No,No,3.800000,166,Basavanagudi,Casual Dining,others,600.000000,Buffet
...,...,...,...,...,...,...,...,...,...
51712,No,No,3.600000,27,Whitefield,others,others,416.630112,Pubs and bars
51713,No,No,3.700449,0,Whitefield,others,others,600.000000,Pubs and bars
51714,No,No,3.700449,0,Whitefield,others,others,416.630112,Pubs and bars
51715,No,Yes,4.300000,236,others,others,others,416.630112,Pubs and bars


In [78]:
X=data.drop(labels=["rate"],axis=1)

In [79]:
y=data[["rate"]]

In [80]:
X

Unnamed: 0,online_order,book_table,votes,location,rest_type,cuisines,cost_for_two_people,type
0,Yes,Yes,775,Banashankari,Casual Dining,others,800.000000,Buffet
1,Yes,No,787,Banashankari,Casual Dining,others,800.000000,Buffet
2,Yes,No,918,Banashankari,others,others,800.000000,Buffet
3,No,No,88,Banashankari,Quick Bites,"South Indian, North Indian",300.000000,Buffet
4,No,No,166,Basavanagudi,Casual Dining,others,600.000000,Buffet
...,...,...,...,...,...,...,...,...
51712,No,No,27,Whitefield,others,others,416.630112,Pubs and bars
51713,No,No,0,Whitefield,others,others,600.000000,Pubs and bars
51714,No,No,0,Whitefield,others,others,416.630112,Pubs and bars
51715,No,Yes,236,others,others,others,416.630112,Pubs and bars


In [81]:
y

Unnamed: 0,rate
0,4.100000
1,4.100000
2,3.800000
3,3.700000
4,3.800000
...,...
51712,3.600000
51713,3.700449
51714,3.700449
51715,4.300000


MODELS

In [82]:
from sklearn.impute import SimpleImputer ## HAndling Missing Values
from sklearn.preprocessing import StandardScaler # HAndling Feature Scaling
from sklearn.preprocessing import OrdinalEncoder # Ordinal Encoding
from sklearn.preprocessing import OneHotEncoder
## pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [83]:
# Define which columns should be ordinal-encoded and which should be scaled
categorical_cols = ['online_order', 'book_table', 'location', 'rest_type', 'cuisines', 'type', 'cost_for_two_people']
numerical_cols = ['votes']

In [84]:
## Numerical Pipeline
num_pipeline=Pipeline(
    steps=[
    ('imputer',SimpleImputer(strategy='median')), #to cheeck empty value
    ('scaler',StandardScaler())    #to perform scaling

    ]

)

In [85]:
# Categorigal Pipeline
cat_pipeline=Pipeline(
    steps=[
    ('imputer',SimpleImputer(strategy='most_frequent')),   #handling missing value using mode ,not mean.
    ('ordinalencoder',OneHotEncoder(sparse_output=False, handle_unknown='ignore', categories='auto')), #created object for cat_encoding
    ('scaler',StandardScaler())
    ]
)

In [86]:
preprocessor=ColumnTransformer(
    [
        
        ('num_pipeline',num_pipeline,numerical_cols),
        ('cat_pipeline',cat_pipeline,categorical_cols)
    ]
)

In [87]:
## Train test split

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=30)

In [88]:
preprocessor.fit_transform(X_train)

array([[-0.12475398,  1.19211898, -1.19211898, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [-0.33072423,  1.19211898, -1.19211898, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [ 1.82575941,  1.19211898, -1.19211898, ..., -0.05694238,
        -0.11737989, -0.03284022],
       ...,
       [-0.35181757,  1.19211898, -1.19211898, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [-0.01804651, -0.83884244,  0.83884244, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [ 0.71029405, -0.83884244,  0.83884244, ..., -0.05694238,
        -0.11737989, -0.03284022]])

In [89]:
preprocessor.transform(X_test)

array([[-0.30342697, -0.83884244,  0.83884244, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [-0.32948344,  1.19211898, -1.19211898, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [-0.34561364, -0.83884244,  0.83884244, ..., -0.05694238,
        -0.11737989, -0.03284022],
       ...,
       [ 2.39776111,  1.19211898, -1.19211898, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [ 0.2387959 , -0.83884244,  0.83884244, ..., -0.05694238,
        -0.11737989, -0.03284022],
       [-0.23518381,  1.19211898, -1.19211898, ..., -0.05694238,
        -0.11737989, -0.03284022]])

In [90]:
preprocessor.get_feature_names_out()    #list of features which will be transformed

array(['num_pipeline__votes', 'cat_pipeline__online_order_No',
       'cat_pipeline__online_order_Yes', 'cat_pipeline__book_table_No',
       'cat_pipeline__book_table_Yes', 'cat_pipeline__location_BTM',
       'cat_pipeline__location_Banashankari',
       'cat_pipeline__location_Banaswadi',
       'cat_pipeline__location_Bannerghatta Road',
       'cat_pipeline__location_Basavanagudi',
       'cat_pipeline__location_Bellandur',
       'cat_pipeline__location_Brigade Road',
       'cat_pipeline__location_Brookefield',
       'cat_pipeline__location_Church Street',
       'cat_pipeline__location_Electronic City',
       'cat_pipeline__location_Frazer Town', 'cat_pipeline__location_HSR',
       'cat_pipeline__location_Indiranagar',
       'cat_pipeline__location_JP Nagar',
       'cat_pipeline__location_Jayanagar',
       'cat_pipeline__location_Kalyan Nagar',
       'cat_pipeline__location_Kammanahalli',
       'cat_pipeline__location_Koramangala 1st Block',
       'cat_pipeline__locati

In [91]:
X_train=pd.DataFrame(preprocessor.fit_transform(X_train),columns=preprocessor.get_feature_names_out())
X_test=pd.DataFrame(preprocessor.transform(X_test),columns=preprocessor.get_feature_names_out())

In [92]:
X_train

Unnamed: 0,num_pipeline__votes,cat_pipeline__online_order_No,cat_pipeline__online_order_Yes,cat_pipeline__book_table_No,cat_pipeline__book_table_Yes,cat_pipeline__location_BTM,cat_pipeline__location_Banashankari,cat_pipeline__location_Banaswadi,cat_pipeline__location_Bannerghatta Road,cat_pipeline__location_Basavanagudi,...,cat_pipeline__cost_for_two_people_550.0,cat_pipeline__cost_for_two_people_560.0,cat_pipeline__cost_for_two_people_600.0,cat_pipeline__cost_for_two_people_650.0,cat_pipeline__cost_for_two_people_700.0,cat_pipeline__cost_for_two_people_750.0,cat_pipeline__cost_for_two_people_800.0,cat_pipeline__cost_for_two_people_850.0,cat_pipeline__cost_for_two_people_900.0,cat_pipeline__cost_for_two_people_950.0
0,-0.124754,1.192119,-1.192119,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
1,-0.330724,1.192119,-1.192119,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
2,1.825759,1.192119,-1.192119,-2.659243,2.659243,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
3,-0.351818,-0.838842,0.838842,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,3.612919,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
4,-0.351818,-0.838842,0.838842,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36196,-0.344373,-0.838842,0.838842,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
36197,0.135811,-0.838842,0.838842,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
36198,-0.351818,1.192119,-1.192119,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
36199,-0.018047,-0.838842,0.838842,0.376047,-0.376047,-0.33256,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,8.227551,-0.215249,-0.056942,-0.11738,-0.03284


In [93]:
X_test

Unnamed: 0,num_pipeline__votes,cat_pipeline__online_order_No,cat_pipeline__online_order_Yes,cat_pipeline__book_table_No,cat_pipeline__book_table_Yes,cat_pipeline__location_BTM,cat_pipeline__location_Banashankari,cat_pipeline__location_Banaswadi,cat_pipeline__location_Bannerghatta Road,cat_pipeline__location_Basavanagudi,...,cat_pipeline__cost_for_two_people_550.0,cat_pipeline__cost_for_two_people_560.0,cat_pipeline__cost_for_two_people_600.0,cat_pipeline__cost_for_two_people_650.0,cat_pipeline__cost_for_two_people_700.0,cat_pipeline__cost_for_two_people_750.0,cat_pipeline__cost_for_two_people_800.0,cat_pipeline__cost_for_two_people_850.0,cat_pipeline__cost_for_two_people_900.0,cat_pipeline__cost_for_two_people_950.0
0,-0.303427,-0.838842,0.838842,0.376047,-0.376047,3.006973,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
1,-0.329483,1.192119,-1.192119,0.376047,-0.376047,3.006973,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
2,-0.345614,-0.838842,0.838842,0.376047,-0.376047,-0.332560,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
3,-0.319557,-0.838842,0.838842,0.376047,-0.376047,-0.332560,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
4,0.352948,1.192119,-1.192119,-2.659243,2.659243,-0.332560,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15511,-0.351818,1.192119,-1.192119,0.376047,-0.376047,-0.332560,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
15512,-0.240147,1.192119,-1.192119,0.376047,-0.376047,-0.332560,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
15513,2.397761,1.192119,-1.192119,-2.659243,2.659243,-0.332560,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284
15514,0.238796,-0.838842,0.838842,-2.659243,2.659243,-0.332560,-0.13564,-0.11543,-0.181133,-0.114814,...,-0.123402,-0.005256,-0.276785,-0.121074,-0.195372,-0.121543,-0.215249,-0.056942,-0.11738,-0.03284


In [94]:
## Model Training

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor 
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [95]:
import numpy as np
def evaluate_model(true, predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [96]:
## Train multiple models

models={
    'LinearRegression':LinearRegression(),
    'DecisionTreeRegressor':DecisionTreeRegressor(),
    'RandomForestRegressor':RandomForestRegressor(),
    'KNN': KNeighborsRegressor()
    }

In [97]:
trained_model_list=[]
model_list=[]
r2_list=[]

In [98]:
list(models)

['LinearRegression', 'DecisionTreeRegressor', 'RandomForestRegressor', 'KNN']

In [99]:
for i in range(len(list(models))):
    model=list(models.values())[i]
    print(model)

LinearRegression()
DecisionTreeRegressor()
RandomForestRegressor()
KNeighborsRegressor()


In [100]:
models.keys()

dict_keys(['LinearRegression', 'DecisionTreeRegressor', 'RandomForestRegressor', 'KNN'])

In [101]:
models.values()

dict_values([LinearRegression(), DecisionTreeRegressor(), RandomForestRegressor(), KNeighborsRegressor()])

In [102]:
for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    #Make Predictions
    y_pred=model.predict(X_test)

    #this is a validation(test) score
    mae, rmse, r2_square=evaluate_model(y_test,y_pred)

    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)

    r2_list.append(r2_square)
    
    print('='*35)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 0.3233925282046103
MAE: 0.24581411307340906
R2 score 31.873565548281533


DecisionTreeRegressor
Model Training Performance
RMSE: 0.15437801501098355
MAE: 0.04478980367343901
R2 score 84.47517899632211




  return fit_method(estimator, *args, **kwargs)


RandomForestRegressor
Model Training Performance
RMSE: 0.13207740810298366
MAE: 0.05663679691775938
R2 score 88.6364828414279


KNN
Model Training Performance
RMSE: 0.25977128115620773
MAE: 0.1675664389607294
R2 score 56.04199550068498




In [103]:
model_list

['LinearRegression', 'DecisionTreeRegressor', 'RandomForestRegressor', 'KNN']

In [104]:
r2_list

[0.31873565548281535,
 0.8447517899632211,
 0.8863648284142791,
 0.5604199550068498]