In [1]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Restaurants.csv')

In [4]:
print(df.head())

   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 

In [5]:
missing_values = df.isnull().sum()
print(missing_values)

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64


In [6]:
duplicate_values = df.duplicated().sum()
print(duplicate_values)

0


In [7]:
from sklearn.preprocessing import LabelEncoder

label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

In [8]:
print(df.head())

   Restaurant ID  Restaurant Name  Country Code  City  Address  Locality  \
0        6317637             3748           162    73     8685       171   
1        6304287             3172           162    73     6055       593   
2        6300002             2896           162    75     4684       308   
3        6318506             4707           162    75     8690       862   
4        6314302             5523           162    75     8689       862   

   Locality Verbose   Longitude   Latitude  Cuisines  ...  Currency  \
0               172  121.027535  14.565443       920  ...         0   
1               601  121.014101  14.553708      1111  ...         0   
2               314  121.056831  14.581404      1671  ...         0   
3               875  121.056475  14.585318      1126  ...         0   
4               875  121.057508  14.584450      1122  ...         0   

   Has Table booking  Has Online delivery  Is delivering now  \
0                  1                    0           

In [9]:
from sklearn.model_selection import train_test_split

X = df.drop('Aggregate rating', axis=1)
y = df['Aggregate rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

lr = LinearRegression()
rf = RandomForestRegressor(n_estimators=100, random_state=42)
svr = SVR()
dt = DecisionTreeRegressor(random_state=42)
knn = KNeighborsRegressor(n_neighbors=10)

models = {
    'Linear Regression': lr,
    'Random Forest': rf,
    'Support Vector Regression': svr,
    'Decision Tree': dt,
    'K-Nearest Neighbors': knn
}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{name} Model Performance:")
    print(f"R-squared: {r2_score(y_test, y_pred)}")
    print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}\n")

Linear Regression Model Performance:
R-squared: 0.4645125017763094
Mean Squared Error: 1.2188279089052758

Random Forest Model Performance:
R-squared: 0.9880352967635689
Mean Squared Error: 0.027232968602825716

Support Vector Regression Model Performance:
R-squared: 0.08849191345080598
Mean Squared Error: 2.0746917505344156

Decision Tree Model Performance:
R-squared: 0.9753359141386785
Mean Squared Error: 0.056138147566718984

K-Nearest Neighbors Model Performance:
R-squared: 0.3180733391670123
Mean Squared Error: 1.5521394034536893



In [12]:
model = rf
import pickle as pkl
pkl.dump(model, open('/content/drive/MyDrive/Colab Notebooks/model.pkl', 'wb'))