In [51]:
import pandas as pd
import numpy as np

In [7]:
# Load Data from .pkl File
df = pd.read_pickle('zomato_cleaned.pkl')
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes,Country,Average Cost for two Dollar($)
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,No,No,No,3,4.8,Dark Green,Excellent,314,Phillipines,99.0
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,No,No,No,3,4.5,Dark Green,Excellent,591,Phillipines,108.0
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,No,No,No,4,4.4,Green,Very Good,270,Phillipines,360.0
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,No,No,No,4,4.9,Dark Green,Excellent,365,Phillipines,135.0
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,No,No,No,4,4.8,Dark Green,Excellent,229,Phillipines,135.0


# Data Preprocessing

In [9]:
# Handle missing values
df.fillna(0, inplace=True)

In [11]:
# Encode categorical features
from sklearn.preprocessing import LabelEncoder  

le = LabelEncoder()
df['Country'] = le.fit_transform(df['Country'])
df['Cuisines'] = le.fit_transform(df['Cuisines'])
df['Has Online delivery'] = le.fit_transform(df['Has Online delivery'])


In [14]:
# Select relevant features
X = df[['Country', 'Price range', 'Cuisines', 'Has Online delivery']]
y = df['Aggregate rating']


In [16]:
X

Unnamed: 0,Country,Price range,Cuisines,Has Online delivery
0,6,3,920,0
1,6,3,1111,0
2,6,4,1671,0
3,6,4,1126,0
4,6,4,1122,0
...,...,...,...,...
9546,11,3,1813,0
9547,11,3,1825,0
9548,11,4,1110,0
9549,11,4,1657,0


In [18]:
y

0       4.8
1       4.5
2       4.4
3       4.9
4       4.8
       ... 
9546    4.1
9547    4.2
9548    3.7
9549    4.0
9550    4.0
Name: Aggregate rating, Length: 9551, dtype: float64

# Train-Test Split

In [23]:
from sklearn.model_selection import train_test_split  

X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.2, 
                                                    random_state=42)
X_train.shape, X_test.shape

((7640, 4), (1911, 4))

# Train Model

In [26]:
from sklearn.linear_model import LinearRegression  

model = LinearRegression()
model

In [28]:
model.fit(X_train, y_train)

In [30]:
from sklearn.ensemble import RandomForestRegressor  

rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model

In [32]:
rf_model.fit(X_train, y_train)

# Evaluate Model

In [35]:
from sklearn.metrics import mean_squared_error, r2_score  

y_pred = model.predict(X_test)
y_pred

array([1.80654818, 3.23640649, 2.42219506, ..., 2.55046122, 3.17911502,
       4.4996075 ])

In [37]:
print("Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("R-squared:", r2_score(y_test, y_pred))

Mean Squared Error: 1.6582985445324019
R-squared: 0.2714327162747501


# Save Model

In [40]:
import joblib  

joblib.dump(model, 'rating_prediction_model.pkl')

['rating_prediction_model.pkl']

# Load and Use Model

In [43]:
model = joblib.load('rating_prediction_model.pkl')

sample_input = [[10, 2, 15, 1]]  # Example: Country, Price Range, Cuisine, Online Delivery
predicted_rating = model.predict(sample_input)
print("Predicted Rating:", predicted_rating)

Predicted Rating: [4.27267818]




In [None]:
# save