In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
data = pd.read_csv('C:\\Users\\mahik\\OneDrive\\Desktop\\datasetrestro\\Dataset .csv')

In [17]:
print("Initial dataset:")
print(data.head())

Initial dataset:
   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong C

In [27]:
print("Missing values:")
print(data.isnull().sum())

Missing values:
Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                0
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64


In [19]:
data.dropna(subset=['Cuisines', 'Rating text'], inplace=True)

In [28]:
print(" Cleaned dataset:")
print(data.head())

 Cleaned dataset:
   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong 

In [21]:
imputer = SimpleImputer(strategy="mean")
data[['Price range', 'Aggregate rating']] = imputer.fit_transform(data[['Price range', 'Aggregate rating']])


In [22]:
categorical_features = ['Locality', 'Cuisines']
numeric_features = ['Price range']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ('num', StandardScaler(), numeric_features)
    ])

In [23]:
X = data.drop(columns=['Aggregate rating'])  
y = data['Aggregate rating'] 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
    ])
model.fit(X_train, y_train)

In [25]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 1.4372909075175968
R-squared: 0.37237087460929097


In [26]:
if isinstance(model.named_steps['regressor'], LinearRegression):
    coefficients = model.named_steps['regressor'].coef_  # For linear regression
    print("Feature coefficients:")
    for feature, coef in zip(X.columns, coefficients):
        print(feature, ':', coef)
    most_influential_features = sorted(zip(X.columns, coefficients), key=lambda x: abs(x[1]), reverse=True)
    print("\nMost influential features:")
    for feature, coef in most_influential_features:
        print(feature, ':', coef)
elif isinstance(model.named_steps['regressor'], DecisionTreeRegressor):
    feature_importances = model.named_steps['regressor'].feature_importances_  # For decision tree regression
    print("Feature importances:")
    for feature, importance in zip(X.columns, feature_importances):
        print(feature, ':', importance)
    most_influential_features = sorted(zip(X.columns, feature_importances), key=lambda x: x[1], reverse=True)
    print("\nMost influential features:")
    for feature, importance in most_influential_features:
        print(feature, ':', importance)

Feature coefficients:
Restaurant ID : 2.3147149039527593
Restaurant Name : 0.9410676711783859
Country Code : 0.23560334821973686
City : -1.6682446856541329
Address : 1.0681016898596791
Locality : 0.189112564481073
Locality Verbose : 0.25998542288918725
Longitude : 0.8348580277004624
Latitude : 0.12637835623749646
Cuisines : -0.03991736845903795
Average Cost for two : 0.11610617698925693
Currency : 0.4321123502354915
Has Table booking : -1.529972033928249
Has Online delivery : -0.001675740533004152
Is delivering now : -0.6996570341209096
Switch to order menu : -0.3230753186743267
Price range : 0.39676991706684317
Rating color : -0.8393417128682532
Rating text : -0.11487093201565428
Votes : 0.10995992032564239

Most influential features:
Restaurant ID : 2.3147149039527593
City : -1.6682446856541329
Has Table booking : -1.529972033928249
Address : 1.0681016898596791
Restaurant Name : 0.9410676711783859
Rating color : -0.8393417128682532
Longitude : 0.8348580277004624
Is delivering now : -