In [2]:
# 1. Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# 2. Load the dataset
df = pd.read_csv("Dataset .csv")  # Make sure the file is in your working directory

# 3. Drop irrelevant columns
df = df.drop(columns=[
    'Restaurant ID', 'Restaurant Name', 'Address', 'Locality',
    'Locality Verbose', 'Rating color', 'Rating text'
])

# 4. Handle missing values
df['Cuisines'].fillna(df['Cuisines'].mode()[0], inplace=True)

# 5. One-hot encode categorical features
categorical_cols = df.select_dtypes(include='object').columns
df_encoded = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# 6. Split features and target
X = df_encoded.drop('Aggregate rating', axis=1)
y = df_encoded['Aggregate rating']

# 7. Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 8. Train Linear Regression model
lr = LinearRegression()
lr.fit(X_train, y_train)

# 9. Predict and evaluate
y_pred = lr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"📊 Mean Squared Error: {mse:.2f}")
print(f"📈 R² Score: {r2:.2f}")

# 10. Feature Importance (Top 10)
feature_importance = pd.Series(lr.coef_, index=X.columns)
top_features = feature_importance.abs().sort_values(ascending=False).head(10)
top_features_with_sign = feature_importance.loc[top_features.index]

print("\n🔍 Top 10 Most Influential Features:")
print(top_features_with_sign)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Cuisines'].fillna(df['Cuisines'].mode()[0], inplace=True)


📊 Mean Squared Error: 54739615923423.13
📈 R² Score: -24049646197292.99

🔍 Top 10 Most Influential Features:
Currency_Indian Rupees(Rs.)    2.221369e+08
Currency_Dollar($)             1.971925e+08
City_Mandaluyong City          1.967830e+08
City_Makati City               1.967830e+08
City_Santa Rosa                1.967830e+08
Currency_Emirati Diram(AED)    1.967830e+08
City_Pasay City                1.967830e+08
Currency_Pounds(��)            1.912618e+08
Currency_Rand(R)               1.837778e+08
Currency_Brazilian Real(R$)    1.719032e+08
dtype: float64
