<a href="https://colab.research.google.com/github/TheMishraAshwani/Project/blob/main/Predict_Restaurant_Rating.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('/content/Dataset .csv')

# Handle missing values
data = data.dropna()

# Identify string columns (excluding the target variable)
string_cols = data.drop('Aggregate rating', axis=1).select_dtypes(include='object').columns

# Encode categorical variables
data = pd.get_dummies(data, columns=string_cols)

# Split the data into training and testing sets
X = data.drop('Aggregate rating', axis=1)
y = data['Aggregate rating']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ensure the data types are correct
print("X_train data types:")
print(X_train.dtypes)
print("y_train data type:")
print(y_train.dtypes)

# Train the linear regression model
try:
    model = LinearRegression()
    model.fit(X_train, y_train)
except ValueError as e:
    print("Error training the linear regression model:")
    print(e)
    print("Ensure that the input data contains only numerical values.")
    exit()

# Evaluate the model on the testing data
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

# Analyze feature importance
feature_importances = pd.Series(model.coef_, index=X.columns).sort_values(ascending=False)
print('\nFeature Importance:')
print(feature_importances)

X_train data types:
Restaurant ID              int64
Country Code               int64
Longitude                float64
Latitude                 float64
Average Cost for two       int64
                          ...   
Rating text_Excellent       bool
Rating text_Good            bool
Rating text_Not rated       bool
Rating text_Poor            bool
Rating text_Very Good       bool
Length: 20819, dtype: object
y_train data type:
float64
Mean Squared Error: 4643.69
R-squared: -2026.78

Feature Importance:
City_Singapore                                                                    226.650344
Address_Unitech Cyber Park, Sector 39, Gurgaon                                    205.016787
Restaurant Name_Fat Lulu's                                                        203.148789
Address_E-42 & 43, Inner Circle, Connaught Place, New Delhi                       202.695280
Address_Khan Market, New Delhi                                                    199.079942
                           