# Import the necessary libraries for Data Analysis

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

# Loading the Restaurant dataset into a DataFrame

In [None]:
# Load the restaurant records from the CSV, decoding the file as
# ISO-8859-1, then preview the first rows of the resulting frame.
df = pd.read_csv(
    'dataset.csv',
    encoding='ISO-8859-1',
)
df.head()

# LEVEL 3 - Task 1: Predictive Modeling

# LEVEL 3 - Task 1(i): Build a regression model to predict the aggregate rating of a restaurant based on available features.

In [29]:
# Predictor columns handed to the models. The order here determines the
# column order of the feature matrix, so it is kept as-is.
features = [
    'Average Cost for two',
    'Has Table booking', 'Has Online delivery', 'Is delivering now',  # yes/no flags
    'Price range',
    'Votes',
]
# Ensure binary categorical features are encoded as integers (0 or 1)
def convert_to_binary(value):
    """Map a yes/no string to an integer flag.

    Strings are compared case-insensitively after trimming surrounding
    whitespace: ``'yes'`` becomes 1 and every other string becomes 0.
    Non-string values (for example an already-encoded 0/1) are returned
    unchanged, so re-applying the function to converted data is harmless.

    Parameters
    ----------
    value : object
        A single cell value from a yes/no column.

    Returns
    -------
    object
        1 or 0 for string input; the original value otherwise.
    """
    if isinstance(value, str):
        # strip() so padded cells such as ' Yes' are not silently encoded as 0
        return 1 if value.strip().lower() == 'yes' else 0
    return value

# Encode each yes/no column as 0/1, writing the result back onto df
for flag_column in ('Has Table booking', 'Has Online delivery', 'Is delivering now'):
    df[flag_column] = df[flag_column].apply(convert_to_binary)

# Feature matrix (selected predictor columns) and target column
X = df[features]
y = df['Aggregate rating']


      Average Cost for two  Has Table booking  Has Online delivery  \
0                     1100                  1                    0   
1                     1200                  1                    0   
2                     4000                  1                    0   
3                     1500                  0                    0   
4                     1500                  1                    0   
...                    ...                ...                  ...   
9546                    80                  0                    0   
9547                   105                  0                    0   
9548                   170                  0                    0   
9549                   120                  0                    0   
9550                    55                  0                    0   

      Is delivering now  Price range  Votes  
0                     0            3    314  
1                     0            3    591  
2                    

# LEVEL 3 - Task 1(ii): Split the dataset into training and testing sets and evaluate the model's performance using appropriate metrics.

In [25]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear-regression baseline on the training rows and predict
# the held-out rows.
lr_model = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

# Score the baseline on the test split and report both metrics.
mse = mean_squared_error(y_test, y_pred_lr)
r2 = r2_score(y_test, y_pred_lr)
print(f"Linear Regression - Mean Squared Error:{mse:.2f},R^2 Score:{r2:.2f}")


Linear Regression - Mean Squared Error: 1.68, R^2 Score: 0.26


# LEVEL 3 - Task 1(iii): Experiment with different algorithms (e.g., linear regression, decision trees, random forest) and compare their performance.


In [30]:
# Decision tree: fit on the training split, then predict the test split.
# random_state is pinned so repeated runs build the same tree.
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# Random forest: same train/predict steps, with the seed pinned as well.
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
# Model Evaluation
def evaluate_model(y_test, y_pred, model_name):
    """Print one summary line with a model's MSE and R^2 score.

    y_test     -- true target values
    y_pred     -- predictions for the same rows as y_test
    model_name -- label placed at the start of the printed line

    Both metrics are formatted to two decimals. Nothing is returned.
    """
    mse, r2 = (
        mean_squared_error(y_test, y_pred),
        r2_score(y_test, y_pred),
    )
    print(f"{model_name}-Mean Squared Error:{mse:.2f},R^2 Score:{r2:.2f}")

# Model labels paired with their test-set predictions, in report order
models = ['Linear Regression', 'Decision Tree', 'Random Forest']
predictions = [y_pred_lr, y_pred_dt, y_pred_rf]

# Print the two-decimal summary line for each model
for model_label, model_pred in zip(models, predictions):
    evaluate_model(y_test, model_pred, model_label)

# Unrounded metrics for the side-by-side comparison table
mse_scores = [mean_squared_error(y_test, pred) for pred in predictions]
r2_scores = [r2_score(y_test, pred) for pred in predictions]

comparison_df = pd.DataFrame({
    'Model': models,
    'Mean Squared Error': mse_scores,
    'R^2 Score': r2_scores,
})

print(comparison_df)


Linear Regression-Mean Squared Error:1.68,R^2 Score:0.26
Decision Tree-Mean Squared Error:0.20,R^2 Score:0.91
Random Forest-Mean Squared Error:0.13,R^2 Score:0.94
               Model  Mean Squared Error  R^2 Score
0  Linear Regression            1.676361   0.263497
1      Decision Tree            0.200943   0.911717
2      Random Forest            0.132962   0.941584
