In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Install the Kaggle API token: create ~/.kaggle, copy kaggle.json from the
# working directory into it, and restrict the file to owner read/write (600).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the restaurant dataset with the Kaggle command-line client.
!kaggle datasets download -d mohdshahnawazaadil/restaurant-dataset

In [None]:
from zipfile import ZipFile

# Archive to unpack; expected to be the file fetched by the Kaggle download cell.
file_name = "restaurant-dataset.zip"

# Bind the open archive to `archive` instead of `zip`: a name bound by `with`
# stays defined after the block, so `as zip` would shadow the built-in zip()
# for every later cell in the session.
with ZipFile(file_name, 'r') as archive:
    # Extract every member into the current working directory.
    archive.extractall()
    print("Done")

In [None]:
# Load the CSV into a DataFrame.
# NOTE(review): the file name has a space before ".csv" — confirm it matches
# the name of the extracted file exactly.
df=pd.read_csv('Dataset .csv')

In [None]:
# Display the loaded frame.
df

In [None]:
# Call info() so the per-column dtypes and non-null counts are printed; the
# bare attribute `df.info` only displays the bound method's repr.
df.info()

In [None]:
# Descriptive statistics of the frame (pandas' default column selection).
df.describe()

In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Count of missing values in each column.
df.isna().sum()

In [None]:
# Column labels of the frame.
df.columns

In [None]:
# Mean 'Average Cost for two' per city; keep the five highest means and draw
# them as a red bar chart.
city_mean_cost = df.groupby('City')['Average Cost for two'].mean()
top_city_cost = city_mean_cost.sort_values(ascending=False).head()
top_city_cost.plot(kind='bar', color='red')
plt.title('Top 5 cities with highest average cost for two')
plt.xlabel('City')
plt.ylabel('Average Cost for two')
plt.grid()
plt.show()

In [None]:
# Total 'Votes' per 'Cuisines' value; keep the five largest totals and draw
# them as a blue bar chart.
cuisine_vote_totals = df.groupby('Cuisines')['Votes'].sum()
top_cuisine_votes = cuisine_vote_totals.sort_values(ascending=False).head()
top_cuisine_votes.plot(kind='bar', color='blue')
plt.title('Top 5 cuisines with highest votes')
plt.xlabel('Cuisines')
plt.ylabel('Votes')

plt.show()

In [None]:
data_Cuisines_Votes = df.groupby('Cuisines')['Votes'].sum().reset_index()


In [None]:
data_Cuisines_Votes[data_Cuisines_Votes['Votes']!=0]

In [None]:
sns.pairplot(data=df[["Average Cost for two", "Votes", "Aggregate rating"]])

In [None]:
df["Has Online delivery"].value_counts().plot(kind='pie')
plt.title('Has Online delivery?')
plt.legend()
plt.show()


In [None]:
# Column labels of the frame.
df.columns

In [None]:
# Rating score next to its text label, highest scores first (first 7000 rows).
df[['Aggregate rating','Rating text']].sort_values(by='Aggregate rating',ascending=False).head(7000)

In [None]:
# Keep only rows whose rating text is something other than "Not rated".
# NOTE: this rebinds `df`, so every later cell works on the filtered frame.
df= df[df["Rating text"] != "Not rated"]

In [None]:
# Pie chart of how many rows carry each 'Rating text' label.
rating_text_counts = df["Rating text"].value_counts()
rating_text_counts.plot(kind='pie')
plt.title('Rating text')
plt.legend()
plt.show()

In [None]:
# Feature matrix for the rating regressors.
# - 'Aggregate rating' is the value being predicted, so it must not be a
#   feature (target leakage makes every error metric meaninglessly good).
# - 'Votes' is left out because the Streamlit form at the end of this notebook
#   collects only the four inputs below, in this order.
# - .copy() gives X its own data, so the column assignments in the encoding
#   cells do not raise SettingWithCopyWarning.
X=df[['Average Cost for two','Has Table booking','Has Online delivery','Price range']].copy()

In [None]:
# Rating bands for 'Aggregate rating':
#   above 2.0 and below 2.5 : Poor
#   from 2.5 and below 3.5  : Average
#   from 3.5 and below 4.0  : Good
#   from 4.0 and below 4.5  : Very Good
#   4.5 and above           : Excellent

In [None]:
# Regression target as a 1-D Series. Selecting with a single label (rather
# than a one-element list) avoids scikit-learn's column-vector
# DataConversionWarning when fitting, and makes np.max(y_test) in the metric
# cells a plain scalar.
y= df["Aggregate rating"]

In [None]:
# Display the selected feature columns.
X

In [None]:
from sklearn import preprocessing
# Encoder used in the next cells to turn text columns into integer codes.
labelencoder=preprocessing.LabelEncoder()

In [None]:
# Encode 'Has Table booking' as integer codes (LabelEncoder numbers the labels
# in sorted order). assign() returns a new frame instead of writing into a
# slice of `df`, so pandas raises no SettingWithCopyWarning.
X = X.assign(**{'Has Table booking': labelencoder.fit_transform(X['Has Table booking'])})

In [None]:
# Has Online delivery: 0 is no, 1 is yes.
# assign() returns a new frame instead of writing into a slice of `df`, so
# pandas raises no SettingWithCopyWarning.
X = X.assign(**{'Has Online delivery': labelencoder.fit_transform(X['Has Online delivery'])})

In [None]:
# Display the feature columns after encoding.
X

In [None]:
from sklearn.preprocessing import StandardScaler
# Scaler used to standardise the feature columns.
scaler=StandardScaler()

In [None]:
# Fit the scaler on X and rebind X to the scaled array it returns.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so held-out rows contribute to the scaling statistics.
X=scaler.fit_transform(X)

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split, and therefore every metric reported below, reproducible across
# kernel restarts.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [None]:
from sklearn.metrics import mean_squared_error
import numpy as np

In [None]:
def modelresults(predictions, y_true=None):
    """Print the mean squared error and its square root for a set of predictions.

    Parameters
    ----------
    predictions : array-like
        Predicted target values, one per evaluated row.
    y_true : array-like, optional
        True target values to score against. When omitted, the notebook-level
        ``y_test`` is used, which keeps existing ``modelresults(preds)`` calls
        working unchanged.

    Returns
    -------
    None
        The two metrics are printed, not returned.
    """
    if y_true is None:
        y_true = y_test
    # Compute the MSE once and reuse it for the RMSE line.
    mse = mean_squared_error(y_true, predictions)
    print("Mean Squared Error:{}".format(mse))
    print('Root Mean Squared Error:{}'.format(np.sqrt(mse)))

In [None]:
from sklearn.linear_model import LinearRegression
# Linear regression model with default settings.
lr=LinearRegression()

In [None]:
# Fit on the training split.
lr.fit(X_train,y_train)

In [None]:
# Predict the target for the held-out split.
predictions_from_lr = lr.predict(X_test)

In [None]:
# Print MSE / RMSE for the linear model.
modelresults(predictions_from_lr)

In [None]:
from sklearn.metrics import mean_absolute_error
# Calculate the Mean Absolute Error (MAE)
MAE = mean_absolute_error(y_test, predictions_from_lr)
print("Mean Absolute Error: ", MAE)

# Calculate the Accuracy
# This is 1 - MAE / (largest true rating): a normalised-error score for a
# regressor, not classification accuracy. The maximum is taken over a plain
# NumPy array and converted to float so the result is always a single number,
# whether y_test is a Series or a one-column DataFrame.
Accuracy = 1 - (MAE / float(np.max(np.asarray(y_test))))
print("Accuracy: ", Accuracy)

# Calculate the Percentage of Accuracy
Percentage_of_Accuracy = Accuracy * 100
print("Percentage of Accuracy: ", Percentage_of_Accuracy)

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

In [None]:
# Support vector regressor with default settings; tuned by the grid search below.
svrmodel=SVR()

In [None]:
# Search space for the SVR grid search, as a list of sub-grids.
# "degree" only affects the polynomial kernel, so it is listed for "poly"
# alone; crossing it with "rbf" would refit identical RBF models once per
# degree value.
param_gridsvr=[
    {"C":[0.1,0.5],"kernel":["rbf"]},
    {"C":[0.1,0.5],"kernel":["poly"],"degree":[2,3]},
]

In [None]:
# Grid search over param_gridsvr for the SVR model (other GridSearchCV settings left at defaults).
gridsearchsvr=GridSearchCV(svrmodel,param_gridsvr)

In [None]:
# Run the search on the training split.
gridsearchsvr.fit(X_train,y_train)

In [None]:
# Predict the target for the held-out split with the fitted search object.
predsgridSearchsvr=gridsearchsvr.predict(X_test)

In [None]:
# Print MSE / RMSE for the tuned SVR.
modelresults(predsgridSearchsvr)

In [None]:
from sklearn.metrics import mean_absolute_error
# Calculate the Mean Absolute Error (MAE)
MAE = mean_absolute_error(y_test, predsgridSearchsvr)
print("Mean Absolute Error: ", MAE)

# Calculate the Accuracy
# This is 1 - MAE / (largest true rating): a normalised-error score for a
# regressor, not classification accuracy. The maximum is taken over a plain
# NumPy array and converted to float so the result is always a single number,
# whether y_test is a Series or a one-column DataFrame.
Accuracy = 1 - (MAE / float(np.max(np.asarray(y_test))))
print("Accuracy: ", Accuracy)

# Calculate the Percentage of Accuracy
Percentage_of_Accuracy = Accuracy * 100
print("Percentage of Accuracy: ", Percentage_of_Accuracy)

In [None]:
from sklearn.tree import DecisionTreeRegressor


In [None]:
# Hyper-parameter grid searched for the decision tree regressor:
# tree depths 3 through 10, leaf sizes 1-3 and split thresholds 2-5.
param_grid = {
    "max_depth": list(range(3, 11)),
    "min_samples_leaf": [1, 2, 3],
    "min_samples_split": [2, 3, 4, 5],
}

In [None]:
# Decision tree regressor tuned by the grid search below. random_state is
# fixed because scikit-learn permutes the candidate features at every split,
# so ties between equally good splits can otherwise resolve differently from
# run to run.
treeModel=DecisionTreeRegressor(random_state=42)

In [None]:
# Grid search over param_grid for the decision tree (other GridSearchCV settings left at defaults).
grid_tree=GridSearchCV(treeModel,param_grid=param_grid)

In [None]:
# Run the search on the training split.
grid_tree.fit(X_train,y_train)

In [None]:
# Predict the target for the held-out split with the fitted search object.
treepredictions = grid_tree.predict(X_test)

In [None]:
# Print MSE / RMSE for the tuned decision tree.
modelresults(treepredictions)

In [None]:
from sklearn.metrics import mean_absolute_error
# Calculate the Mean Absolute Error (MAE)
MAE = mean_absolute_error(y_test, treepredictions)
print("Mean Absolute Error: ", MAE)

# Calculate the Accuracy
# This is 1 - MAE / (largest true rating): a normalised-error score for a
# regressor, not classification accuracy. The maximum is taken over a plain
# NumPy array and converted to float so the result is always a single number,
# whether y_test is a Series or a one-column DataFrame.
Accuracy = 1 - (MAE / float(np.max(np.asarray(y_test))))
print("Accuracy: ", Accuracy)

# Calculate the Percentage of Accuracy
Percentage_of_Accuracy = Accuracy * 100
print("Percentage of Accuracy: ", Percentage_of_Accuracy)

In [None]:
from sklearn.ensemble import RandomForestRegressor


In [None]:
# Random forest regressor tuned by the grid search below. random_state is
# fixed because the forest bootstraps rows and samples features at random;
# without a seed the selected parameters and the model saved later can differ
# between runs.
rfrmodel=RandomForestRegressor(random_state=42)

In [None]:
# Hyper-parameter grid searched for the random forest:
# tree depths 3 through 10 and five ensemble sizes.
param_grid = {
    "max_depth": list(range(3, 11)),
    "n_estimators": [2, 5, 10, 50, 100],
}


In [None]:
# Grid search over param_grid for the random forest (other GridSearchCV settings left at defaults).
gridrfr= GridSearchCV(rfrmodel,param_grid)

In [None]:
# Run the search on the training split.
gridrfr.fit(X_train,y_train)

In [None]:
# Predict the target for the held-out split with the fitted search object.
RandomForestRegressorPredictions=gridrfr.predict(X_test)


In [None]:
# Print MSE / RMSE for the tuned random forest.
modelresults(RandomForestRegressorPredictions)

In [None]:
# Parameter combination selected by the random-forest search.
gridrfr.best_params_

In [None]:
# Parameter combination selected by the decision-tree search.
grid_tree.best_params_

In [None]:
from sklearn.metrics import mean_absolute_error
# Calculate the Mean Absolute Error (MAE)
MAE = mean_absolute_error(y_test, RandomForestRegressorPredictions)
print("Mean Absolute Error: ", MAE)

# Calculate the Accuracy
# This is 1 - MAE / (largest true rating): a normalised-error score for a
# regressor, not classification accuracy. The maximum is taken over a plain
# NumPy array and converted to float so the result is always a single number,
# whether y_test is a Series or a one-column DataFrame.
Accuracy = 1 - (MAE / float(np.max(np.asarray(y_test))))
print("Accuracy: ", Accuracy)

# Calculate the Percentage of Accuracy
Percentage_of_Accuracy = Accuracy * 100
print("Percentage of Accuracy: ", Percentage_of_Accuracy)

In [None]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regressor, searched over neighbourhood sizes 9 through 16.
knn = KNeighborsRegressor()
knn_param_grid = {"n_neighbors": list(range(9, 17))}
knn_grid_search = GridSearchCV(estimator=knn, param_grid=knn_param_grid)

In [None]:
# Run the KNN search on the training split.
knn_grid_search.fit(X_train,y_train)

In [None]:
# Predict the target for the held-out split with the fitted search object.
knn_predictions=knn_grid_search.predict(X_test)

In [None]:
# Print MSE / RMSE for the tuned KNN regressor.
modelresults(knn_predictions)

In [None]:
# Neighbourhood size selected by the search.
knn_grid_search.best_params_

In [None]:
from sklearn.ensemble import AdaBoostRegressor
# AdaBoost regressor tuned over ensemble size and learning rate.
# random_state is fixed because the boosting rounds resample the training
# rows at random; without a seed the scores and chosen parameters can change
# between runs.
ada=AdaBoostRegressor(random_state=42)
ada_param_grid={"n_estimators": [50,100,150,200,250],
                "learning_rate": [0.1,0.25,0.5,0.75,1]}
ada_grid_search= GridSearchCV(ada,ada_param_grid)

In [None]:
# Run the AdaBoost search on the training split.
ada_grid_search.fit(X_train,y_train)

In [None]:
# Predict the target for the held-out split with the fitted search object.
adapreds=ada_grid_search.predict(X_test)

In [None]:
# Print MSE / RMSE for the tuned AdaBoost regressor.
modelresults(adapreds)

In [None]:
# Display the fitted random-forest grid search.
gridrfr

In [None]:
import joblib
# Save the whole fitted GridSearchCV object for the random forest to mlmodel.pkl.
joblib.dump(gridrfr,"mlmodel.pkl")

In [None]:
# Display the fitted scaler.
scaler

In [None]:
# Save the fitted scaler to Scaler.pkl.
joblib.dump(scaler,"Scaler.pkl")

In [None]:
!pip install streamlit
import streamlit as st

from sklearn.preprocessing import StandardScaler
import joblib
st.set_page_config(layout="wide")

scaler= joblib.load("Scaler.pkl")
scaler.n_features_in_ = 4
st.title("Resturant Rating Prediction App")


st.caption("This app helps you to predict a restaurant rating review class based on the features you provide")

averagecost = st.number_input("Average Cost for two", min_value=50, max_value=50000, value=1000, step=200)

tableBooking = st.selectbox("Table Booking ?", ["Yes", "No"])

OnlineDelivery = st.selectbox("Online Delivery ?", ["Yes", "No"])

pricerange = st.selectbox("Price Range (1 Cheapest ,4 Most Expensive)", ["1", "2", "3", "4"])

predictionbutton=st.button("Predict the Rating Class")
st.divider()

model=joblib.load("mlmodel.pkl")

bookingstatus=1 if tableBooking=="Yes" else 0
deliverystatus=1 if OnlineDelivery=="Yes" else 0



values = [[averagecost, bookingstatus, deliverystatus, pricerange]]
my_X_values = np.array(values)

scaler = StandardScaler()
X = scaler.fit_transform(my_X_values)

# Make the prediction
if predictionbutton:
    st.snow()
    prediction = model.predict(X)
    st.write(prediction)
    if prediction <2.5:
        st.write("The restaurant rating is Bad")
    elif prediction <3.5:
        st.write("The resturant rating is Average")
    elif prediction <4.0:
        st.write("The resturant rating is Good")
    else:
        st.write("The resturant rating is Excellent")

In [None]:
# Launch the Streamlit app defined in app.py.
# NOTE(review): no cell visible here writes app.py — confirm the app code from
# the previous cell has been saved under that name before running this.
!streamlit run app.py