# Project : Restaurant Revenue Prediction

![image.png](attachment:image.png)

**Import the libraries**


In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

**Load the dataset**

In [33]:
df=pd.read_csv("revenue_prediction.csv")

In [34]:
df.head()

Unnamed: 0,Id,Name,Franchise,Category,City,No_Of_Item,Order_Placed,Revenue
0,101,HungryHowie'sPizza,Yes,Mexican,Bengaluru,55,5.5,5953753
1,102,CharleysPhillySteaks,No,Varied Menu,Gurugram,72,6.8,7223131
2,103,Chuy's,Yes,Chicken,Pune,25,1.9,2555379
3,104,O'Charley's,Yes,Italian/Pizza,Mumbai,18,2.5,2175511
4,105,PolloTropical,Yes,Pizza,Noida,48,4.2,4816715


In [35]:
df.head(10)

Unnamed: 0,Id,Name,Franchise,Category,City,No_Of_Item,Order_Placed,Revenue
0,101,HungryHowie'sPizza,Yes,Mexican,Bengaluru,55,5.5,5953753
1,102,CharleysPhillySteaks,No,Varied Menu,Gurugram,72,6.8,7223131
2,103,Chuy's,Yes,Chicken,Pune,25,1.9,2555379
3,104,O'Charley's,Yes,Italian/Pizza,Mumbai,18,2.5,2175511
4,105,PolloTropical,Yes,Pizza,Noida,48,4.2,4816715
5,106,Maggiano'sLittleItaly,Yes,Seafood,Noida,56,4.9,4517319
6,107,Cicis,Yes,Steak,Noida,58,5.0,5966635
7,108,LongJohnSilver's,Yes,Pizza,Mumbai,49,4.3,6491607
8,109,SaltgrassSteakHouse,Yes,Mexican,Mumbai,59,4.8,5152497
9,110,ChuckE.Cheese's,Yes,Steak,Pune,76,5.3,4544227


In [36]:
df.tail(10)

Unnamed: 0,Id,Name,Franchise,Category,City,No_Of_Item,Order_Placed,Revenue
90,191,FarmerBoys,No,Family Style,Bengaluru,40,3.3,3426169
91,192,DonatosPizza,No,Mexican,Bengaluru,27,1.9,2083447
92,193,Shoney's,Yes,Varied Menu,Bengaluru,72,6.5,6782425
93,194,TacoBueno,No,Snack,Bengaluru,50,3.3,3410878
94,195,ClaimJumper,Yes,Bakery Cafe,Bengaluru,42,3.5,3753720
95,196,Wetzel'sPretzels,No,Italian/Pizza,Bengaluru,19,1.1,1270499
96,197,LaMadeleineCountryFrenchCafe,Yes,Varied Menu,Bengaluru,75,6.3,6412623
97,198,Giordano's,Yes,Varied Menu,Gurugram,77,6.2,6694797
98,199,IslandsFineBurgers&Drinks,Yes,Sports Bar,Pune,25,2.1,2344689
99,200,Mimi'sBistro&Bakery,No,BBQ,Mumbai,50,4.4,4567678


**Examining missing values in a dataset**

In [13]:
df.isnull().sum()

Id              0
Name            0
Franchise       0
Category        0
City            0
No_Of_Item      0
Order_Placed    0
Revenue         0
dtype: int64

**Drop irrelevant columns**

In [14]:
df=df.drop(columns=["Id","Name","Franchise","Category","City","No_Of_Item"])

In [15]:
df.head()

Unnamed: 0,Order_Placed,Revenue
0,5.5,5953753
1,6.8,7223131
2,1.9,2555379
3,2.5,2175511
4,4.2,4816715


In [16]:
df.shape

(100, 2)

In [37]:
df.describe()

Unnamed: 0,Id,No_Of_Item,Order_Placed,Revenue
count,100.0,100.0,100.0,100.0
mean,150.5,49.08,4.086,4395161.0
std,29.011492,22.370923,2.055101,2659932.0
min,101.0,18.0,1.0,849870.0
25%,125.75,34.75,2.75,2688328.0
50%,150.5,45.0,3.65,3911401.0
75%,175.25,57.25,5.1,5330084.0
max,200.0,126.0,13.0,19696940.0


# Building Model

In [38]:
x=df.drop(['Revenue'],axis=1)
y=df['Revenue']

In [40]:
from sklearn.preprocessing import StandardScaler,LabelEncoder

In [41]:
df1=df.copy()
df1=df1.apply(LabelEncoder().fit_transform)
df1.head()

Unnamed: 0,Id,Name,Franchise,Category,City,No_Of_Item,Order_Placed,Revenue
0,0,40,1,12,0,29,38,79
1,1,14,0,19,1,40,46,91
2,2,16,1,5,4,4,6,24
3,3,60,1,11,2,0,10,13
4,4,68,1,13,3,25,25,67


In [43]:
ss=StandardScaler().fit(df1.drop('Revenue',axis=1))

In [45]:
X=ss.transform(df1.drop('Revenue',axis=1))
y=df1['Revenue']

**Split the dataset into Training set and Test set**

In [49]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=40)

**Training the Simple Linear Regression model on the Training set**

In [50]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score
regressor=LinearRegression()

In [51]:
model=regressor.fit(X_train,y_train)
prediction=model.predict(X_test)

print("Acc on training data:{:,.3f}".format(regressor.score(X_train,y_train)))
print("Acc on test data:{:,.3f}".format(regressor.score(X_test,y_test)))

Acc on training data:0.927
Acc on test data:0.876


**Training the Simple Logistic Regression model on the Training set**

In [52]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr =LogisticRegression()

model=lr.fit(X_train,y_train)
prediction=model.predict(X_test)

print("Acc on training data:{:,.3f}".format(lr.score(X_train,y_train)))
print("Acc on test data:{:,.3f}".format(lr.score(X_test,y_test)))

Acc on training data:0.929
Acc on test data:0.000


**Random Forest Classifier**

In [57]:
from sklearn.ensemble import RandomForestClassifier

rfc =RandomForestClassifier()

model1=rfc.fit(X_train,y_train)
prediction1=model1.predict(X_test)

print("Acc on training data:{:,.3f}".format(rfc.score(X_train,y_train)))
print("Acc on test data:{:,.3f}".format(rfc.score(X_test,y_test)))

Acc on training data:1.000
Acc on test data:0.000


In [58]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [59]:
print(confusion_matrix(y_test,prediction1))

[[0 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]]
