# Restaurant Revenue Prediction

### Import Libraries

In [1]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.svm import LinearSVR
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's default plot styling to every figure that follows.
sns.set()

In [None]:
# Load the training and test sets.
# The dataset folder is defined once. It defaults to the original author's
# location; set the RESTAURANT_REVENUE_DATA_DIR environment variable to read
# the CSVs from another directory without editing the notebook.
DATA_DIR = Path(os.environ.get(
    "RESTAURANT_REVENUE_DATA_DIR",
    r"C:\Users\SONY\Desktop\projects of Machine Learning\restaurant revenue",
))
df_train = pd.read_csv(DATA_DIR / "train.csv")
df_test = pd.read_csv(DATA_DIR / "test.csv")

In [None]:
# Preview the first rows of the training set (head() defaults to 5 rows).
df_train.head()

In [None]:
# Preview the first rows of the test set (head() defaults to 5 rows).
df_test.head()

In [None]:
# (rows, columns) of the training set, then of the test set, one per line.
print(df_train.shape, df_test.shape, sep="\n")

### Missing Values 

In [None]:
# Column dtypes and non-null counts for both frames.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
df_train.info()
print("*" * 100)
df_test.info()

In [None]:
# Missing-value count per column for the first 20 training columns.
df_train.isna().sum().iloc[:20]

In [None]:
# Heatmap of the training-set missing-value mask; row labels are hidden.
sns.heatmap(df_train.isna(), yticklabels=False)

In [None]:
# Missing-value count per column for the first 20 test columns.
df_test.isna().sum().iloc[:20]

In [None]:
# Heatmap of the test-set missing-value mask; row labels are hidden.
sns.heatmap(df_test.isna(), yticklabels=False)

In [None]:
# Number of test rows per distinct value of the City column.
df_test['City'].value_counts()

In [None]:
# Summary statistics of the training set.
df_train.describe()

In [None]:
# Summary statistics of the test set.
df_test.describe()

In [None]:
import unicodedata

In [None]:
def remove_unicode(data):
    """Return ``data`` with every non-ASCII character removed.

    Each item produced by iterating over ``data`` (the individual characters
    when ``data`` is a string) is encoded to ASCII with unencodable characters
    dropped, and the surviving pieces are concatenated in their original order.

    Note that non-ASCII characters are discarded, not transliterated.
    """
    ascii_pieces = (
        piece.encode("ascii", "ignore").decode("ascii") for piece in data
    )
    return "".join(ascii_pieces)

In [None]:
# Strip non-ASCII characters from the training-set city names, then display
# the updated frame.
df_train["City"] = df_train["City"].apply(remove_unicode)
df_train

In [None]:
# Strip non-ASCII characters from the test-set city names.
# The cleaned values must come from df_test itself: sourcing them from
# df_train (as before) overwrote the test cities with index-aligned training
# cities and left NaN wherever the test index had no training counterpart.
df_test['City'] = df_test['City'].apply(remove_unicode)
df_test.head()

### Graphical Data Representation 

In [None]:
# Distribution of the raw target, followed by its sample skewness.
# NOTE(review): distplot is deprecated in recent seaborn releases; histplot /
# displot are the documented replacements once older versions need not be supported.
sns.distplot(df_train["revenue"])
print("Skewness: %f" % (df_train["revenue"].skew(),))

In [None]:
# Store the natural log of the target as ``revenue_1``. The column is derived
# from the target, so it is for exploration only, not a model feature.
df_train["revenue_1"] = np.log(df_train["revenue"])

# Distribution of the log-transformed target and its sample skewness.
sns.distplot(df_train["revenue_1"])
print("Skewness: %f" % (df_train["revenue_1"].skew(),))

In [None]:
# Number of training restaurants per Type.
# The column is passed as ``x=``: seaborn >= 0.12 no longer accepts it
# positionally, while the keyword form works on every version.
sns.countplot(x="Type", data=df_train)

In [None]:
# Bar plot of the log revenue (``revenue_1``) for each restaurant Type.
sns.barplot(data=df_train, x="Type", y="revenue_1")

In [None]:
# Log revenue (``revenue_1``) by City Group.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional
# arguments, while the keyword form works on every version.
sns.boxplot(x='City Group', y='revenue_1', data=df_train)

In [None]:
# Count of training rows per distinct revenue value (keyword ``x=`` is
# required by seaborn >= 0.12 and accepted by older versions).
# NOTE(review): countplot draws one bar per distinct value; if revenue is
# continuous a histogram is likely more informative. Confirm intent.
sns.countplot(x="revenue", data=df_train)

In [None]:
# Number of training restaurants per City (keyword ``x=`` is required by
# seaborn >= 0.12 and accepted by older versions).
sns.countplot(x="City", data=df_train)

In [None]:
# Number of training restaurants per City Group (keyword ``x=`` is required
# by seaborn >= 0.12 and accepted by older versions).
sns.countplot(x="City Group", data=df_train)

In [None]:
# Number of training restaurants per Type (keyword ``x=`` is required by
# seaborn >= 0.12 and accepted by older versions).
sns.countplot(x="Type", data=df_train)

In [None]:
# Training rows per City, split by revenue value (keyword ``x=`` is required
# by seaborn >= 0.12 and accepted by older versions).
# NOTE(review): ``hue`` creates one colour per distinct revenue value, which
# may be unreadable for a continuous target. Confirm this plot is wanted.
sns.countplot(x="City", hue='revenue', data=df_train)

### Converting Categorical Features 

In [None]:
numerical_feats = df_train.dtypes[df_train.dtypes != "object"].index
print("Number of Numerical features: ", len(numerical_feats))

categorical_feats = df_train.dtypes[df_train.dtypes == "object"].index
print("Number of Categorical features: ", len(categorical_feats))

In [None]:
numerical_feats = df_test.dtypes[df_test.dtypes != "object"].index
print("Number of Numerical features: ", len(numerical_feats))

categorical_feats = df_test.dtypes[df_test.dtypes == "object"].index
print("Number of Categorical features: ", len(categorical_feats))

In [None]:
print(df_train[numerical_feats].columns)
print(df_train[categorical_feats].columns)

In [None]:
print(df_test[numerical_feats].columns)
print(df_test[categorical_feats].columns)

In [None]:
df_train[numerical_feats].head()

In [None]:
df_train[categorical_feats].head()

In [None]:
# Give the "City Group" column an identifier-friendly name, then list the columns.
df_train = df_train.rename(columns={"City Group": "city_group"})
df_train.columns

In [None]:
# Apply the same "City Group" -> "city_group" rename to the test set.
df_test = df_test.rename(columns={"City Group": "city_group"})
df_test.columns

In [None]:
city = pd.get_dummies(df_train["City"],drop_first=1)
city.head()

In [None]:
type= pd.get_dummies(df_train["Type"],drop_first=1)
type.head()

In [None]:
city_group= pd.get_dummies(df_train["city_group"],drop_first=1)
city_group.head()

In [None]:
df_train = pd.concat([df_train,city,type,city_group],axis= 1)
df_train

In [None]:
df_train.drop(["City","Type","city_group"],axis=1,inplace= True)
df_train

In [None]:
city = pd.get_dummies(df_test["City"],drop_first=1)
city.head()

In [None]:
type= pd.get_dummies(df_test["Type"],drop_first=1)
type.head()

In [None]:
city_group= pd.get_dummies(df_test["city_group"],drop_first=1)
city_group.head()

In [None]:
df_test = pd.concat([df_test,city,type,city_group],axis= 1)
df_test

In [None]:
df_test.drop(["City","Type","city_group"],axis=1,inplace= True)
df_test

In [None]:
# Build the model inputs.
# ``revenue_1`` is the log of the target (created for the exploratory plots),
# so it is dropped together with ``revenue``; leaving it among the features
# would leak the target into the model. errors="ignore" tolerates the column
# being absent when the plotting cell was not run.
feature_frame = df_train.drop(columns=["revenue", "revenue_1"], errors="ignore")
train_x = feature_frame.values
train_y = df_train["revenue"].values

# The indicator columns were generated separately for train and test, so the
# two frames need not share the same columns. Re-index the test frame onto the
# training feature columns (same order; columns missing from test are filled
# with 0, columns unknown to training are dropped) so predict() receives the
# layout the models were fitted on.
# NOTE(review): any column that is still non-numeric (for example a raw date
# string) must be encoded or dropped before fitting. Verify against the schema.
x_test = df_test.reindex(columns=feature_frame.columns, fill_value=0)

train_x.shape , train_y.shape , x_test.shape

### Model Evaluation 

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.metrics import accuracy_score

### Linear Regression

In [None]:
# Ordinary least-squares baseline fitted on the full training set.
lin_reg = LinearRegression().fit(train_x, train_y)
y_pred_lin = lin_reg.predict(x_test)
# score() on a regressor is R^2; it is measured on the training data here,
# scaled to a percentage and rounded to two decimals.
acc_lin_reg = round(100 * lin_reg.score(train_x, train_y), 2)
print(str(acc_lin_reg) + ' percent')

In [None]:
# Linear support-vector model.
# LinearSVC is a classifier and revenue is a continuous target, so the linear
# support-vector *regressor* (LinearSVR) is used instead. The variable names
# are kept so the later comparison table keeps working. random_state pins the
# solver's random number generator for repeatable results.
svc = LinearSVR(random_state=42)
svc.fit(train_x, train_y)
y_pred_svc = svc.predict(x_test)
# score() on a regressor is R^2 (measured on the training data here), scaled
# to a percentage and rounded to two decimals.
acc_svc = round(svc.score(train_x, train_y) * 100, 2)
print(str(acc_svc) + ' percent')

In [None]:
# Random forest with 20 trees.
# random_state is fixed so the fitted forest, and the predictions written to
# the submission file, are identical on every Restart & Run All.
regr = RandomForestRegressor(n_estimators=20, random_state=42)
regr.fit(train_x, train_y)
y_pred_random = regr.predict(x_test)
# score() on a regressor is R^2 (measured on the training data here), scaled
# to a percentage and rounded to two decimals.
acc_random = round(regr.score(train_x, train_y) * 100, 2)
print(str(acc_random) + ' percent')

In [None]:
# L1-regularised linear regression (alpha = 0.1) fitted on the training set.
lasso_regr = Lasso(alpha=0.1).fit(train_x, train_y)
y_pred_lasso = lasso_regr.predict(x_test)
# Training-set R^2 as a percentage, rounded to two decimals.
acc_lasso = round(100 * lasso_regr.score(train_x, train_y), 2)
print(str(acc_lasso) + ' percent')

In [None]:
# L2-regularised linear regression (alpha = 0.1) fitted on the training set.
ridge_regr = Ridge(alpha=0.1).fit(train_x, train_y)
y_pred_ridge = ridge_regr.predict(x_test)
# Training-set R^2 as a percentage, rounded to two decimals.
acc_ridge = round(100 * ridge_regr.score(train_x, train_y), 2)
print(str(acc_ridge) + ' percent')

In [None]:
# Elastic-net regression (alpha = 0.1) fitted on the training set.
elastic_regr = ElasticNet(alpha=0.1).fit(train_x, train_y)
y_pred_elastic = elastic_regr.predict(x_test)
# Training-set R^2 as a percentage, rounded to two decimals.
acc_elastic = round(100 * elastic_regr.score(train_x, train_y), 2)
print(str(acc_elastic) + ' percent')

### Comparing Models

In [None]:
# One row per fitted model with its training-set score (in percent).
models = pd.DataFrame(
    {
        'Model': [
            'Linear Regression',
            'Linear SVC',
            'Random Forest',
            'Lasso',
            'Ridge',
            'ElasticNet',
        ],
        'Score': [
            acc_lin_reg,
            acc_svc,
            acc_random,
            acc_lasso,
            acc_ridge,
            acc_elastic,
        ],
    }
)

In [None]:
# Rank the models from highest to lowest score.
models.sort_values('Score', ascending=False)

In [None]:
# Single-column frame holding the random-forest predictions for the test set.
submission = pd.DataFrame(y_pred_random, columns=["revenue"])

In [None]:
# Write the predictions to disk. The file name previously had a space where
# the extension dot belongs ("Submission1 csv"), producing a file without a
# .csv extension.
# NOTE(review): the row index is written as an unnamed first column; confirm
# this matches the expected submission format.
submission.to_csv("Submission1.csv")