### Pizza Lovers Welcome!!!
![Pizzzaa](https://images.unsplash.com/photo-1574126154517-d1e0d89ef734?ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&ixlib=rb-1.2.1&auto=format&fit=crop&w=667&q=80)

# Importing Packages & Dataset

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
%matplotlib inline
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import sklearn.metrics as metrics
import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [None]:
# Load the pizza dataset from the Kaggle input directory into a DataFrame.
csv_path = '/kaggle/input/pizza-price-prediction/pizza_v2.csv'
df1 = pd.read_csv(csv_path)

# Understanding & Preprocessing Data

In [None]:
# Preview the first rows of the loaded dataset.
df1.head()

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df1.info()

In [None]:
# Count the missing values in each column.
df1.isnull().sum()

In [None]:
# Clean up the raw text columns for better usability.
# Build a "price" column from "price_rupiah" with the "Rp" marker and commas removed.
df1['price'] = df1['price_rupiah'].map(
    lambda text: text.replace("Rp", "").replace(",", "")
)
# Remove the " inch" unit text from the diameter values.
df1['diameter'] = df1['diameter'].apply(lambda text: text.replace(" inch", ""))
# The raw price column has been replaced by "price", so drop it.
df1.drop(columns=['price_rupiah'], inplace=True)
df1.head()

In [None]:
# Cast the cleaned columns to numeric types: diameter -> float, price -> int.
df1 = df1.astype({"diameter": float, "price": int})
df1.describe()

In [None]:
# Show how often each category occurs in the categorical columns.
print("Company: \n", df1['company'].value_counts(), "\n")
print("Toppings: \n", df1['topping'].value_counts(), "\n")
print("Variants: \n", df1['variant'].value_counts(), "\n")
print("Size: \n", df1['size'].value_counts())

# Visualizing Columns

In [None]:
# Pie chart of how the rows are distributed across companies.
company_pie = px.pie(
    df1,
    names='company',
    title='Data Distribution based on Company',
)
company_pie.show()

In [None]:
# Bar chart of the number of rows per topping.
df1['topping'].value_counts().plot.bar()

In [None]:
# Bar chart of the number of rows per variant.
df1['variant'].value_counts().plot.bar()

In [None]:
# Pie chart of the share of each pizza size in the data.
px.pie(
    df1,
    names='size',
    title='Share of Size in Data available to us',
)

# Encoding Columns

In [None]:
# scikit-learn label encoder instance (maps distinct labels to integer codes).
en = LabelEncoder()

In [None]:
# Names of the categorical columns that will be label-encoded.
to_encode = [
    'company',
    'topping',
    'variant',
    'size',
    'extra_sauce',
    'extra_cheese',
    'extra_mushrooms',
]

In [None]:
# Replace each categorical column with integer codes.
# The single encoder `en` is refit on every column, so after the loop it only
# remembers the classes of the last column. Record each column's
# code -> original label mapping right after fitting so the encoded values
# can still be interpreted (or decoded) later.
category_mappings = {}
for val in to_encode:
    df1[val] = en.fit_transform(df1[val])
    # classes_ holds the fitted labels; a label's position is its integer code.
    category_mappings[val] = dict(enumerate(en.classes_))
df1.head()

In [None]:
# Annotated heatmap of the pairwise correlations between the DataFrame's columns.
correlations = df1.corr()
sns.heatmap(correlations, annot=True)
plt.show()
# Observation: there is a high correlation between price and diameter.

In [None]:
# Separate the target ("price") from the features, then hold out 10% of the
# rows as a test set.
Y = df1["price"]
X = df1.drop(columns="price")
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=0,  # fixed seed so the split is the same on every run
)

# Linear Regression

In [None]:
# scikit-learn linear regression model with default settings.
reg = LinearRegression()

In [None]:
# Fit the linear model on the training set, then score it on that same data.
y_train_preds = reg.fit(x_train, y_train).predict(x_train)
train_r2 = metrics.r2_score(y_train, y_train_preds)
print("Training R2 score:", train_r2)

In [None]:
# Score the linear model on the test set.
y_preds = reg.predict(x_test)
r2 = metrics.r2_score(y_test, y_preds)
print("Test R2 score :", r2)
# Scatter of actual (x) vs. predicted (y) prices with an OLS trendline.
fig = px.scatter(
    x=y_test,
    y=y_preds,
    trendline='ols',
    title="Linear Regression Model",
)
fig.show()

# Random Forest Regressor

In [None]:
# Random forest regressor. The forest is seeded so that its R2 scores are
# reproducible from run to run; without a seed the bootstrap sampling differs
# on every execution and the reported scores change.
ran = RandomForestRegressor(random_state=0)

In [None]:
# Fit the random forest on the training set, then score it on that same data.
y_train_preds = ran.fit(x_train, y_train).predict(x_train)
train_r2 = metrics.r2_score(y_train, y_train_preds)
print("Training R2 score:", train_r2)

In [None]:
# Score the random forest on the test set.
y_preds = ran.predict(x_test)
r2 = metrics.r2_score(y_test, y_preds)
# Labelled "Test" so the output cannot be mistaken for the training score
# and matches the label used for the linear regression model.
print("Test R2 score :", r2)
# Scatter of actual (x) vs. predicted (y) prices with an OLS trendline.
fig = px.scatter(
    x=y_test,
    y=y_preds,
    trendline='ols',
    title="Random Forest Regressor Model",
)
fig.show()

# Conclusion

Comparing the R2 scores above, we can see that our Random Forest Regressor model performed better than the Linear Regression model.

If you found this helpful, 🙌.