##### Import Necessary libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go 

#load dataset
data = pd.read_csv(r"https://raw.githubusercontent.com/amankharwal/Website-data/master/tips.csv")
print(data.head())

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4


In [2]:
# let's check information about dataset
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  244 non-null    float64
 1   tip         244 non-null    float64
 2   sex         244 non-null    object 
 3   smoker      244 non-null    object 
 4   day         244 non-null    object 
 5   time        244 non-null    object 
 6   size        244 non-null    int64  
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB


In [28]:
#checking null values
print(data.isnull().sum())

total_bill    0
tip           0
sex           0
smoker        0
day           0
time          0
size          0
dtype: int64


##### Waiter Tips Analysis

In [4]:
# Let's have a look at the tips given to people according to the 
# total bill paid 
# number of peoples at the table
# and day of the week 

fig = px.scatter(data_frame=data, x="total_bill",
                 y="tip", size="size", color="day", trendline="ols")
fig.show()


In [5]:
# total bill paid 
# number of peoples at the table
# and gender of the person

fig = px.scatter(data_frame=data, x="total_bill",
                 y="tip", size="size", color="sex", trendline="ols")
fig.show()

In [6]:
# total bill paid 
# number of peoples at the table
# and time of the meal

fig = px.scatter(data_frame=data, x="total_bill", 
                 y="tip", size="size", color="time", trendline="ols")
fig.show()


In [7]:
#Now let’s see the tips given to the waiters according to the days to find out which day the most tips are given to the waiters:
fig = px.pie(data_frame=data, 
             values="tip",
             names="day", hole=0.5)
fig.show()

In [8]:
#let’s look at the number of tips given to waiters by gender of the person paying the bill to see who tips waiters the most:
fig = px.pie(data_frame=data,
             values="tip",
             names="sex", hole=0.5)
fig.show()

In [9]:
#Now let’s see if a smoker tips more or a non-smoker:
fig = px.pie(data_frame=data, 
             values="tip",
             names="smoker", hole=0.5)
fig.show()

In [10]:
#let's see which time peoples give tips more
fig = px.pie(data_frame=data,
             values="tip",
             names="time", hole=0.5)
fig.show()

##### Waiter Tips Prediction Model

In [11]:
# Before training a machine learning prediction model. i will do some transformatoin 
# converting the categorical variables into numericals
data["sex"] = data["sex"].map({"Female":0, "Male":1})
data["smoker"] = data["smoker"].map({"No":0, "Yes":1})
data["day"] = data["day"].map({"Thu":0, "Fri":1, "Sat":2, "Sun":3})
data["time"] = data["time"].map({"Lunch":0, "Dinner":1})

print(data.head())

   total_bill   tip  sex  smoker  day  time  size
0       16.99  1.01    0       0  3.0     1     2
1       10.34  1.66    1       0  3.0     1     3
2       21.01  3.50    1       0  3.0     1     3
3       23.68  3.31    1       0  3.0     1     2
4       24.59  3.61    0       0  3.0     1     4


In [63]:
# Now i"ll split the data into training and test sets
data = data.dropna()
x = np.array(data[["total_bill", "sex", "smoker", "day", 
                   "time", "size"]])
y = np.array(data["tip"])

# import sklearn library
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, 
                                                test_size=0.2, 
                                                random_state=42)

# Let's train the machine learning model for the for task of waiter tips predictoin task
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(xtrain, ytrain)
model.score(xtest, ytest)

0.29340084494950835

In [64]:
#Now let’s test the performance of this model by giving inputs to this model according to the features that we have used to train this model:
# features = np.array([[data[["total_bill", "sex", "smoker", "day",  "time", "size"]])
features = np.array([[24.50, 1, 0, 0, 1, 4]])
print(model.predict(features))

[3.78750357]
