## Waiter Tips (Case Study)

    total_bill: Total bill in dollars including taxes
    tip: Tip given to waiters in dollars
    sex: gender of the person paying the bill
    smoker: whether the person smoked or not
    day: day of the week
    time: lunch or dinner
    size: number of people at the table

In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Load the tips dataset. The path is relative, so tips.csv is expected to sit
# in the notebook's working directory.
data = pd.read_csv('tips.csv')

# Preview the first five rows as a quick sanity check of the columns.
data.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [5]:
# Party size plotted against the total bill, with an OLS trendline overlaid.
figure = px.scatter(
    data,
    x='total_bill',
    y='size',
    trendline='ols',
)
figure.show()

In [6]:
# Tip against total bill: marker size encodes the party size and colour encodes
# the payer's sex; an OLS trendline is requested as well.
figure = px.scatter(
    data,
    x="total_bill",
    y="tip",
    color="sex",
    size="size",
    trendline="ols",
)
figure.show()

In [7]:
# Tip against total bill: marker size encodes the party size and colour encodes
# the meal time (lunch or dinner); an OLS trendline is requested as well.
figure = px.scatter(
    data,
    x="total_bill",
    y="tip",
    color="time",
    size="size",
    trendline="ols",
)
figure.show()

In [8]:
# Donut chart of tips split by day of the week.
figure = px.pie(data, names='day', values='tip', hole=0.5)
figure.show()
# Observation: Saturday accounts for the largest share of the tips paid.

In [9]:
# Donut chart of tips split by the sex of the person paying.
figure = px.pie(data, names='sex', values='tip', hole=0.5)
figure.show()
# Observation: men account for the larger share of the tips paid.

In [10]:
# Donut chart of tips split by whether the payer smoked.
figure = px.pie(data, names='smoker', values='tip', hole=0.5)
figure.show()
# Observation: smokers account for the smaller share of the tips paid.
# NOTE(review): the chart shows tip totals per group, so this may reflect
# fewer smoker bills rather than lower tips per bill -- confirm with a mean.

In [12]:
# Donut chart of tips split by meal time.
figure = px.pie(data, names='time', values='tip', hole=0.5)
figure.show()
# Observation: dinner accounts for more of the tips paid than lunch.

In [13]:
# Transform the categorical columns into numerical codes for the regression.
#
# The cell is safe to re-run: Series.map turns every value that is not a key of
# the mapping into NaN, so mapping a column that is already encoded would wipe
# it out. A column is therefore only mapped while it still holds the original
# labels, and an unexpected label raises instead of silently becoming NaN.
category_codes = {
    "sex": {"Female": 0, "Male": 1},
    "smoker": {"No": 0, "Yes": 1},
    "day": {"Thur": 0, "Fri": 1, "Sat": 2, "Sun": 3},
    "time": {"Lunch": 0, "Dinner": 1},
}

for column, codes in category_codes.items():
    if data[column].isin(list(codes.values())).all():
        # Already encoded by an earlier run of this cell; leave untouched.
        continue
    unknown = set(data[column].unique()) - set(codes)
    if unknown:
        raise ValueError(
            f"unexpected values in column {column!r}: {sorted(unknown, key=str)}"
        )
    data[column] = data[column].map(codes)

data.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,0,0,3,1,2
1,10.34,1.66,1,0,3,1,3
2,21.01,3.5,1,0,3,1,3
3,23.68,3.31,1,0,3,1,2
4,24.59,3.61,0,0,3,1,4


In [14]:
from sklearn.model_selection import train_test_split

# Build the feature matrix and target vector, then hold out 20% of the rows
# for testing. The fixed random_state keeps the split reproducible.
feature_columns = ["total_bill", "sex", "smoker", "day", "time", "size"]
x = np.array(data[feature_columns])
y = np.array(data["tip"])

xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [15]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression of tip on the training features. fit() returns the
# estimator itself, so it can be bound directly and then displayed.
model = LinearRegression().fit(xtrain, ytrain)
model

LinearRegression()

In [16]:
# Feature order must match the training matrix:
#   total_bill, sex, smoker, day, time, size
# Using the encodings applied earlier, this row is a 24.50 bill paid by a male
# non-smoker on Thursday at dinner for a party of 4.
features = np.array([24.50, 1, 0, 0, 1, 4]).reshape(1, -1)
model.predict(features)

array([3.73742609])

In [17]:
# Same feature order as the training matrix:
#   total_bill, sex, smoker, day, time, size
# Using the encodings applied earlier, this row is a 100.00 bill paid by a male
# non-smoker on Saturday at dinner for a party of 4.
features = np.array([100.0, 1, 0, 2, 1, 4]).reshape(1, -1)
model.predict(features)

array([10.9952885])