# Linear Regression - Example 1 - Tips

## Step 1 - Load Data


In [None]:
import pandas as pd

# Ten observed restaurant visits, written one (bill, tip) pair per row so each
# observation can be read at a glance; column names are attached separately.
tip_data = pd.DataFrame(
    [
        (50.00, 12.00),
        (30.00, 7.00),
        (60.00, 13.00),
        (40.00, 8.00),
        (65.00, 15.00),
        (20.00, 5.00),
        (10.00, 2.00),
        (15.00, 2.00),
        (25.00, 3.00),
        (35.00, 4.00),
    ],
    columns=['bill', 'tip'],
)
tip_data

## Step 2 - Exploratory Data Analysis (EDA)
Look at summary statistics and a basic scatter plot of the data before modelling.

In [None]:
## Summary statistics for every column of tip_data, as reported by describe()
tip_data.describe()

In [None]:
## plot 
%matplotlib inline
import matplotlib.pyplot as plt

# Scatter of tip against bill. Axis labels and a title are set so the figure
# can be understood on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(tip_data['bill'], tip_data['tip'])
ax.set(xlabel='bill', ylabel='tip', title='Tip vs. bill')
plt.show()  # render the figure without echoing the artist's repr as cell output

## Step 3 - Shape Data

In [None]:
# Feature and target are each selected with a one-element column list, so both
# stay single-column DataFrames (2-D) rather than becoming Series.
x = tip_data.loc[:, ['bill']]
y = tip_data.loc[:, ['tip']]

# Echo both frames, features first.
print(x)
print(y)

## Step 4 : Do Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression


# Build the estimator first, then fit it on the bill -> tip data.
# fit() hands back the estimator it was called on, so fitting on its own line
# leaves `model` bound to the same fitted object.
model = LinearRegression()
model.fit(x, y)
model

## Step 5 : Predict

In [None]:
# In-sample predictions: score the same rows the model was fitted on.
predictions = model.predict(x)
predictions

## Step 6 - Compare actual vs. predictions

In [None]:
# Attach the predictions as a new column so each row shows the bill, the actual
# tip and the predicted tip side by side.
# Note: this modifies tip_data in place (it gains a 'predictions' column).
tip_data['predictions'] = predictions
tip_data

## Step 7 : Evaluate

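The cell below prints the fitted model parameters:
- slope : 0.24
- intercept : -1.4

In other words, the fitted line is approximately `tip = 0.24 * bill - 1.4`.

$R^2$ is about 0.91 (roughly 90%), meaning the fitted line explains most of the variation in tips for this data. That is pretty good!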

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt

# Raw fitted parameters exactly as stored on the estimator.
print('Coefficients:', model.coef_)
print('intercept:', model.intercept_)

# Unwrap the parameters to plain scalars: the single coefficient sits at
# position [0][0] of coef_ and the intercept at position [0] of intercept_.
slope = model.coef_[0][0]
intercept = model.intercept_[0]
print("slope : ", slope, ", intercept: ", intercept)

# Goodness of fit, measured on the same rows the model was fitted on.
print("R2 : ", r2_score(y, predictions))

# Compute the mean squared error once and reuse it for the RMSE, rather than
# evaluating the same metric twice.
mse = mean_squared_error(y, predictions)
print("MSE : ", mse)
print("RMSE : ", sqrt(mse))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Actual observations (black points) with the model's predictions drawn as a
# blue line over them. Labels, title and legend make the figure self-explanatory.
fig, ax = plt.subplots()
ax.scatter(x, y, color="black", label='actual')
ax.plot(x, predictions, color='blue', label='predicted')
ax.set(xlabel='bill', ylabel='tip', title='Actual tips vs. fitted line')
ax.legend()
plt.show()  # render the figure without echoing the artists' repr as cell output

## Step 8 : Predict on some new data

In [None]:
# Three unseen bill amounts to score with the fitted model; the single column
# is named 'bill' to match the feature column used for training.
new_data = pd.DataFrame([100, 200, 300], columns=['bill'])
new_data

In [None]:
# Predict from the 'bill' column only, not the whole frame. A later cell adds a
# 'predicted_tip' column to new_data; passing the full frame would then hand
# the model two columns if this cell were re-run, and the model was fitted on
# a single feature.
new_tips = model.predict(new_data[['bill']])
new_tips

In [None]:
# Store the predicted tips next to the bills they were computed from.
# Note: this modifies new_data in place (it gains a 'predicted_tip' column).
new_data['predicted_tip'] = new_tips
new_data