## Intermediate Machine Learning Basics with scikit-learn

### Simple Linear Regression with scikit-learn

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Single feature (bill amount) and the target (tip amount).
X = df[['total_bill']]
y = df['tip']

# Fit an ordinary least-squares line: tip ~ total_bill.
model = LinearRegression()
model.fit(X, y)

# Predict the tip for a new bill. The input is wrapped in a DataFrame that
# reuses the training column names: the model was fitted on a named
# DataFrame, and passing a bare nested list makes scikit-learn warn that
# "X does not have valid feature names".
new_bill = pd.DataFrame([[50]], columns=X.columns)
predicted_tip = model.predict(new_bill)
print(f'Predicted tip for a total bill of $50: ${predicted_tip[0]}')

Predicted tip for a total bill of $50: $6.171495482772341




### Multi-Variable Linear Regression with scikit-learn

In [2]:
# Show the first five rows of the tips DataFrame to inspect the available columns.
df.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Two features (bill amount and party size) and the target (tip amount).
X = df[['total_bill', 'size']]
y = df['tip']

# Fit a linear model: tip ~ total_bill + size.
model = LinearRegression()
model.fit(X, y)

# Predict the tip for a $50 bill shared by a party of 2. The input is a
# DataFrame carrying the training column names so that each value is tied to
# the right feature and scikit-learn does not warn about missing feature names.
new_table = pd.DataFrame([[50, 2]], columns=X.columns)
predicted_tip = model.predict(new_table)
print(f'Predicted tip for a total bill of $50: ${predicted_tip[0]}')

Predicted tip for a total bill of $50: $5.689807171207553




### Multi-Variable Linear Regression with Feature Encoding with scikit-learn

In [10]:
# write the whole script above in one go
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Encode the categorical `smoker` column as 1 (Yes) / 0 (No).
# The explicit cast to int guarantees a plain numeric column (a dict-mapped
# categorical column can stay categorical) and fails loudly if a value is
# missing from the mapping instead of silently feeding NaN to the model.
df = df.assign(smoker_encoded=df['smoker'].map({'Yes': 1, 'No': 0}).astype(int))

# Features (bill amount, party size, smoker flag) and the target (tip amount).
X = df[['total_bill', 'size', 'smoker_encoded']]
y = df['tip']

# Fit a linear model: tip ~ total_bill + size + smoker_encoded.
model = LinearRegression()
model.fit(X, y)

# Predict the tip for a $50 bill and a party of 2, once for a smoker and once
# for a non-smoker. Inputs are DataFrames with the training column names so
# scikit-learn does not warn about missing feature names.
smoker_table = pd.DataFrame([[50, 2, 1]], columns=X.columns)
non_smoker_table = pd.DataFrame([[50, 2, 0]], columns=X.columns)
predicted_tip_smoker = model.predict(smoker_table)
predicted_tip_non_smoker = model.predict(non_smoker_table)
print(f'Predicted tip for a total bill of $50 with a smoker: ${predicted_tip_smoker[0]}')
print(f'Predicted tip for a total bill of $50 without a smoker: ${predicted_tip_non_smoker[0]}')

Predicted tip for a total bill of $50 with a smoker: $5.680665573697642
Predicted tip for a total bill of $50 without a smoker: $5.764098126252621




## Classification with scikit-learn

### Logistic Regression with scikit-learn

### Binary classification


In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression , LogisticRegression
# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Features (bill amount, party size, tip amount) and the binary target
# (whether the party included a smoker: 'Yes' / 'No').
X = df[['total_bill', 'size', 'tip']]
y = df['smoker']

# Fit a logistic regression classifier on the full dataset.
model = LogisticRegression()
model.fit(X, y)

# Predict the smoker label for a $50 bill, a party of 2 and a $5.23 tip.
# The input is a DataFrame with the training column names so scikit-learn
# does not warn about missing feature names.
new_table = pd.DataFrame([[50, 2, 5.23]], columns=X.columns)
predicted_smoker = model.predict(new_table)
print(f'here is the result : {predicted_smoker[0]} , yes means smoker , No means non-smoker')

here is the result : Yes , yes means smoker , No means non-smoker




### Naive Bayes Classification with scikit-learn

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# import naive bayes
from sklearn.naive_bayes import GaussianNB
# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Features (bill amount, party size, tip amount) and the binary target
# (whether the party included a smoker: 'Yes' / 'No').
X = df[['total_bill', 'size', 'tip']]
y = df['smoker']

# Fit a Gaussian naive Bayes classifier on the full dataset.
model = GaussianNB()
model.fit(X, y)

# Predict the smoker label for a $50 bill, a party of 2 and a $5.23 tip.
# The input is a DataFrame with the training column names so scikit-learn
# does not warn about missing feature names.
new_table = pd.DataFrame([[50, 2, 5.23]], columns=X.columns)
predicted_smoker = model.predict(new_table)
print(f'here is the result : {predicted_smoker[0]} , yes means smoker , No means non-smoker')

here is the result : Yes , yes means smoker , No means non-smoker




### Decision Tree Classification with scikit-learn

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# import naive bayes
from sklearn.naive_bayes import GaussianNB
# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier
# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Features (bill amount, party size, tip amount) and the binary target
# (whether the party included a smoker: 'Yes' / 'No').
X = df[['total_bill', 'size', 'tip']]
y = df['smoker']

# Fit a decision tree classifier. random_state is fixed so the fitted tree is
# reproducible: scikit-learn permutes the features at every split, so ties
# between equally good splits can otherwise be resolved differently per run.
model = DecisionTreeClassifier(random_state=42)
model.fit(X, y)

# Predict the smoker label for a $50 bill, a party of 2 and a $5.23 tip.
# The input is a DataFrame with the training column names so scikit-learn
# does not warn about missing feature names.
new_table = pd.DataFrame([[50, 2, 5.23]], columns=X.columns)
predicted_smoker = model.predict(new_table)
print(f'here is the result : {predicted_smoker[0]} , yes means smoker , No means non-smoker')

here is the result : Yes , yes means smoker , No means non-smoker




### Decision Tree Regression with scikit-learn

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor
# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Two features (bill amount and party size) and the numeric target (tip amount).
X = df[['total_bill', 'size']]
y = df['tip']

# Fit a decision tree regressor. random_state is fixed so the fitted tree is
# reproducible: scikit-learn permutes the features at every split, so ties
# between equally good splits can otherwise be resolved differently per run.
model = DecisionTreeRegressor(random_state=42)
model.fit(X, y)

# Predict the tip for a $50 bill shared by a party of 2. The input is a
# DataFrame with the training column names so scikit-learn does not warn
# about missing feature names.
new_table = pd.DataFrame([[50, 2]], columns=X.columns)
predicted_tip = model.predict(new_table)
# The result is a tip amount in dollars, not a smoker label, so the message
# matches the one used for the linear regression on the same two features.
print(f'Predicted tip for a total bill of $50: ${predicted_tip[0]}')

here is the result : 10.0 , yes means smoker , No means non-smoker




## Regression Metrics with scikit-learn

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split

# Load the restaurant tips dataset bundled with seaborn.
df = sns.load_dataset('tips')

# Two features (bill amount and party size) and the target (tip amount).
X = df[['total_bill', 'size']]
y = df['tip']

# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the linear model on the training rows only.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and score the predictions.
y_pred = model.predict(X_test)

# MSE is computed once and reused for its square root (RMSE).
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}")
print(f"R^2 Score: {r2_score(y_test, y_pred)}")
print(f"Root Mean Squared Error: {rmse}")
# NOTE: scikit-learn reports MAPE as a fraction (0.28 means 28%), not as a
# value on a 0-100 scale.
print(f"Mean Absolute Percentage Error: {mean_absolute_percentage_error(y_test, y_pred)}")

Mean Squared Error: 0.6485996190543517
Mean Absolute Error: 0.6639235737596481
R^2 Score: 0.4811084097989491
Root Mean Squared Error: 0.8053568271607013
Mean Absolute Percentage Error: 0.27978917485910254
