# **Python Chilla:** Python Advanced

**Name**: Arsalan Ali<br>
**Email**: arslanchaos@gmail.com

 ## Chapter 22: Machine Learning
 ### **Topics Included:**<br>
**1. Regression**
 * Simple Linear Regression
 * Multiple Linear Regression
 * Polynomial Linear Regression
 * Decision Tree Regressor
 * Random Forest Regressor
 * K-nearest neigbour Regressor
 * Support Vector Regressor

 
**2. Classification**
 * Logistic Regression
 * Decision Tree Classifier
 * Random Forest Classifier
 * K-nearest neighbour
 * Naive Bayes
 * Support Vector Machine

----

## Regression

In [17]:
# Basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Model libraries
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

# Data splitting (Train and Test) librarires
from sklearn.model_selection import train_test_split

# Evaluation libraries
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [18]:
# Import dataset
kashti = sns.load_dataset("titanic")
kashti = kashti.dropna()

In [19]:
X = kashti[["survived", "pclass", "age"]]
y = kashti["fare"]

In [16]:
# For Polynomail Regression
# poly = PolynomialFeatures(degree=2, include_bias=False)
# poly_features = poly.fit_transform(X)
# poly_reg_model = LinearRegression()
# poly_reg_model.fit(poly_features, y)
# y_predicted = poly_reg_model.predict(poly_features)

In [20]:
# shorten the names
lr = LinearRegression()
dt = DecisionTreeRegressor()
rf = RandomForestRegressor()
svr = SVR()
knn = KNeighborsRegressor()

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for i in [lr, dt, rf, svr, knn]: # read all models
    i.fit(X_train, y_train)  # fitting a model
    y_pred = i.predict(X_test) # predict
    test_score = r2_score(y_test, y_pred)
    train_score = r2_score(y_train, i.predict(X_train))
    if abs(train_score - test_score) <= 5:
        print(i)
        print("Train score:", train_score)
        print("Test score:", test_score)
        print("R2 score:", test_score)
        print("MAE:", mean_absolute_error(y_test, y_pred))
        print("MSE:", mean_squared_error(y_test, y_pred))
        print("RMSE:", mean_squared_error(y_test, y_pred, squared=False))
        print("-"*30)

LinearRegression()
Train score: 0.13575911868401125
Test score: 0.18420184540921924
R2 score: 0.18420184540921924
MAE: 39.06230928977417
MSE: 2832.2796187549534
RMSE: 53.21916589683602
------------------------------
DecisionTreeRegressor()
Train score: 0.56361956026013
Test score: -2.0257976727472067
R2 score: -2.0257976727472067
MAE: 76.2173036036036
MSE: 10504.93315138339
RMSE: 102.49357614691463
------------------------------
RandomForestRegressor()
Train score: 0.5179305423954891
Test score: -1.0550099472431578
R2 score: -1.0550099472431578
MAE: 65.79437658603169
MSE: 7134.562338934302
RMSE: 84.46633849607962
------------------------------
SVR()
Train score: -0.07755386123708674
Test score: -0.1540243420248053
R2 score: -0.1540243420248053
MAE: 43.98135221147071
MSE: 4006.52980773596
RMSE: 63.297154815488824
------------------------------
KNeighborsRegressor()
Train score: 0.18120141264427858
Test score: -0.4511881220540135
R2 score: -0.4511881220540135
MAE: 54.81594216216216
MSE: 

## Classification

In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import svm

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [25]:
# Import dataset
tips = sns.load_dataset("tips")
tips = tips.dropna()
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [29]:
X = tips.drop(labels="sex", axis=1)
y = tips["sex"]

In [30]:
from sklearn.preprocessing import LabelEncoder
X["smoker"] = LabelEncoder().fit_transform(X["smoker"])
X["day"] = LabelEncoder().fit_transform(X["day"])
X["time"] = LabelEncoder().fit_transform(X["time"])

In [34]:
lr_c = LogisticRegression()
dt_c = DecisionTreeClassifier()
rf_c = RandomForestClassifier(n_estimators=80) 
knn_c = KNeighborsClassifier(n_neighbors=5)
nb_c = GaussianNB()
svm_c = svm.SVC(kernel="linear")

In [42]:
# model loop
import warnings
warnings.filterwarnings('ignore') 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

for i in [lr_c, dt_c, rf_c, knn_c, nb_c, svm_c]: # read all models
    i.fit(X_train, y_train)  # fitting a model
    y_pred = i.predict(X_test) # predict
    cm = confusion_matrix(y_test, i.predict(X_test))
    print(i)
    print(cm)
    print(f"""
Score: {round(accuracy_score(y_test,y_pred),4)}
Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}
Classification Report: \n{classification_report(y_test, y_pred)}
    """)
    # print(i.classification_report)
    print("-"*60)

LogisticRegression()
[[ 2 17]
 [ 2 28]]

Score: 0.6122
Confusion Matrix: 
[[ 2 17]
 [ 2 28]]
Classification Report: 
              precision    recall  f1-score   support

      Female       0.50      0.11      0.17        19
        Male       0.62      0.93      0.75        30

    accuracy                           0.61        49
   macro avg       0.56      0.52      0.46        49
weighted avg       0.57      0.61      0.52        49

    
------------------------------------------------------------
DecisionTreeClassifier()
[[ 9 10]
 [ 8 22]]

Score: 0.6327
Confusion Matrix: 
[[ 9 10]
 [ 8 22]]
Classification Report: 
              precision    recall  f1-score   support

      Female       0.53      0.47      0.50        19
        Male       0.69      0.73      0.71        30

    accuracy                           0.63        49
   macro avg       0.61      0.60      0.60        49
weighted avg       0.63      0.63      0.63        49

    
-------------------------------------