## 1. MSE Scikit-learn

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

In [None]:
# Toy regression example: observed targets paired with model predictions.
y_true, y_pred = (
    [91, 51, 2.5, 2, -5],
    [90, 48, 2, 2, -4],
)

In [None]:
# Mean of the squared residuals. For the two lists defined in the previous cell
# the squared errors are 1, 9, 0.25, 0 and 1, so the result is 11.25 / 5 = 2.25.
mean_squared_error(y_true, y_pred)

## 2. Accuracy Scikit-learn

In [None]:
# Toy binary-classification example (this rebinds the regression lists from section 1).
y_true = [0, 0, 1, 1, 1, 1, 0]  # ground-truth labels
y_pred = [0, 1, 0, 1, 0, 1, 0]  # predicted labels

In [None]:
# Fraction of positions where prediction and label agree.
# For the lists in the previous cell, 4 of 7 match, i.e. 4/7 ≈ 0.571.
accuracy_score(y_true, y_pred)

## 3. Regression

In [None]:
# imports
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [None]:
# Data: California housing, split into feature matrix X and target vector y.
housing = fetch_california_housing()
X = housing['data']
y = housing['target']

In [None]:
# Train/test split: 10% of the rows are held out; shuffling with a fixed
# random_state keeps the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=13
)

In [None]:
# Regression pipeline: median imputation -> standardization -> linear regression.
pipeline = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('lr', LinearRegression()),
]
pipe = Pipeline(steps=pipeline)

In [None]:
# Fit every pipeline step on the training split only.
# As the last expression of the cell, the fitted pipeline is also displayed.
pipe.fit(X_train, y_train)

In [None]:
# Predictions on the data the pipeline was fitted on (used for the train-side metrics below).
y_train_preds = pipe.predict(X_train)
# Show only the first 10 values rather than the whole array.
y_train_preds[:10]

In [None]:
# Predictions on the held-out split (used for the test-side metrics below).
y_test_preds = pipe.predict(X_test)
# Show only the first 10 values rather than the whole array.
y_test_preds[:10]

In [None]:
# (train R^2, test R^2)
r2_score(y_train, y_train_preds), r2_score(y_test, y_test_preds)

In [None]:
# (train MSE, test MSE)
mean_squared_error(y_train, y_train_preds), mean_squared_error(y_test, y_test_preds)

In [None]:
# (train MAE, test MAE). Both elements must use mean_absolute_error so the
# pair is comparable; the test-side MSE is already reported in the previous cell.
mean_absolute_error(y_train, y_train_preds), mean_absolute_error(y_test, y_test_preds)

## 4. Classification

In [None]:
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as np

from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score, RocCurveDisplay
from sklearn.metrics import confusion_matrix

In [None]:
# Load the breast-cancer dataset directly as a (features, target) pair.
# NOTE: this rebinds X and y, replacing the housing data from section 3.
X, y = load_breast_cancer(return_X_y=True)

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
# NOTE: this rebinds the X_train/X_test/y_train/y_test names used in section 3.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=43)

In [None]:
scaler = StandardScaler()
# Not fitted here: the scaler is passed unfitted to the Pipeline built below,
# and fitted on X_train when that pipeline's fit() is called.

In [None]:
classifier = LogisticRegression()
# Not fitted here: the classifier is passed unfitted to the Pipeline built below,
# and fitted when that pipeline's fit() is called.

In [None]:
# Chain scaling and classification so both steps are fitted on X_train only.
pipe = Pipeline(steps=[("scaler", scaler), ("classifier", classifier)])
pipe.fit(X_train, y_train)

# Hard label predictions for each split.
y_train_preds, y_test_preds = pipe.predict(X_train), pipe.predict(X_test)

# Column 1 of predict_proba for each split; these feed roc_auc_score and the ROC plot.
y_train_probs = pipe.predict_proba(X_train)[:, 1]
y_test_probs = pipe.predict_proba(X_test)[:, 1]

# Peek at the first 10 predicted labels of each split.
y_train_preds[:10], y_test_preds[:10]

In [None]:
def score(y_true, y_pred, y_probs):
    """Print binary-classification metrics for one data split.

    Args:
        y_true: Ground-truth labels.
        y_pred: Hard class predictions; used for F1, accuracy, precision and recall.
        y_probs: Scores handed to ``roc_auc_score`` (the callers in this
            notebook pass column 1 of ``predict_proba``).

    Returns:
        None. Each metric is printed on its own line as ``<label>: <value>``.

    Note:
        Section 5 of this notebook defines a two-argument ``score`` for
        regression, which replaces this function in the kernel namespace.
        Re-run this cell before re-running the classification cells.
    """
    print("f1:", f1_score(y_true, y_pred))
    print("accuracy:", accuracy_score(y_true, y_pred))
    # The label is spelled "precision" (it used to be printed as "precison").
    print("precision:", precision_score(y_true, y_pred))
    print("recall:", recall_score(y_true, y_pred))
    print("roc_auc:", roc_auc_score(y_true, y_probs))

In [None]:
# Metrics on the training split (the data the pipeline was fitted on).
score(y_train, y_train_preds, y_train_probs)

In [None]:
# Metrics on the held-out test split; compare with the training figures above.
score(y_test, y_test_preds, y_test_probs)

In [None]:
# Confusion matrix on the test split (true labels first, predicted labels second).
confusion_matrix(y_test, y_test_preds)

In [None]:
# ROC curve for the test split, built from the column-1 predict_proba scores rather than hard labels.
RocCurveDisplay.from_predictions(y_test, y_test_probs)
# Render the figure explicitly so the cell shows the plot instead of the display object's repr.
plt.show()

## 5. Machine Learning Models

In [5]:
# imports
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# Data: California housing, split into feature matrix X and target vector y.
# NOTE: this rebinds X, y and the split variables used by the earlier sections.
housing = fetch_california_housing()
X = housing['data']
y = housing['target']

# Hold out 10% of the rows; shuffling with a fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, shuffle=True, random_state=43
)

# Baseline pipeline: median imputation -> standardization -> linear regression.
pipeline = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
    ('lr', LinearRegression()),
]
pipe = Pipeline(steps=pipeline)

# Fit on the training split only; as the last expression, the fitted pipeline is displayed.
pipe.fit(X_train, y_train)


0,1,2
,steps,"[('imputer', ...), ('scaler', ...), ...]"
,transform_input,
,memory,
,verbose,False

0,1,2
,missing_values,
,strategy,'median'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [6]:
def score(y_true, y_pred):
    """Print regression metrics (R^2, MAE, MSE) for one data split.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values.

    Returns:
        None. Each metric is printed on its own line.

    Note:
        This replaces the three-argument classification ``score`` defined in
        section 4 of this notebook.
    """
    labelled_metrics = (
        ("r2: ", r2_score),
        ("mae: ", mean_absolute_error),
        ("mse: ", mean_squared_error),
    )
    for label, metric in labelled_metrics:
        print(label, metric(y_true, y_pred))

In [7]:
def def_pipe(model, title):
    """Fit ``model`` behind the shared preprocessing steps and print its scores.

    A pipeline of median imputation, standardization and ``model`` is fitted
    on the notebook-level ``X_train``/``y_train``. Metrics are then printed
    for the train and test splits via ``score``.

    Args:
        model: Estimator placed as the final ``'model'`` step of the pipeline.
        title: Heading printed before the metrics.

    Returns:
        None. All results are printed.
    """
    steps = [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('model', model),
    ]
    fitted_pipe = Pipeline(steps)
    fitted_pipe.fit(X_train, y_train)

    # Same prediction order as before: test split first, then train split.
    test_predictions = fitted_pipe.predict(X_test)
    train_predictions = fitted_pipe.predict(X_train)

    print(f"{title}:\n")

    print("Train: \n")
    score(y_train, train_predictions)

    print("\nTest: \n")
    score(y_test, test_predictions)

In [8]:
# Baseline: plain linear regression. The recorded output shows test r2 ≈ 0.61.
def_pipe(LinearRegression(), "Linear Regression")

Linear Regression:

Train: 

r2:  0.6054131599242077
mae:  0.5330920012614552
mse:  0.5273648371379568

Test: 

r2:  0.6128959462132963
mae:  0.5196420310323715
mse:  0.49761195027083804


In [13]:
# Support vector regression with default hyperparameters. The recorded output shows test r2 ≈ 0.73.
# NOTE(review): this cell's execution count (13) is out of sequence with its
# neighbours (8 and 10); confirm the recorded output with Restart & Run All.
def_pipe(SVR(), "SVM")

SVM:

Train: 

r2:  0.7496108582936643
mae:  0.3835645163325985
mse:  0.3346447867133914

Test: 

r2:  0.7295080649899692
mae:  0.3897680598426783
mse:  0.3477101776542994


In [10]:
# Single decision tree with a fixed seed. The recorded output shows train r2 = 1.0
# against test r2 ≈ 0.62, i.e. the tree fits the training split exactly but
# generalizes much worse.
def_pipe(DecisionTreeRegressor(random_state=43), "Decision Tree")

Decision Tree:

Train: 

r2:  1.0
mae:  4.221907539810565e-17
mse:  9.24499456646287e-32

Test: 

r2:  0.6228217144931267
mae:  0.4403051356589147
mse:  0.4848526395290697


In [11]:
# Random forest with a fixed seed. The recorded output shows train r2 ≈ 0.97 and
# test r2 ≈ 0.81, the best test score among the models run in this section.
def_pipe(RandomForestRegressor(random_state=43), "Random Forest")

Random Forest:

Train: 

r2:  0.9741263135396302
mae:  0.12000198560508221
mse:  0.03458015083247723

Test: 

r2:  0.8119778189909694
mae:  0.3194169859011629
mse:  0.24169750554364758


In [12]:
# Gradient boosting with a fixed seed. The recorded output shows train r2 ≈ 0.80 and
# test r2 ≈ 0.79, a much smaller train/test gap than the random forest.
def_pipe(GradientBoostingRegressor(random_state=43), "Gradient Boosting")

Gradient Boosting:

Train: 

r2:  0.8042086499063384
mae:  0.3565654303668227
mse:  0.26167490389525294

Test: 

r2:  0.7895081234643192
mae:  0.36455447680396397
mse:  0.270581700642181
