<a href="https://colab.research.google.com/github/KamilBienias/neural-network-course/blob/main/02_basics/05_regression_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Metryki - Problem regresji:
1. [Import bibliotek](#a0)
2. [Metryki](#a1)
    1. [Interpretacja graficzna](#a2)
    2. [Mean Absolute Error - MAE](#a3)
    3. [Mean Squared Error - MSE](#a4)
    4. [Root Mean Squared Error - RMSE](#a5)
    5. [Max Error](#a6)
    6. [R2 score - współczynnik determinacji](#a7)

    

### <a name='a0'></a> 1. Import bibliotek

In [None]:
%tensorflow_version 2.x

In [None]:
# ######################################################
# Etap 5. Odcinek: Metryki w problemach regresji.

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Seed NumPy's global random generator so the random draws below are
# reproducible when the notebook is run top to bottom.
np.random.seed(42)

# 50 "true" trouser prices drawn from a normal distribution with
# mean 100 and standard deviation 20.
y_true = 100 + 20 * np.random.randn(50)
y_true

array([120.88859661,  86.53250299, 100.1813557 ,  92.3313352 ,
       114.86212982,  79.78894189,  99.0781279 ,  92.85118449,
        77.29337602,  93.24090274, 128.63969965,  75.27757454,
       134.70366632, 102.70760129,  92.87933712, 123.20461513,
       101.8999153 , 104.57123339,  82.5360423 , 100.31135909,
        61.64374435, 121.42319744, 107.5589889 , 100.12536263,
       119.62081959,  81.03655466, 113.16103651,  94.25552171,
        85.94369552,  65.90270802,  89.960806  , 117.69979889,
       116.39962075,  62.1991481 , 100.42930497, 100.41291535,
        88.00270954, 121.84429314,  97.06785991,  94.3592056 ,
       111.30699052,  85.47274387,  92.73876182,  74.81764189,
        78.54132353,  80.33687383, 127.1124558 , 127.45248038,
       116.39067624,  78.07135648])

In [None]:
# 50 predicted trouser prices: the true prices plus Gaussian noise with
# mean 0 and standard deviation 10.
y_pred = y_true + 10 * np.random.randn(50)
y_pred

array([125.13761758,  71.23224189, 120.67389224,  85.09305591,
       127.33136989,  78.53344439,  93.13408313, 101.38005095,
        72.68531499,  92.4050439 , 145.15842494,  75.65857731,
       118.50449333, 115.36131007,  90.20068992, 123.57270848,
        99.96172024, 100.83594301,  80.94023239, 103.47003239,
        73.43601449, 130.49602142, 104.18313085, 100.54285288,
       113.58412447,  93.87176663, 109.91607484,  86.35115055,
        93.87235383,  55.1071868 ,  76.92634423, 119.99263301,
       116.01601058,  55.7476524 ,  93.30195801,  89.13942641,
        91.84151216, 122.81409752,  93.87580452, 109.26932709,
       117.16557216, 103.93318702,  92.6589693 ,  82.27295969,
        69.43961454,  57.24676093, 131.62633426, 118.38238671,
       138.28378005,  74.10653862])

In [None]:
# Put the true and the predicted prices side by side in one table
# and preview its first five rows.
results = pd.DataFrame(data=dict(y_true=y_true, y_pred=y_pred))
results.head(n=5)

Unnamed: 0,y_true,y_pred
0,120.888597,125.137618
1,86.532503,71.232242
2,100.181356,120.673892
3,92.331335,85.093056
4,114.86213,127.33137


In [None]:
# Signed error per row (true minus predicted). Taking the absolute value of
# this column, summing it and dividing by the number of rows gives the
# mean absolute error.
results['error'] = results['y_true'].sub(results['y_pred'])
results.head(n=5)

Unnamed: 0,y_true,y_pred,error
0,120.888597,125.137618,-4.249021
1,86.532503,71.232242,15.300261
2,100.181356,120.673892,-20.492537
3,92.331335,85.093056,7.238279
4,114.86213,127.33137,-12.46924


### <a name='a1'></a> 2. Metryki
### <a name='a2'></a> 2.1 Interpretacja graficzna

In [None]:
def plot_regression_results(y_true, y_pred):
    """Show a scatter plot of predicted against true values.

    Each point is one (y_true, y_pred) pair. A second trace joins the smallest
    and the largest value found in either series, i.e. the y_pred == y_true
    diagonal, so points on that line are perfect predictions.

    Parameters
    ----------
    y_true, y_pred : equal-length sequences accepted by ``pd.DataFrame``
        True and predicted target values.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()`` and not returned.
    """
    results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
    # Overall range across both columns; named lower/upper so the built-in
    # min() and max() are not shadowed inside the function.
    lower = results[['y_true', 'y_pred']].min().min()
    upper = results[['y_true', 'y_pred']].max().max()

    fig = go.Figure(
        data=[
            go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers'),
            # Reference diagonal spanning the full value range.
            go.Scatter(x=[lower, upper], y=[lower, upper]),
        ],
        layout=go.Layout(showlegend=False, width=800,
                         xaxis_title='y_true',
                         yaxis_title='y_pred',
                         title='Regression results'),
    )
    fig.show()
plot_regression_results(y_true, y_pred)

In [None]:
# Redraw a larger sample (1000 points) using the same formulas as before.
# This rebinds y_true, y_pred and results, so the cells that follow
# operate on this larger sample.
y_true = 100 + 20 * np.random.randn(1000)
y_pred = y_true + 10 * np.random.randn(1000)
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results['error'] = results['y_true'] - results['y_pred']

# Histogram of the signed errors (y_true - y_pred).
px.histogram(results, x='error', nbins=50, width=800)

### <a name='a3'></a> 2.2 Mean Absolute Error
### $$MAE = \frac{1}{N}\sum_{i=1}^{N}|y_{true} - y_{pred}|$$

In [None]:
def mean_absolute_error(y_true, y_pred):
    """Return the mean absolute error between true and predicted values.

    The element-wise absolute differences are summed and divided by
    ``len(y_true)``. Both arguments must support element-wise subtraction
    and the difference must provide ``.sum()`` (e.g. NumPy arrays).
    """
    absolute_errors = abs(y_true - y_pred)
    n_samples = len(y_true)
    return absolute_errors.sum() / n_samples

mean_absolute_error(y_true, y_pred)

8.012489391267776

In [None]:
# The same metric computed by Keras. Note: this import rebinds the name
# mean_absolute_error, replacing the hand-written function defined earlier.
from tensorflow.keras.losses import mean_absolute_error
mean_absolute_error(y_true, y_pred)

<tf.Tensor: shape=(), dtype=float64, numpy=8.012489391267776>

In [None]:
# The same metric computed by scikit-learn. This import rebinds
# mean_absolute_error once more, so from here on the name refers to the
# scikit-learn implementation.
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_true, y_pred)

8.012489391267776

### <a name='a4'></a> 2.3 Mean Squared Error
### $$MSE = \frac{1}{N}\sum_{i=1}^{N}(y_{true} - y_{pred})^{2}$$

In [None]:
# This metric penalizes large errors more heavily, because squaring
# makes a big error disproportionately bigger.
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error between true and predicted values.

    The element-wise differences are squared, summed and divided by
    ``len(y_true)``. Both arguments must support element-wise subtraction
    and the squared difference must provide ``.sum()`` (e.g. NumPy arrays).
    """
    residuals = y_true - y_pred
    squared_residuals = residuals ** 2
    return squared_residuals.sum() / len(y_true)

mean_squared_error(y_true, y_pred)

99.11845143250373

In [None]:
# The same metric computed by Keras. Note: this import rebinds the name
# mean_squared_error, replacing the hand-written function defined earlier.
from tensorflow.keras.losses import mean_squared_error
mean_squared_error(y_true, y_pred)

<tf.Tensor: shape=(), dtype=float64, numpy=99.11845143250373>

In [None]:
# The same metric computed by scikit-learn. This import rebinds
# mean_squared_error once more, so from here on the name refers to the
# scikit-learn implementation.
from sklearn.metrics import mean_squared_error

mean_squared_error(y_true, y_pred)

99.11845143250373

### <a name='a5'></a> 2.4 Root Mean Squared Error
### $$RMSE = \sqrt{MSE}$$

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error between true and predicted values.

    This is the square root of the mean of the squared element-wise
    differences, where the mean divides by ``len(y_true)``.
    """
    squared_residuals = (y_true - y_pred) ** 2
    mse = squared_residuals.sum() / len(y_true)
    return np.sqrt(mse)

root_mean_squared_error(y_true, y_pred)

9.955825000094354

In [None]:
# Roughly how far off the predictions are on average (RMSE = sqrt(MSE)).
# mean_squared_error here is whichever binding was created last -- the
# scikit-learn import, if the cells were run in order.
np.sqrt(mean_squared_error(y_true, y_pred))

9.955825000094354

### <a name='a6'></a> 2.5 Max Error

In [None]:
def max_error(y_true, y_pred):
    """Return the largest absolute difference between true and predicted values.

    Both arguments must support element-wise subtraction and the absolute
    difference must provide ``.max()`` (e.g. NumPy arrays).
    """
    absolute_errors = abs(y_true - y_pred)
    return absolute_errors.max()

In [None]:
max_error(y_true, y_pred)

31.691624381505207

In [None]:
# The same metric computed by scikit-learn. Note: this import rebinds the
# name max_error, replacing the hand-written function defined earlier.
from sklearn.metrics import max_error

max_error(y_true, y_pred)

31.691624381505207

### <a name='a7'></a> 2.6 R2 score - współczynnik determinacji
### $$R2\_score = 1 - \frac{\sum_{i=1}^{N}(y_{true} - y_{pred})^{2}}{\sum_{i=1}^{N}(y_{true} - \overline{y_{true}})^{2}}$$

Sprawdza, o ile lepiej działa model od modelu, który zwraca średnią.

In [None]:
# Reference value from scikit-learn. The hand-written r2_score defined in a
# later cell rebinds this name.
from sklearn.metrics import r2_score

r2_score(y_true, y_pred)

0.7511799947594112

In [None]:
def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared differences
    between ``y_true`` and ``y_pred`` and SS_tot is the sum of squared
    deviations of ``y_true`` from its own mean.

    Parameters
    ----------
    y_true, y_pred : array-like objects supporting element-wise arithmetic,
        ``.sum()`` and (for ``y_true``) ``.mean()``, e.g. NumPy arrays.

    Returns
    -------
    float
        The R^2 score. When ``y_true`` is constant (SS_tot == 0) the ratio is
        undefined; following scikit-learn's default convention the result is
        1.0 for perfect predictions (SS_res == 0) and 0.0 otherwise.
    """
    residual_ss = ((y_true - y_pred) ** 2).sum()
    total_ss = ((y_true - y_true.mean()) ** 2).sum()
    # NumPy scalars do not raise ZeroDivisionError on division by zero (they
    # yield nan/inf with a RuntimeWarning), so the degenerate case has to be
    # detected explicitly instead of with try/except.
    if total_ss == 0:
        return 1.0 if residual_ss == 0 else 0.0
    return 1 - residual_ss / total_ss

In [None]:
r2_score(y_true, y_pred)

0.7511799947594112