<a href="https://colab.research.google.com/github/MikolajWasowski/neural-network-course/blob/master/02_basics/05_regression_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

* @author: krakowiakpawel9@gmail.com  
* @site: e-smartdata.org

### Metryki - Problem regresji:
1. [Import bibliotek](#a0)
2. [Metryki](#a1)
    1. [Interpretacja graficzna](#a2)
    2. [Mean Absolute Error - MAE](#a3)
    3. [Mean Squared Error - MSE](#a4)
    4. [Root Mean Squared Error - RMSE](#a5)
    5. [Max Error](#a6)
    6. [R2 score - współczynnik determinacji](#a7)

    

### <a name='a0'></a> 1. Import bibliotek

In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorflow as tf
tf.__version__

'2.15.0'

In [3]:
y_true = 100 + 20 * np.random.randn(50)
y_true

array([123.92389659,  89.39931381, 121.14423549,  84.63020683,
        99.14156389,  68.05849165, 115.39621296,  97.6188764 ,
        97.58625716, 126.57903538,  83.40546409,  81.65539043,
       124.50065955,  68.96696764,  82.91261483, 101.24511589,
        96.26153077,  98.3008419 , 102.18640597,  70.54994806,
        93.15377585,  93.30287284,  87.48530346, 122.6960646 ,
        73.19810302, 135.10613117, 102.91543809,  86.32416435,
       108.52949543, 108.99468291, 117.95962506, 107.23824803,
       103.6891406 , 110.12715677, 108.62386682,  70.15670672,
       117.71796318, 108.79136727, 118.75704215, 106.10814578,
       104.10972574,  75.02620014,  85.90288784, 117.6277488 ,
       118.41868757, 123.12723979,  82.33879737, 101.40450409,
       106.45015329, 110.90819584])

In [4]:
y_pred = y_true + 10 * np.random.randn(50)
y_pred

array([128.72995168,  93.98312015, 123.39373668,  85.47212599,
        69.87352396,  77.10117156, 122.42247999,  95.08091471,
       104.35305488, 124.01104997,  82.29090629,  79.79584917,
       121.73358808,  75.13929055,  83.87615905, 101.12977869,
       109.63393567,  83.49700727, 123.45194133,  76.92769988,
        90.54638778,  92.57913662,  98.71592956, 122.55574541,
        83.9466003 , 141.50594936, 123.7523451 ,  95.86821098,
       118.34306392, 130.38232345, 117.45061294, 116.66362073,
       111.83710436,  83.17547352, 127.32374207,  67.20517118,
       117.76419571,  97.80179886, 115.92432735, 110.07183935,
       104.23035725,  82.68262085,  90.68542208, 107.21550897,
       121.73723564, 131.69827843,  90.22063191, 103.04237368,
       110.80233663, 113.62240687])

In [5]:
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results.head()

Unnamed: 0,y_true,y_pred
0,123.923897,128.729952
1,89.399314,93.98312
2,121.144235,123.393737
3,84.630207,85.472126
4,99.141564,69.873524


In [6]:
results['error'] = results['y_true'] - results['y_pred']
results.head()

Unnamed: 0,y_true,y_pred,error
0,123.923897,128.729952,-4.806055
1,89.399314,93.98312,-4.583806
2,121.144235,123.393737,-2.249501
3,84.630207,85.472126,-0.841919
4,99.141564,69.873524,29.26804


### <a name='a1'></a> 2. Metryki
### <a name='a2'></a> 2.1 Interpretacja graficzna

In [7]:
def plot_regression_results(y_true, y_pred):
    """Plot predicted values against true values with a reference diagonal.

    The first trace is a marker scatter of ``y_pred`` over ``y_true``; the
    second trace is the line ``y = x`` spanning the smallest to the largest
    value found in either input, i.e. where perfect predictions would lie.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values; combined with ``y_true`` into one DataFrame, so
        both are expected to have the same length.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()`` and not returned.
    """
    results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})

    # Overall value range across both columns, used as the end points of the
    # diagonal. Named `lower`/`upper` so the built-in `min`/`max` functions
    # are not shadowed inside this function.
    lower = results[['y_true', 'y_pred']].min().min()
    upper = results[['y_true', 'y_pred']].max().max()

    fig = go.Figure(
        data=[
            go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers'),
            go.Scatter(x=[lower, upper], y=[lower, upper]),
        ],
        layout=go.Layout(showlegend=False, width=800,
                         xaxis_title='y_true',
                         yaxis_title='y_pred',
                         title='Regression results'),
    )
    fig.show()
plot_regression_results(y_true, y_pred)

In [8]:
y_true = 100 + 20 * np.random.randn(1000)
y_pred = y_true + 10 * np.random.randn(1000)
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results['error'] = results['y_true'] - results['y_pred']

px.histogram(results, x='error', nbins=50, width=800)

### <a name='a3'></a> 2.2 Mean Absolute Error
### $$MAE = \frac{1}{N}\sum_{i=1}^{N}|y_{true,i} - y_{pred,i}|$$

In [9]:
def mean_absolute_error(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``.

    Computed as the sum of ``|y_true - y_pred|`` divided by ``len(y_true)``.
    The inputs must support element-wise subtraction and expose ``.sum()``
    on the result (e.g. NumPy arrays).
    """
    absolute_errors = abs(y_true - y_pred)
    n_samples = len(y_true)
    return absolute_errors.sum() / n_samples

mean_absolute_error(y_true, y_pred)

7.620712979055358

In [10]:
from tensorflow.keras.losses import mean_absolute_error
mean_absolute_error(y_true, y_pred)

<tf.Tensor: shape=(), dtype=float64, numpy=7.620712979055358>

In [11]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_true, y_pred)

7.620712979055358

### <a name='a4'></a> 2.3 Mean Squared Error
### $$MSE = \frac{1}{N}\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}$$

In [12]:
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    Computed as the sum of squared differences divided by ``len(y_true)``.
    The inputs must support element-wise subtraction and expose ``.sum()``
    on the result (e.g. NumPy arrays).
    """
    residuals = y_true - y_pred
    squared_error_total = (residuals ** 2).sum()
    return squared_error_total / len(y_true)

mean_squared_error(y_true, y_pred)

92.62540894631736

In [13]:
from tensorflow.keras.losses import mean_squared_error
mean_squared_error(y_true, y_pred)

<tf.Tensor: shape=(), dtype=float64, numpy=92.62540894631736>

In [14]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_true, y_pred)

92.62540894631736

### <a name='a5'></a> 2.4 Root Mean Squared Error
### $$RMSE = \sqrt{MSE}$$

In [15]:
def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``.

    This is the square root of the sum of squared differences divided by
    ``len(y_true)``, which puts the error back in the units of the target.
    """
    residuals = y_true - y_pred
    mse = (residuals ** 2).sum() / len(y_true)
    return np.sqrt(mse)

root_mean_squared_error(y_true, y_pred)

9.624209523192924

In [16]:
np.sqrt(mean_squared_error(y_true, y_pred))

9.624209523192924

### <a name='a6'></a> 2.5 Max Error

In [17]:
def max_error(y_true, y_pred):
    """Return the largest absolute difference between ``y_true`` and ``y_pred``."""
    absolute_errors = abs(y_true - y_pred)
    return absolute_errors.max()

In [18]:
max_error(y_true, y_pred)

36.20595758835829

In [19]:
from sklearn.metrics import max_error

max_error(y_true, y_pred)

36.20595758835829

### <a name='a7'></a> 2.6 R2 score - współczynnik determinacji
### $$R2\_score = 1 - \frac{\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}}{\sum_{i=1}^{N}(y_{true,i} - \overline{y_{true}})^{2}}$$

In [20]:
from sklearn.metrics import r2_score

r2_score(y_true, y_pred)

0.7659759481742594

In [21]:
def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    R^2 = 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
    ``(y_true - y_pred) ** 2`` and SS_tot is the sum of squared deviations
    of ``y_true`` from its mean.

    Parameters
    ----------
    y_true : numpy.ndarray
        Ground-truth target values (must provide ``.mean()``).
    y_pred : numpy.ndarray
        Predicted values, element-wise compatible with ``y_true``.

    Returns
    -------
    float
        The R^2 score. When ``y_true`` is constant (SS_tot == 0) the ratio is
        undefined; in that case 1.0 is returned for perfect predictions and
        0.0 otherwise, matching the default behaviour of
        ``sklearn.metrics.r2_score``.
    """
    numerator = ((y_true - y_pred) ** 2).sum()
    denominator = ((y_true - y_true.mean()) ** 2).sum()

    # NumPy floating-point division by zero does not raise ZeroDivisionError;
    # it yields inf/nan with a RuntimeWarning. The degenerate case therefore
    # has to be detected explicitly instead of via try/except.
    if denominator == 0:
        return 1.0 if numerator == 0 else 0.0

    return 1 - numerator / denominator

In [22]:
r2_score(y_true, y_pred)

0.7659759481742594