<a href="https://colab.research.google.com/github/MattWroclaw/neural-networks/blob/main/02_basics/05_regression_metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Metryki - Problem regresji:
1. [Import bibliotek](#a0)
2. [Metryki](#a1)
    1. [Interpretacja graficzna](#a2)
    2. [Mean Absolute Error - MAE](#a3)
    3. [Mean Squared Error - MSE](#a4)
    4. [Root Mean Squared Error - RMSE](#a5)
    5. [Max Error](#a6)
    6. [R2 score - współczynnik determinacji](#a7)

### <a name='a0'></a> 1. Import bibliotek

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import tensorflow as tf
tf.__version__



'2.17.0'

In [2]:
y_true = 100 + 20 * np.random.randn(50)
y_true

array([ 75.61843209,  76.55609789,  73.69892473,  92.84927315,
        99.08945519,  97.5539587 , 115.15012833,  89.95042247,
        61.51887152,  93.03040711,  98.96966059, 118.40601441,
       118.44329025,  64.26982682, 147.16919394, 109.71540184,
       107.04569904,  87.6752742 , 105.29255287, 111.96006321,
        67.23318499, 141.86407215, 114.81097098,  89.74344089,
       101.99720171, 125.63580728, 114.42799502,  81.10642202,
       103.33141233, 123.69151764, 120.68365695,  82.98918134,
       106.48431621,  89.5863333 ,  84.82401618,  90.21317608,
       120.40967519,  72.50412377,  67.43316976,  93.98148459,
       152.03584106, 109.59814922, 103.18807599,  96.71373523,
       108.3633136 ,  88.77140807,  98.69181552, 119.30636902,
        97.46184935, 104.68407934])

In [3]:
y_pred = y_true + 10 * np.random.randn(50)
y_pred

array([ 68.30997211,  71.32378267,  78.18502028,  65.15137361,
       104.82105476,  96.29467704, 115.29953652,  91.28148893,
        65.42444446,  87.45747258, 107.80334547, 105.34856544,
       125.09117211,  61.88141623, 166.1418625 , 115.58751023,
        97.07286532,  84.73061788, 103.97234351, 111.71349523,
        64.68685957, 141.81802945, 119.43389102,  81.17958658,
        98.83284127, 109.93133825, 116.13648932,  89.71124065,
       116.43571396, 141.37562623, 114.61238447,  76.8973456 ,
        95.92448456,  71.67404721,  69.67580735,  89.949282  ,
       123.93717161,  73.6408973 ,  85.00208613, 104.37886472,
       161.55711139,  96.9464432 , 100.27807514, 106.54190924,
        96.18281385, 100.44375827, 101.47601856, 112.08648315,
        95.25493499, 101.3267392 ])

In [4]:
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results.head()

Unnamed: 0,y_true,y_pred
0,75.618432,68.309972
1,76.556098,71.323783
2,73.698925,78.18502
3,92.849273,65.151374
4,99.089455,104.821055


In [5]:
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
results.head()

Unnamed: 0,y_true,y_pred
0,75.618432,68.309972
1,76.556098,71.323783
2,73.698925,78.18502
3,92.849273,65.151374
4,99.089455,104.821055


### <a name='a1'></a> 2. Metryki
### <a name='a2'></a> 2.1 Interpretacja graficzna

In [6]:
def plot_regression_results(y_true, y_pred):
    """Scatter predictions against ground truth with a diagonal reference line.

    Prints the smallest and then the largest value found across both inputs;
    those two values are the endpoints of the reference line drawn from
    (lowest, lowest) to (highest, highest). Points lying on that line are
    predictions equal to the true value. The figure is displayed with
    ``fig.show()`` and nothing is returned.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, paired with ``y_true``.
    """
    results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
    both_columns = results[['y_true', 'y_pred']]

    # Overall extremes across both columns (column-wise reduce, then reduce again).
    lowest = both_columns.min().min()
    print(lowest)
    highest = both_columns.max().max()
    print(highest)

    points = go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers')
    diagonal = go.Scatter(x=[lowest, highest], y=[lowest, highest])
    layout = go.Layout(
        showlegend=False,
        width=800,
        xaxis_title='y_true',
        yaxis_title='y_pred',
        title='Regression results',
    )

    fig = go.Figure(data=[points, diagonal], layout=layout)
    fig.show()
plot_regression_results(y_true, y_pred)

61.51887152334425
166.14186250469896


In [7]:
# Draw a larger sample (1000 points) and build the predictions as the targets
# plus random normal noise. Note: this rebinds y_true / y_pred, so every
# metric computed in the cells below uses these 1000-element arrays.
y_true = 100 + 20 * np.random.randn(1000)
y_pred = y_true + 10 * np.random.randn(1000)
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
# Per-sample error: true value minus predicted value.
results['error'] = results['y_true'] - results['y_pred']

# Histogram of the per-sample errors.
px.histogram(results, x='error', nbins=50, width=800)

### <a name='a3'></a> 2.2 Mean Absolute Error
### $$MAE = \frac{1}{N}\sum_{i=1}^{N}|y_{true,i} - y_{pred,i}|$$

In [8]:
def mean_absolute_error(y_true, y_pred):
    """Return the mean absolute error (MAE) between targets and predictions.

    Both inputs are converted to float NumPy arrays first, so plain lists and
    tuples are accepted and unsigned-integer inputs cannot wrap around on
    subtraction. Values are paired by position; pandas index labels are
    ignored.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The sum of absolute differences divided by ``len(y_true)``.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        # Without this guard the result would be 0 / 0 -> NaN plus a warning.
        raise ValueError('y_true must contain at least one sample')
    return np.abs(y_true - y_pred).sum() / len(y_true)

mean_absolute_error(y_true, y_pred)

8.268404487595205

In [9]:
from tensorflow.keras.losses import mean_absolute_error
mean_absolute_error(y_true, y_pred)

ImportError: cannot import name 'mean_absolute_error' from 'tensorflow.keras.losses' (/usr/local/lib/python3.10/dist-packages/keras/_tf_keras/keras/losses/__init__.py)

In [10]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_true, y_pred)

8.268404487595205

### <a name='a4'></a> 2.3 Mean Squared Error
### $$MSE = \frac{1}{N}\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}$$

In [11]:
def mean_squared_error(y_true, y_pred):
    """Return the mean squared error (MSE) between targets and predictions.

    Both inputs are converted to float NumPy arrays first, so plain lists and
    tuples are accepted and integer inputs cannot wrap around or overflow in
    the subtraction and squaring. Values are paired by position; pandas index
    labels are ignored.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The sum of squared differences divided by ``len(y_true)``.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        # Without this guard the result would be 0 / 0 -> NaN plus a warning.
        raise ValueError('y_true must contain at least one sample')
    return ((y_true - y_pred) ** 2).sum() / len(y_true)

mean_squared_error(y_true, y_pred)

103.25127454859495

In [14]:
# NOTE: in this environment (TensorFlow 2.17.0) this import raises ImportError,
# as shown in the cell output: `mean_squared_error` cannot be imported from
# tensorflow.keras.metrics. The scikit-learn import in the following cell works.
from tensorflow.keras.metrics import mean_squared_error
mean_squared_error(y_true, y_pred)

ImportError: cannot import name 'mean_squared_error' from 'tensorflow.keras.metrics' (/usr/local/lib/python3.10/dist-packages/keras/_tf_keras/keras/metrics/__init__.py)

In [13]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_true, y_pred)

103.25127454859495

### <a name='a5'></a> 2.4 Root Mean Squared Error
### $$RMSE = \sqrt{MSE}$$

In [15]:
def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error (RMSE): the square root of the MSE.

    Both inputs are converted to float NumPy arrays first, so plain lists and
    tuples are accepted and integer inputs cannot wrap around or overflow in
    the subtraction and squaring. Values are paired by position; pandas index
    labels are ignored.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The square root of the sum of squared differences divided by
        ``len(y_true)``.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        # Without this guard the result would be sqrt(0 / 0) -> NaN plus a warning.
        raise ValueError('y_true must contain at least one sample')
    return np.sqrt(((y_true - y_pred) ** 2).sum() / len(y_true))

root_mean_squared_error(y_true, y_pred)

10.161263432693541

In [16]:
np.sqrt(mean_squared_error(y_true, y_pred))

10.161263432693541

### <a name='a6'></a> 2.5 Max Error

In [17]:
def max_error(y_true, y_pred):
    """Return the largest absolute difference between targets and predictions.

    Both inputs are converted to float NumPy arrays first, so plain lists and
    tuples are accepted and unsigned-integer inputs cannot wrap around on
    subtraction. Values are paired by position; pandas index labels are
    ignored.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The maximum of ``|y_true - y_pred|``.

    Raises:
        ValueError: If the inputs are empty (raised by NumPy's ``max`` on a
            zero-size array).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.abs(y_true - y_pred).max()

In [18]:
max_error(y_true, y_pred)

32.207859330880595

In [19]:
from sklearn.metrics import max_error

max_error(y_true, y_pred)

32.207859330880595

### <a name='a7'></a> 2.6 R2 score - współczynnik determinacji
### $$R2\_score = 1 - \frac{\sum_{i=1}^{N}(y_{true,i} - y_{pred,i})^{2}}{\sum_{i=1}^{N}(y_{true,i} - \overline{y_{true}})^{2}}$$

In [20]:
from sklearn.metrics import r2_score

r2_score(y_true, y_pred)

0.7306689407350901

In [21]:
def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` is the sum of squared deviations of ``y_true``
    from its mean.

    Both inputs are converted to float NumPy arrays first, so plain lists and
    tuples are accepted. Values are paired by position; pandas index labels
    are ignored.

    Args:
        y_true: Array-like of ground-truth values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The R^2 score. When ``y_true`` is constant (``SS_tot == 0``) the ratio
        is undefined; in that case 1.0 is returned for perfect predictions
        and 0.0 otherwise, following scikit-learn's default convention for
        constant targets.

    Raises:
        ValueError: If ``y_true`` is empty.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size == 0:
        raise ValueError('y_true must contain at least one sample')

    numerator = ((y_true - y_pred) ** 2).sum()
    denominator = ((y_true - y_true.mean()) ** 2).sum()

    # NumPy scalars do not raise ZeroDivisionError (they yield inf/NaN with a
    # RuntimeWarning), so the zero denominator has to be tested explicitly.
    if denominator == 0:
        return 1.0 if numerator == 0 else 0.0
    return 1 - numerator / denominator

In [22]:
r2_score(y_true, y_pred)

0.7306689407350901