In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# Seed NumPy's global random generator so the np.random.randn draws in the
# cells below give the same values on every top-to-bottom run.
np.random.seed(42)
# Default size (width, height) for matplotlib figures in this notebook.
plt.rcParams["figure.figsize"] = (10,6)

### 1. Przygotowanie danych

In [None]:
# Synthetic regression data: 50 "true" targets drawn around 100 (standard
# normal noise scaled by 20) and predictions equal to the truth plus
# independent standard normal noise scaled by 10.
# The results DataFrame is built (and previewed) in the next cell, so it is
# not constructed a second time here.
y_true = 100 + 20 * np.random.randn(50)
y_pred = y_true + 10 * np.random.randn(50)

In [None]:
# Put targets and predictions side by side (one row per sample) and preview
# the first rows of the frame.
results = pd.DataFrame(
    {
        'y_true': y_true,
        'y_pred': y_pred,
    }
)
results.head()

#### - obliczenie błędu (różnicy y_true − y_pred) i kwadratu błędu

In [None]:
# Per-sample residual (target minus prediction) and its square, stored as new
# columns of results; the metric cell below reads both columns.
results['error'] = results['y_true'].sub(results['y_pred'])
results['error_squared'] = results['error'].pow(2)
results.head()

#### - obliczenie MAE, MSE, RMSE i błędu maksymalnego (ME)

In [None]:
# Hand-computed regression metrics from the residual columns of results.
# Vectorised pandas reductions (.abs(), .mean(), .max()) replace the manual
# sum()/len() divisions and the Python builtins max()/abs(), which would walk
# the Series element by element.
print(f"MAE - Mean absolute error: {results['error'].abs().mean()}")
print(f"MSE - Mean squared error: {results['error_squared'].mean()}")
# RMSE is the square root of the MSE, so it is in the same units as the target.
print(f"RMSE - Root mean squared error: {np.sqrt(results['error_squared'].mean())}")
# Largest absolute residual over all samples.
print(f"ME - Max error: {results['error'].abs().max()} ")

### 2. Interpretacja graficzna modelu:

In [None]:
def plot_regression_results(y_true, y_pred):
    """Show an interactive scatter plot of predictions against true values.

    Every sample is drawn as a marker at (y_true, y_pred). A second trace
    joins the smallest and the largest value found in either input, giving
    the y_pred == y_true diagonal as a visual reference.

    Parameters
    ----------
    y_true : array-like
        True target values.
    y_pred : array-like
        Predicted values; combined with ``y_true`` into one DataFrame.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()`` and is not returned.
    """
    results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})

    # Common range of both columns. The locals are deliberately not called
    # `min` / `max`, so the builtins stay usable inside the function.
    lower = results[['y_true', 'y_pred']].min().min()
    upper = results[['y_true', 'y_pred']].max().max()

    fig = go.Figure(
        data=[
            # One marker per sample.
            go.Scatter(x=results['y_true'], y=results['y_pred'], mode='markers'),
            # Reference diagonal across the common range.
            go.Scatter(x=[lower, upper], y=[lower, upper]),
        ],
        layout=go.Layout(showlegend=False, width=800,
                         xaxis_title='y_true',
                         yaxis_title='y_pred',
                         title='Regresja: y_true vs. y_pred'))
    fig.show()



In [None]:
# Draw the scatter plot for the current y_true / y_pred arrays.
plot_regression_results(y_true, y_pred)

### 3. Histogram błędów

In [None]:
# Regenerate the synthetic data with 1000 samples; this overwrites the
# y_true / y_pred / results objects created earlier in the notebook.
y_true = 20 * np.random.randn(1000) + 100
y_pred = 10 * np.random.randn(1000) + y_true

# Frame with targets, predictions and the residual (target minus prediction).
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred}).assign(
    error=lambda frame: frame['y_true'] - frame['y_pred']
)

# Interactive histogram of the residuals (the cell's displayed output).
px.histogram(results, x='error', nbins=50, width=800)

In [None]:
# Static matplotlib version of the residual histogram.
# A histogram only counts values per bin, so the input does not need sorting.
plt.hist(results['error'], bins=50)
plt.xlabel('error')
# The trailing semicolon keeps the Text(...) repr of the last call out of the
# cell output, so only the figure is shown.
plt.ylabel('count');

### 4. Metryki ze sklearn

In [2]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, max_error

In [3]:
# Fresh synthetic sample of 1000 points for the sklearn metrics: targets
# around 100 (noise scaled by 20), predictions = targets + noise scaled by 10.
y_true = 20 * np.random.randn(1000) + 100
y_pred = 10 * np.random.randn(1000) + y_true
# Same data as a two-column frame, one row per sample.
results = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})

In [4]:
# Every sklearn metric used here takes (y_true, y_pred), so they are all
# evaluated in one pass and unpacked in the order of the metric tuple.
mae, mse, r2, max_err = (
    metric(y_true, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score, max_error)
)
# RMSE is the square root of the MSE computed above.
rmse = np.sqrt(mse)


In [5]:
# One-row summary table: one column per metric, the row labelled 'model scores'.
model_scores = pd.DataFrame(
    {
        'mae': mae,
        'mse': mse,
        'rmse': rmse,
        'r2_score': r2,
        'max_error': max_err,
    },
    index=['model scores'],
)

In [7]:
# Display the one-row summary table of the sklearn metrics.
model_scores

Unnamed: 0,mae,mse,rmse,r2_score,max_error
model scores,7.916481,99.893809,9.994689,0.739291,31.931076


In [8]:
# Display the raw MSE value at full precision (the summary table shows it rounded).
mse

99.89380919231107