# MAV - Laboratory 3 -> PAIS

In [84]:
from types import FunctionType

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import boston_housing

In [85]:
# Load the Boston housing regression dataset bundled with Keras as
# (features, targets) pairs for the training and test splits.
(train_x, train_y), (test_x, test_y) = boston_housing.load_data()

##### Словари со средними значениями метрик — в конце понадобятся для построения итоговых графиков

In [86]:
# Experiment label -> mean cross-validation MAE; filled in by the experiment
# cells below and plotted in the final comparison.
MEAN_MAE_SCORE: dict = {}
# Experiment label -> mean cross-validation MSE (same labels as above).
MEAN_MSE_SCORE: dict = {}

In [87]:
# Quick sanity check of the loaded data: print the shape of the training and
# test feature arrays, then the raw (unscaled) test features.
print(train_x.shape)
print(test_x.shape)
print(test_x)

In [88]:
def normalize(train_data, test_data):
    """Standardize both datasets using the training set's per-feature statistics.

    Each feature (column) is centred on the training mean and divided by the
    training standard deviation. The test data is scaled with the *training*
    statistics so that no information from the test set is used.

    The inputs are never modified: new arrays are returned. Features that are
    constant in the training data (standard deviation of zero) are only
    centred, instead of being divided by zero.

    Args:
        train_data: array-like of training features, samples along axis 0.
        test_data: array-like of test features with the same feature layout.

    Returns:
        A ``(scaled_train, scaled_test)`` tuple of new arrays.
    """
    train_array = np.asarray(train_data)
    test_array = np.asarray(test_data)

    mean = train_array.mean(axis=0)
    std = train_array.std(axis=0)
    # A zero std would turn the whole feature into NaN/inf; dividing by 1
    # leaves such a feature centred at zero instead.
    safe_std = np.where(std == 0, 1.0, std)

    # Out-of-place arithmetic: the caller's arrays stay untouched, and integer
    # inputs are promoted to float instead of raising a casting error.
    return (train_array - mean) / safe_std, (test_array - mean) / safe_std

In [89]:
from sklearn.preprocessing import StandardScaler

def standardize(train_data, test_data):
    """Scale both datasets with a ``StandardScaler`` fitted on the training data.

    The scaler learns its statistics from ``train_data`` only; the same fitted
    scaler is then applied to ``test_data``.

    Returns:
        A ``(scaled_train, scaled_test)`` tuple.
    """
    fitted_scaler = StandardScaler().fit(train_data)
    return fitted_scaler.transform(train_data), fitted_scaler.transform(test_data)

In [90]:
def build_regression_model():
    """Create and compile a fresh fully connected regression network.

    Architecture: two hidden ``Dense`` layers of 64 ReLU units followed by a
    single linear output unit. The model is compiled with the RMSprop
    optimizer, MSE as the loss and MAE as an additional metric.

    Returns:
        The compiled ``Sequential`` model (not yet trained).
    """
    network = Sequential([
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(1),
    ])
    network.compile(loss='mse', optimizer='rmsprop', metrics=['mae'])
    return network

In [91]:
def hand_cross_validation_model(train_X = train_x, test_X = test_x, train_Y = train_y, k: int = 4, num_epochs: int = 100, batch_size: int = 2, scaling_function = normalize):
    """Run a manual k-fold cross-validation of the regression model.

    The training features are scaled once with ``scaling_function`` and split
    into ``k`` consecutive folds of ``len(train_X) // k`` samples. For every
    fold a fresh model is trained on the remaining samples, its training
    history is plotted (MAE and loss, training in red, validation in green)
    and the model is scored on the held-out fold.

    Args:
        train_X: training features, samples along axis 0.
        test_X: test features; only passed to ``scaling_function``.
        train_Y: training targets aligned with ``train_X``.
        k: number of folds, at least 2.
        num_epochs: epochs used to train each fold's model.
        batch_size: batch size used to train each fold's model.
        scaling_function: callable ``(train, test) -> (scaled_train, scaled_test)``.

    Returns:
        ``(mean_mae, mean_mse)`` averaged over the ``k`` held-out folds.

    Raises:
        ValueError: if ``k`` is smaller than 2 or larger than the number of
            training samples.

    Note:
        When the sample count is not divisible by ``k``, the trailing
        ``len(train_X) % k`` samples are never used as validation data; they
        are part of the training portion of every fold.
    """
    if k < 2:
        raise ValueError(f'k must be at least 2, got {k}')

    # Scale private float copies: a scaling function may work in place, and
    # the default arguments are the shared module-level arrays, which must not
    # change between experiments. The scaled test set is not needed here.
    train_data, _ = scaling_function(
        np.array(train_X, dtype=float),
        np.array(test_X, dtype=float),
    )

    num_val_samples: int = len(train_data) // k
    if num_val_samples == 0:
        raise ValueError(f'k={k} exceeds the number of training samples ({len(train_data)})')

    all_scores_mae: list = []
    all_losses_mse: list = []

    for i in range(k):
        print('processing fold #', i)
        fold_start = i * num_val_samples
        fold_end = fold_start + num_val_samples

        # Held-out fold used for the final score of this iteration.
        val_data = train_data[fold_start:fold_end]
        val_targets = train_Y[fold_start:fold_end]

        # Everything outside the fold is used for training.
        partial_train_data = np.concatenate([train_data[:fold_start], train_data[fold_end:]], axis=0)
        partial_train_targets = np.concatenate([train_Y[:fold_start], train_Y[fold_end:]], axis=0)

        model = build_regression_model()
        # NOTE(review): the val_* curves plotted below come from Keras'
        # validation_split (a 10% slice of the partial training data), not
        # from the held-out fold, which is only used by evaluate() further down.
        history = model.fit(partial_train_data, partial_train_targets, epochs=num_epochs, batch_size=batch_size, validation_split=0.1, verbose=0)

        history_df = pd.DataFrame(history.history)

        fig, axes = plt.subplots(1, 2, figsize=(15,7))

        sns.lineplot(data=history_df['mae'], ax=axes[0], color='red')
        sns.lineplot(data=history_df['val_mae'], ax=axes[0],  color='green')
        axes[0].set_title(f'Mae in {i + 1} iteration')

        sns.lineplot(data=history_df['loss'], ax=axes[1], color='red')
        sns.lineplot(data=history_df['val_loss'], ax=axes[1], color='green')
        axes[1].set_title(f'Loss in {i + 1} iteration')

        # evaluate() returns the loss first (MSE), then the compiled metric (MAE).
        val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)

        all_scores_mae.append(val_mae)
        all_losses_mse.append(val_mse)

    mean_mae = np.mean(all_scores_mae)
    mean_mse = np.mean(all_losses_mse)

    print(f'All Scores MAE: {all_scores_mae}')
    print(f'Mean of MAE score ---> {mean_mae}')

    print(f'All Losses MSE: {all_losses_mse}')
    print(f'Mean of MSE loss ---> {mean_mse}')

    return mean_mae, mean_mse


##### Обучение и оценка модели без кросс-валидации (`Hold-Out`): обучение на `train`, оценка на `test`

In [92]:
def final_evaluation(num_epochs: int = 100, batch_size: int = 10):
    """Train on the whole training set and report the error on the test set.

    A fresh model is trained on the standardized module-level ``train_x`` /
    ``train_y`` (no cross-validation), the training history is plotted (MAE
    and loss, training in red, validation in green) and the final MAE and MSE
    on the standardized ``test_x`` / ``test_y`` are printed.

    Args:
        num_epochs: number of training epochs.
        batch_size: training batch size.
    """
    model_without_CV = build_regression_model()
    train_data, test_data = standardize(train_x, test_x)

    history = model_without_CV.fit(train_data, train_y, epochs=num_epochs, batch_size=batch_size, validation_split=0.1)

    history_df = pd.DataFrame(history.history)

    fig, axes = plt.subplots(1, 2, figsize=(15,7))

    sns.lineplot(data=history_df['mae'], ax=axes[0], color='red')
    sns.lineplot(data=history_df['val_mae'], ax=axes[0],  color='green')
    axes[0].set_title('Mae')

    sns.lineplot(data=history_df['loss'], ax=axes[1], color='red')
    sns.lineplot(data=history_df['val_loss'], ax=axes[1], color='green')
    axes[1].set_title('Loss')

    # evaluate() returns the loss first and the compiled metrics after it; the
    # model is compiled with loss='mse' and metrics=['mae'], so the order is
    # (MSE, MAE). Unpacking it the other way round swaps the printed labels.
    final_mse, final_mae = model_without_CV.evaluate(test_data, test_y)

    print(f'Final MAE ---> {final_mae}')
    print(f'Final MSE ---> {final_mse}')
    

##### Starting Analysis...

In [93]:
# Baseline experiment: default number of folds (k=4) and default scaling
# function (normalize), 100 epochs, batch size 10. Stored under 'default'.
MAE, MSE = hand_cross_validation_model(batch_size=10, num_epochs=100)
MEAN_MAE_SCORE['default'] = MAE
MEAN_MSE_SCORE['default'] = MSE

In [94]:
# Fewer folds, longer training: k=2, 150 epochs, batch size 10,
# StandardScaler-based scaling. Stored under 'k=2 e=150'.
MAE, MSE = hand_cross_validation_model(k=2, batch_size=10, num_epochs=150, scaling_function=standardize)
MEAN_MAE_SCORE['k=2 e=150'] = MAE
MEAN_MSE_SCORE['k=2 e=150'] = MSE

In [95]:
# Hold-out run without cross-validation, using the function's defaults
# (100 epochs, batch size 10); prints the final MAE and MSE on the test set.
final_evaluation()

##### `w_OF` -> without overfitting

In [96]:
# More folds, shorter training: k=10, 80 epochs, batch size 10,
# StandardScaler-based scaling. Stored under 'k=10 e=80 w_OF'.
MAE, MSE = hand_cross_validation_model(k=10, num_epochs=80, batch_size=10, scaling_function=standardize)
MEAN_MAE_SCORE['k=10 e=80 w_OF'] = MAE
MEAN_MSE_SCORE['k=10 e=80 w_OF'] = MSE

In [101]:
# Configuration recorded as 'optimal': k=6, 60 epochs, batch size 10,
# StandardScaler-based scaling.
MAE, MSE = hand_cross_validation_model(k=6, num_epochs=60, batch_size=10, scaling_function=standardize)
MEAN_MAE_SCORE['optimal'] = MAE
MEAN_MSE_SCORE['optimal'] = MSE

# <-------------------------------------------------------------------- Final model comparison -------------------------------------------------------------------->

In [102]:
# Turn the {experiment label: mean MAE} dictionary into a two-column table.
df_mae_score = pd.DataFrame(MEAN_MAE_SCORE.items(), columns=['Name', 'MAE'])
# Bare expression as the last line of the cell, so the notebook renders the table.
df_mae_score

In [103]:
# Turn the {experiment label: mean MSE} dictionary into a two-column table.
df_mse_score = pd.DataFrame(MEAN_MSE_SCORE.items(), columns=['Name', 'MSE'])
# Bare expression as the last line of the cell, so the notebook renders the table.
df_mse_score

In [112]:
# Final comparison: one point per experiment, mean MAE in red and mean MSE in
# green, both drawn into the same figure.
fig = plt.figure(figsize=(15,7), dpi=300)

# NOTE(review): no `ax` is passed, so both plots presumably land on the same
# current axes and share one y-axis even though MAE and MSE are on different
# scales — confirm this is the intended presentation.
sns.pointplot(data=df_mae_score, x='Name', y='MAE', color='red')
sns.pointplot(data=df_mse_score, x='Name', y='MSE', color='green')
plt.title('MAE vs MSE');