In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
def _print_regression_metrics(y_true, y_pred):
    """Print R2, MAE, MSE and RMSE for one set of targets and predictions."""
    print(f'R2 Score: {r2_score(y_true, y_pred)}')
    print(f'Mean Absolute Error: {mean_absolute_error(y_true, y_pred)}')
    # Compute the MSE once and reuse it for the RMSE line.
    mse = mean_squared_error(y_true, y_pred)
    print(f'Mean Squared Error: {mse}')
    print(f'Root Mean Squared Error: {np.sqrt(mse)}')


def evaluate(model, X_train, y_train, X_test, y_test):
    """Print regression metrics of ``model`` on the train and the test split.

    For each split the model's predictions are compared with the targets and
    the R2 score, mean absolute error, mean squared error and root mean
    squared error are printed. Nothing is returned.

    Parameters
    ----------
    model : object
        Fitted estimator; only ``model.predict(X)`` is called.
    X_train, X_test : array-like
        Feature sets passed to ``model.predict``.
    y_train, y_test : array-like
        Targets the predictions are scored against.

    Notes
    -----
    ``r2_score``, ``mean_absolute_error`` and ``mean_squared_error`` must be
    available in the notebook namespace (presumably from ``sklearn.metrics``;
    they are not imported in the import cell above -- TODO confirm).
    """
    print('TRAIN Sets results:')
    print()
    _print_regression_metrics(y_train, model.predict(X_train))

    print()
    print('*' * 50)

    print('TEST Sets results:')
    print()
    _print_regression_metrics(y_test, model.predict(X_test))

In [4]:
def final_evaluation(model, X_train, y_train, X_test, y_test):
    """Return summary metrics for ``model`` on the train and test splits.

    R2 is computed on the targets and predictions exactly as passed in.
    The error metrics are computed on the test split only, after applying
    ``np.expm1`` to both the targets and the predictions (presumably the
    target was ``log1p``-transformed before training -- TODO confirm against
    the preprocessing cells).

    Parameters
    ----------
    model : object
        Fitted estimator; only ``model.predict(X)`` is called.
    X_train, X_test : array-like
        Feature sets passed to ``model.predict``.
    y_train, y_test : array-like
        Targets the predictions are scored against.

    Returns
    -------
    tuple
        ``(r2_train, r2_test, mae, mse, rmse)``.

    Notes
    -----
    ``r2_score``, ``mean_absolute_error`` and ``mean_squared_error`` must be
    available in the notebook namespace (presumably from ``sklearn.metrics``;
    they are not imported in the import cell above -- TODO confirm).
    """
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    r2_train = r2_score(y_train, pred_train)
    r2_test = r2_score(y_test, pred_test)

    # Back-transform once and reuse the arrays for every error metric.
    actual = np.expm1(y_test)
    predicted = np.expm1(pred_test)

    mae = mean_absolute_error(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)  # reuse the MSE instead of recomputing it
    return r2_train, r2_test, mae, mse, rmse

In [5]:
def plot(y_test, y_predict, value_lim=(3.5, 7), residual_lim=(-1, 1),
         count_lim=(0, 60), residual_count_lim=(0, 130)):
    """Draw a 2x2 grid of diagnostic plots for one set of predictions.

    Panels, in order:
      1. overlaid histograms of ``y_test`` and ``y_predict``;
      2. scatter of actual vs. predicted values;
      3. histogram of the residuals ``y_test - y_predict`` with a red line at 0;
      4. scatter of residuals vs. predicted values with a red line at 0.

    Parameters
    ----------
    y_test, y_predict : array-like
        Actual and predicted target values; they must support element-wise
        subtraction (e.g. NumPy arrays or pandas Series).
    value_lim : tuple, optional
        Axis limits used wherever target/predicted values are on an axis.
    residual_lim : tuple, optional
        Axis limits used wherever residuals are on an axis.
    count_lim : tuple, optional
        y-axis limits of the value histogram (panel 1).
    residual_count_lim : tuple, optional
        y-axis limits of the residual histogram (panel 3).

    The defaults reproduce the limits that were previously hard-coded
    (presumably tuned for a log-scaled price target -- TODO confirm).
    """
    # Computed once; used by both residual panels.
    residuals = y_test - y_predict

    plt.figure(figsize=(14, 10))

    # Panel 1: distribution of actual vs. predicted values.
    plt.subplot(2, 2, 1)
    plt.hist(y_test, label='test', alpha=0.7, bins=30)
    plt.hist(y_predict, label='predicted', alpha=0.7, bins=30)
    plt.xlim(value_lim)
    plt.ylim(count_lim)
    plt.xlabel('Price values')
    plt.ylabel('Distribution')
    plt.legend()

    # Panel 2: actual vs. predicted.
    plt.subplot(2, 2, 2)
    plt.scatter(y_test, y_predict, s=10)
    plt.xlim(value_lim)
    plt.ylim(value_lim)
    plt.xlabel('Actual')
    plt.ylabel('Predicted')

    # Panel 3: residual distribution; the reference line takes a scalar x.
    plt.subplot(2, 2, 3)
    plt.hist(residuals, bins=20)
    plt.axvline(0, color='red')
    plt.xlim(residual_lim)
    plt.ylim(residual_count_lim)
    plt.xlabel('Residuals')
    plt.ylabel('Distribution')

    # Panel 4: residuals vs. predicted; the reference line takes a scalar y.
    plt.subplot(2, 2, 4)
    plt.scatter(y_predict, residuals, s=10)
    plt.xlim(value_lim)
    plt.ylim(residual_lim)
    plt.xlabel('Predicted')
    plt.ylabel('Residuals')
    plt.axhline(0, c='red')

    plt.show()


In [6]:
def mutual_plot(models, test_set, y_test, value_lim=(3.5, 7),
                residual_lim=(-1, 1), count_lim=(0, 60),
                residual_count_lim=(0, 130)):
    """Draw one row of four diagnostic plots per model.

    For every model in ``models`` a new 15x3 figure is created from
    ``model.predict(test_set)`` with these panels, left to right:
      1. overlaid histograms of ``y_test`` and the predictions;
      2. scatter of actual vs. predicted values;
      3. histogram of the residuals ``y_test - y_predict`` with a red line at 0;
      4. scatter of residuals vs. predicted values with a red line at 0.

    Parameters
    ----------
    models : iterable
        Fitted estimators; only ``model.predict(test_set)`` is called.
        An empty iterable draws nothing.
    test_set : array-like
        Features passed to every model's ``predict``.
    y_test : array-like
        Actual target values; must support element-wise subtraction with
        the predictions (e.g. a NumPy array or pandas Series).
    value_lim : tuple, optional
        Axis limits used wherever target/predicted values are on an axis.
    residual_lim : tuple, optional
        Axis limits used wherever residuals are on an axis.
    count_lim : tuple, optional
        y-axis limits of the value histogram (panel 1).
    residual_count_lim : tuple, optional
        y-axis limits of the residual histogram (panel 3).

    The defaults reproduce the limits that were previously hard-coded
    (presumably tuned for a log-scaled price target -- TODO confirm).
    """
    for model in models:
        y_predict = model.predict(test_set)
        # Computed once per model; used by both residual panels.
        residuals = y_test - y_predict

        plt.figure(figsize=(15, 3))

        # Panel 1: distribution of actual vs. predicted values.
        plt.subplot(1, 4, 1)
        plt.hist(y_test, label='test set', alpha=0.7, bins=30)
        plt.hist(y_predict, label='predicted', alpha=0.7, bins=30)
        plt.xlim(value_lim)
        plt.ylim(count_lim)
        plt.xlabel('Price values')
        plt.ylabel('Distribution')
        plt.legend()

        # Panel 2: actual vs. predicted.
        plt.subplot(1, 4, 2)
        plt.scatter(y_test, y_predict, s=10)
        plt.xlim(value_lim)
        plt.ylim(value_lim)
        plt.xlabel('Actual')
        plt.ylabel('Predicted')

        # Panel 3: residual distribution; the reference line takes a scalar x.
        plt.subplot(1, 4, 3)
        plt.hist(residuals, bins=20)
        plt.axvline(0, color='red')
        plt.xlim(residual_lim)
        plt.ylim(residual_count_lim)
        plt.xlabel('Residuals')
        plt.ylabel('Distribution')

        # Panel 4: residuals vs. predicted; the reference line takes a scalar y.
        plt.subplot(1, 4, 4)
        plt.scatter(y_predict, residuals, s=10)
        plt.xlim(value_lim)
        plt.ylim(residual_lim)
        plt.xlabel('Predicted')
        plt.ylabel('Residuals')
        plt.axhline(0, c='red')

        plt.show()


In [7]:
# def proba(models, test_set, y_test):
    
#     for model in models:
#         y_predict = model.predict(test_set)

#         fig, ((ax1, ax2, ax3, ax4)) = plt.subplots(1, 4)
#         fig.suptitle(model)
                                  
#         ax1.hist(y_test, label='test', alpha=0.7, bins=30)
#         ax1.hist(y_predict, label='predicted', alpha=0.7, bins=30)
#         ax1.set(xlabel='Price values', ylabel='Distribution')
#         ax1.legend()            
                                  
# #         ax2.plot(x, y**2, 'tab:orange')
                                  
# #         ax3.plot(x, -y, 'tab:green')
                                  
# #         ax4.plot(x, -y**2, 'tab:red')

# #         for ax in fig.get_axes():
# #             ax.label_outer()