###### Importing libraries

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

###### Creating class

In [None]:
class plot_drawer():
    """Create overview plots for the corner number prediction model.

    Input is either a URL/local path to a JSON dataset or a pandas DataFrame.
    Columns: name|gt_corners|rb_corners|mean|max|min|floor_mean|floor_max|
    floor_min|ceiling_mean|ceiling_max|ceiling_min
    """

    # Statistics that exist as overall, 'floor_*' and 'ceiling_*' columns.
    _STATS = ('mean', 'max', 'min')

    def __init__(self, data='https://ai-process-sandy.s3.eu-west-1.amazonaws.com/purge/deviation.json'):
        """Load the model performance table.

        Args:
            data: URL or local path (``str`` or any ``os.PathLike`` such as
                ``pathlib.Path``) of a JSON file readable by
                ``pandas.read_json``, or an already loaded pandas DataFrame.
                Defaults to the dataset hosted at
                'https://ai-process-sandy.s3.eu-west-1.amazonaws.com/purge/deviation.json'.
        """
        # isinstance() also accepts str subclasses and path objects, which the
        # former exact-type comparison silently stored as the "table".
        if isinstance(data, (str, os.PathLike)):
            self.data_table = pd.read_json(data)
        else:
            self.data_table = data

    def draw_plots(self):
        """Draw 4 graphs with a model performance overview and save them as PNG.

        The files are written into a 'plots' folder inside the current working
        directory; the folder is created when it does not exist yet.
        ``self.data_table`` is only read, never modified.

        Returns:
            str: 'Plots generated and saved in ' followed by the folder path.
        """
        corner_model = self.data_table

        # File name -> figure, in the order the graphs are drawn and saved.
        figures = {
            'data_overview.png': self._draw_data_overview(corner_model),
            'predictions_distribution.png': self._draw_predictions_distribution(corner_model),
            'predictions_overview.png': self._draw_predictions_overview(corner_model),
            'floor_v_ceiling_predictions.png': self._draw_floor_v_ceiling(corner_model),
        }

        # os.path.join keeps the path valid on every platform, and
        # exist_ok=True covers both the first run and later runs.
        plots_folder = os.path.join(os.getcwd(), 'plots')
        os.makedirs(plots_folder, exist_ok=True)

        for file_name, fig in figures.items():
            fig.savefig(os.path.join(plots_folder, file_name), facecolor='w')

        return 'Plots generated and saved in ' + plots_folder

    @staticmethod
    def _upper_fence(values):
        """Return the boxplot outlier limit of a Series: Q3 + 1.5 * IQR."""
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        return q3 + (q3 - q1) * 1.5

    def _draw_data_overview(self, corner_model, top_rooms_limit=10):
        """Draw the dataset overview: corner count pie chart and room name bar chart."""
        # Room names with more than `top_rooms_limit` rows. The comparison is
        # strict, so a name with exactly `top_rooms_limit` rows is left out.
        name_counts = corner_model['name'].value_counts()
        top_rooms_names = name_counts[name_counts > top_rooms_limit].index
        top_rooms_data = corner_model[corner_model['name'].isin(top_rooms_names)]
        top_rooms_count = (
            top_rooms_data
            .pivot_table(index='name', values='gt_corners', aggfunc='count')
            .sort_values(by='gt_corners', ascending=False)
            .rename(columns={'gt_corners': 'qty'})
            .reset_index()
        )

        # value_counts() sorts by frequency, so index[0] is the most common
        # ground-truth corner count and max() is its number of rooms.
        corner_counts = corner_model['gt_corners'].value_counts()

        fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(15, 5))
        fig.suptitle(f'{len(corner_model)} rooms total in the dataset', size='x-large')

        # 1. Pie chart showing distribution of rooms by corner count
        pie_ax.pie(corner_counts, labeldistance=1.2, autopct='%1.0f%%', pctdistance=0.6)
        pie_ax.legend([f'{corners} corners' for corners in corner_counts.index], loc='best')
        pie_ax.title.set_text(f'{corner_counts.max()} rooms with {corner_counts.index[0]} corners')

        # 2. Horizontal bar chart with the row count of each frequent room name
        bar_ax.barh(data=top_rooms_count.sort_values(by='qty'), y='name', width='qty')
        bar_ax.title.set_text(
            f'{len(top_rooms_names)} room types with {top_rooms_limit}+ observations '
            f'({top_rooms_count.qty.sum()} samples)'
        )
        return fig

    def _draw_predictions_distribution(self, corner_model):
        """Draw a 3x3 boxplot grid: overall/floor/ceiling rows by mean/max/min columns."""
        fig, axes = plt.subplots(3, 3, figsize=(15, 10))
        fig.suptitle(
            f'Distribution of predictions for {len(corner_model)} observations in dataset',
            size='x-large',
        )

        for col, stat in enumerate(self._STATS):
            # Only the overall statistic (top row) gets the outlier limit line
            # and the outlier count in its title.
            fence = self._upper_fence(corner_model[stat])
            outliers = int((corner_model[stat] > fence).sum())

            overall_ax = axes[0, col]
            sns.boxplot(data=corner_model, x=stat, ax=overall_ax)
            overall_ax.set_title(f'{outliers} outliers above {round(fence, 2)}')
            overall_ax.axvline(x=fence, ls=':', color='red')

            sns.boxplot(data=corner_model, x='floor_' + stat, ax=axes[1, col])
            sns.boxplot(data=corner_model, x='ceiling_' + stat, ax=axes[2, col])
        return fig

    def _draw_predictions_overview(self, corner_model):
        """Draw histograms of overall, floor and ceiling mean predictions up to their 75% quantile."""
        columns = ('mean', 'floor_mean', 'ceiling_mean')
        medians = {name: round(corner_model[name].median(), 2) for name in columns}
        # Threshold that includes 75% of the predictions, rounded to a whole number.
        upper_quartiles = {name: round(corner_model[name].quantile(0.75), 0) for name in columns}
        top_75 = round(max(upper_quartiles.values()), 0)

        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        fig.suptitle(f'Mean predictions for 75% of rooms do not exceed {top_75}', size='x-large')

        for ax, name in zip(axes, columns):
            sns.histplot(corner_model, x=name, binrange=(0, upper_quartiles[name]), ax=ax)
            ax.axvline(x=medians[name], ls='--', label=f'{medians[name]} median prediction', color='red')
            ax.legend()
        return fig

    def _draw_floor_v_ceiling(self, corner_model):
        """Draw strip plots of the floor minus ceiling difference for mean, max and min predictions."""
        # The differences live in their own frame so that the table supplied
        # by the caller does not gain extra columns as a side effect.
        deviations = pd.DataFrame({
            'floor_v_ceiling_' + stat: corner_model['floor_' + stat] - corner_model['ceiling_' + stat]
            for stat in self._STATS
        })
        fvc_mean = round(deviations['floor_v_ceiling_mean'].mean(), 2)

        fig, axes = plt.subplots(1, 3, figsize=(15, 5), facecolor='white')
        fig.suptitle(f'Floor vs ceiling predictions mean difference is {fvc_mean}', size='x-large')

        for ax, column in zip(axes, deviations.columns):
            median = round(deviations[column].median(), 2)
            sns.stripplot(data=deviations, x=column, ax=ax)
            ax.axvline(x=median, ls='--', label=f'{median} median deviation', color='red')
            ax.legend()
        return fig

###### Run cell below to test the class

In [None]:
# Smoke test: with no argument, plot_drawer loads its default dataset URL.
class_test = plot_drawer()
# Draws and saves the plots; the call returns a message naming the output folder.
class_test.draw_plots()