In [59]:
import pandas as pd

In [60]:
class YiLong:
    """ Viewer for a hyperparameter tuning result stored as a CSV file.

    The CSV is expected to hold one row per evaluated hyperparameter combination:
    one column per hyperparameter plus the metric columns listed in
    ``regression_extra_output_columns`` / ``classification_extra_output_columns``.
    Every column that is not one of those metric columns is treated as a hyperparameter.

    The ``read_*`` view methods call ``display``, which is not imported here; it must be
    available in the running namespace (as it is inside an IPython/Jupyter session).
    """



    def __init__(self, type):
        """ Create an analyser for one kind of tuning result.

        Parameters
        ----------
        type : str
            Either 'Classification' or 'Regression'. (The name shadows the builtin
            ``type`` but is kept so existing keyword callers keep working.)

        Raises
        ------
        AssertionError
            If `type` is neither of the two accepted strings.
        """
        
        # check correct input
        assert type in ('Classification', 'Regression'), \
            f"type must be 'Classification' or 'Regression', got {type!r}"

        self.clf_type = type
        self._initialise_objects() # Initialise objects
        print(f'YiLong Initialised to analyse {self.clf_type}')



    def _initialise_objects(self):
        """ Helper to initialise objects """

        # Metric columns of a tuning result; 'Time' is deliberately last because
        # read_mean_val_scores drops it with [:-1] before averaging.
        self.regression_extra_output_columns = ['Train r2', 'Val r2', 'Test r2', 
            'Train RMSE', 'Val RMSE', 'Test RMSE', 'Train MAPE', 'Val MAPE', 'Test MAPE', 'Time']
        self.classification_extra_output_columns = ['Train accu', 'Val accu', 'Test accu', 
            'Train balanced_accu', 'Val balanced_accu', 'Test balanced_accu', 'Train f1', 'Val f1', 'Test f1', 
            'Train precision', 'Val precision', 'Test precision', 'Train recall', 'Val recall', 'Test recall', 'Time']
        self.tuning_result = None    # DataFrame, set by read_tuning_result
        self.hyperparameters = None  # list of column names, set by read_tuning_result



    def _extra_output_columns(self):
        """ Helper returning the metric column list that matches self.clf_type """

        if self.clf_type == 'Classification':
            return self.classification_extra_output_columns
        if self.clf_type == 'Regression':
            return self.regression_extra_output_columns
        raise ValueError(f"clf_type must be 'Classification' or 'Regression', got {self.clf_type!r}")



    def _require_tuning_result(self):
        """ Helper raising a clear error when no tuning result has been read in yet """

        if self.tuning_result is None:
            raise RuntimeError('No tuning result available - call read_tuning_result(address) first')



    def read_tuning_result(self, address):
        """ Read in Tuning Result

        Parameters
        ----------
        address : path or file-like
            Passed unchanged to ``pd.read_csv``.

        Stores the DataFrame in ``self.tuning_result`` and the names of all
        non-metric columns in ``self.hyperparameters``.
        """

        self.tuning_result = pd.read_csv(address)

        # len() of a DataFrame is its number of rows (one per evaluated combination),
        # so the message reports rows rather than columns.
        print(f'Successfully read in tuning result, with {len(self.tuning_result)} rows')

        # get list of hyperparameters by taking what is not in the extra_output_columns
        metric_columns = self._extra_output_columns()
        self.hyperparameters = [col for col in self.tuning_result.columns if col not in metric_columns]



    def read_sorted_full_df(self):
        """ View dataframe sorted in reverse in terms of validation score

        Sorts by 'Val r2' for Regression and 'Val accu' for Classification, highest first.

        Raises
        ------
        RuntimeError
            If read_tuning_result has not been called yet.
        """

        self._require_tuning_result()

        sort_column = 'Val r2' if self.clf_type == 'Regression' else 'Val accu'
        display(self.tuning_result.sort_values([sort_column], ascending = False))



    def read_mean_val_scores(self):
        """ View the means of evaluation metrics for combinations containing each individual value of a hyperparameter - for each hyperparameter

        For every hyperparameter, prints its name and displays a DataFrame with one
        column per distinct value (labelled with the value formatted as a string), one
        row per metric (every metric column except 'Time') and a final row 'n' holding
        the number of combinations in that group. Rows whose hyperparameter value is
        missing are gathered into one extra column labelled 'nan'.

        Raises
        ------
        RuntimeError
            If read_tuning_result has not been called yet.
        """

        self._require_tuning_result()

        metric_columns = self._extra_output_columns()[:-1] # everything except 'Time'

        for col in self.hyperparameters: # for each hyperparameter

            print(col)

            column = self.tuning_result[col]

            # Missing values are kept out of the sorted list: NaN never compares equal to
            # itself (so `column == value` would select nothing) and cannot be ordered
            # against strings. They get their own group below instead.
            hyperparameter_values = column.dropna().unique().tolist()
            hyperparameter_values.sort()

            groups = [(f'{value}', column == value) for value in hyperparameter_values]

            is_missing = column.isna()
            if is_missing.any():
                groups.append(('nan', is_missing))

            # collect one Series per value and build the dataframe once at the end
            mean_by_value = {}
            for label, mask in groups: # for each value in the hyperparameter
                tmp_df = self.tuning_result[mask] # select df with only those parameter values

                # get means
                tmp_df_mean = tmp_df[metric_columns].mean()

                # get number of observations in this group
                tmp_df_mean['n'] = len(tmp_df)

                mean_by_value[label] = tmp_df_mean

            display(pd.DataFrame(mean_by_value))

In [61]:
# Create an analyser configured for regression tuning results
yilong = YiLong('Regression')

YiLong Initialised to analyse Regression


In [62]:
# Load the tuning result CSV; every column that is not a metric column is recorded as a hyperparameter
yilong.read_tuning_result('../models/tuning/RS_knr_1.csv')

Successfully read in tuning result, with 80 columns


In [63]:
# Display the full tuning result sorted by 'Val r2', highest first
yilong.read_sorted_full_df()

Unnamed: 0,n_neighbors,weights,Train r2,Val r2,Test r2,Train RMSE,Val RMSE,Test RMSE,Train MAPE,Val MAPE,Test MAPE,Time
72,35.0,distance,1.0000,0.3403,0.3222,0.0000,0.2044,0.2073,0.000000e+00,1.987643e+14,2.014328e+14,0.0
54,70.0,distance,1.0000,0.3401,0.3315,0.0000,0.2044,0.2058,0.000000e+00,2.025692e+14,2.032047e+14,0.0
69,45.0,distance,1.0000,0.3397,0.3247,0.0000,0.2045,0.2069,0.000000e+00,2.004457e+14,2.021013e+14,0.0
39,65.0,distance,1.0000,0.3396,0.3297,0.0000,0.2045,0.2061,0.000000e+00,2.024369e+14,2.030907e+14,0.0
37,75.0,distance,1.0000,0.3395,0.3320,0.0000,0.2045,0.2058,0.000000e+00,2.029299e+14,2.033300e+14,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
42,15.0,uniform,0.4064,0.3166,0.3073,0.1939,0.2080,0.2095,1.801309e+14,1.963602e+14,1.968526e+14,0.0
17,10.0,distance,1.0000,0.2991,0.2858,0.0000,0.2107,0.2128,0.000000e+00,1.950277e+14,1.958677e+14,0.0
3,10.0,uniform,0.4272,0.2980,0.2853,0.1905,0.2108,0.2128,1.734909e+14,1.953454e+14,1.961245e+14,0.0
16,5.0,distance,1.0000,0.2426,0.2271,0.0000,0.2190,0.2213,0.000000e+00,1.932633e+14,1.945093e+14,0.0


In [64]:
# For each hyperparameter, display the mean of every metric (and the group size n) per distinct value
yilong.read_mean_val_scores()

n_neighbors


Unnamed: 0,5.0,10.0,15.0,20.0,25.0,30.0,35.0,40.0,45.0,50.0,...,155.0,160.0,165.0,170.0,175.0,180.0,185.0,190.0,195.0,200.0
Train r2,0.7442,0.7136,0.7032,0.6961,0.6926,0.68975,0.68765,0.6858,0.68465,0.6834,...,0.66965,0.66915,0.66875,0.6684,0.66815,0.6678,0.6677,0.66735,0.6671,0.66685
Val r2,0.2422,0.29855,0.3171,0.33155,0.335,0.3365,0.33985,0.33835,0.3391,0.33875,...,0.332,0.332,0.3318,0.33135,0.33045,0.33015,0.32955,0.3288,0.3285,0.32895
Test r2,0.2271,0.28555,0.3074,0.3129,0.3153,0.31945,0.32185,0.32445,0.32435,0.3268,...,0.32865,0.32815,0.32795,0.3276,0.32735,0.3267,0.32585,0.32595,0.32575,0.32535
Train RMSE,0.09,0.09525,0.09695,0.0981,0.09865,0.0991,0.09945,0.09975,0.0999,0.1001,...,0.10225,0.10235,0.1024,0.10245,0.1025,0.10255,0.10255,0.10265,0.10265,0.1027
Val RMSE,0.21905,0.21075,0.20795,0.20575,0.2052,0.205,0.20445,0.2047,0.2046,0.2046,...,0.20565,0.20565,0.2057,0.20575,0.2059,0.20595,0.20605,0.20615,0.2062,0.20615
Test RMSE,0.2213,0.2128,0.2095,0.20865,0.20835,0.2077,0.20735,0.20695,0.20695,0.20655,...,0.2063,0.20635,0.2064,0.20645,0.2065,0.2066,0.2067,0.2067,0.2067,0.2068
Train MAPE,76524070000000.0,86745430000000.0,90065460000000.0,92405810000000.0,93877510000000.0,94985640000000.0,95808270000000.0,96504150000000.0,97017580000000.0,97452270000000.0,...,102396100000000.0,102547600000000.0,102665100000000.0,102804200000000.0,102900900000000.0,103002400000000.0,103102300000000.0,103222700000000.0,103335800000000.0,103434300000000.0
Val MAPE,193441800000000.0,195186500000000.0,196205500000000.0,196231900000000.0,197567000000000.0,198482300000000.0,198954000000000.0,199954000000000.0,200674900000000.0,201333700000000.0,...,207537000000000.0,207703000000000.0,207823600000000.0,208085500000000.0,208391900000000.0,208552800000000.0,208731000000000.0,208951300000000.0,209099300000000.0,209135600000000.0
Test MAPE,194631100000000.0,195996100000000.0,196743700000000.0,199380200000000.0,200541200000000.0,201023100000000.0,201634100000000.0,201573600000000.0,202293300000000.0,202508300000000.0,...,207463800000000.0,207650800000000.0,207804800000000.0,208021000000000.0,208165300000000.0,208447400000000.0,208700800000000.0,208803000000000.0,208912600000000.0,209081100000000.0
n,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


weights


Unnamed: 0,distance,uniform
Train r2,1.0,0.3576725
Val r2,0.331435,0.3299975
Test r2,0.3234375,0.3227125
Train RMSE,0.0,0.2016225
Val RMSE,0.20573,0.2059675
Test RMSE,0.2070675,0.2071825
Train MAPE,0.0,198114200000000.0
Val MAPE,203859600000000.0,204383700000000.0
Test MAPE,204367400000000.0,204799200000000.0
n,40.0,40.0
