# Naive Bayes Classifier

## The overall video ratings for `Observer`(V1), `Strategy`(V2) and `MVC`(V3) patterns will be classified separately based on each learning style dimension scores below...
* A/R_Score
* S/I_Score
* Vi/Vb_Score
* S/G_Score

The classifiers were trained using `GridSearchCV`, a hyperparamater tuning technique, and the training set was split into `10-fold cross validation`. The average scores were collected after each classifier was 'train test split' a few times (various random states) with train-test.

# Importing the necessry libraries

In [90]:
### For data manipulation and visualisation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### To Encode the data
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

### Machine Learning
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.naive_bayes import CategoricalNB
from imblearn.over_sampling import SMOTE,RandomOverSampler

# Reading the dataset

In [91]:
pd.set_option('display.max_columns', None)
training = pd.read_csv('BDP_Cleaned.csv') #dataset after feature selection process for 5-Level Classification

# Dropping all irrelevant features
* `Time_Day_Watched` of all design patterns as features have already been extracted
* `Gender` as male and females are imbalanced and does not provide any useful insights
* `Duration` of each video as watch status and percentage duration have already been extracted from it

In [92]:
training.drop(['Time_Day_Watched_(V1-1)',
         'Time_Day_Watched_(V1-2)',
         'Time_Day_Watched_(V1-3)',
         'Time_Day_Watched_(V1-4)',
         'Time_Day_Watched_(V2-1)',
         'Time_Day_Watched_(V2-2)',
         'Time_Day_Watched_(V2-3)',
         'Time_Day_Watched_(V2-4)',
         'Time_Day_Watched_(V3-1)',
         'Time_Day_Watched_(V3-2)',
         'Time_Day_Watched_(V3-3)',
         'Time_Day_Watched_(V3-4)',
               'Gender',
               "V1-1_(1)",
               "V1-2_(1)",
               "V1-3_(1)",
               "V1-4_(1)",
               "V2-1_(2)",
               "V2-1_(2)",
               "V2-2_(2)",
               "V2-3_(2)",
               "V2-4_(2)",
               "V3-1_(3)",
               "V3-2_(3)",
               "V3-3_(3)",
               "V3-4_(3)",
               
        ], axis = 1,inplace = True)

# Getting numerical and categorical features for further tuning scaling and fitting

In [93]:
numerical_feats = training.dtypes[training.dtypes != "object"].index
categorical_feats = training.dtypes[training.dtypes == "object"].index

In [94]:
print("*"*100)
print(training[numerical_feats].columns)
print("*"*100)
print(training[categorical_feats].columns)
print("*"*100)
training.columns

****************************************************************************************************
Index(['V1_PercentWatched', 'V2_PercentWatched', 'V3_PercentWatched',
       'A/R_Score', 'Active', 'Reflective', 'S/I_Score', 'Sensing',
       'Intuitive', 'Vi/Vb_Score', 'Visual', 'Verbal', 'S/G_Score',
       'Sequential', 'Global', 'Quiz'],
      dtype='object')
****************************************************************************************************
Index(['CGPA_Class', 'Student_Rating_(V1-1)', 'Student_Rating_(V1-2)',
       'Student_Rating_(V1-3)', 'Student_Rating_(V1-4)', 'Overall_Rating_V1',
       'Watch_Status_V1', 'Student_Rating_(V2-1)', 'Student_Rating_(V2-2)',
       'Student_Rating_(V2-3)', 'Student_Rating_(V2-4)', 'Overall_Rating_V2',
       'Watch_Status_V2', 'Student_Rating_(V3-1)', 'Student_Rating_(V3-2)',
       'Student_Rating_(V3-3)', 'Student_Rating_(V3-4)', 'Overall_Rating_V3',
       'Watch_Status_V3', 'part_day_V1-1', 'part_day_V1-2', 'part_day_V1-

Index(['CGPA_Class', 'Student_Rating_(V1-1)', 'Student_Rating_(V1-2)',
       'Student_Rating_(V1-3)', 'Student_Rating_(V1-4)', 'Overall_Rating_V1',
       'Watch_Status_V1', 'V1_PercentWatched', 'Student_Rating_(V2-1)',
       'Student_Rating_(V2-2)', 'Student_Rating_(V2-3)',
       'Student_Rating_(V2-4)', 'Overall_Rating_V2', 'Watch_Status_V2',
       'V2_PercentWatched', 'Student_Rating_(V3-1)', 'Student_Rating_(V3-2)',
       'Student_Rating_(V3-3)', 'Student_Rating_(V3-4)', 'Overall_Rating_V3',
       'Watch_Status_V3', 'V3_PercentWatched', 'A/R_Score', 'Active',
       'Reflective', 'S/I_Score', 'Sensing', 'Intuitive', 'Vi/Vb_Score',
       'Visual', 'Verbal', 'S/G_Score', 'Sequential', 'Global', 'Quiz',
       'part_day_V1-1', 'part_day_V1-2', 'part_day_V1-3', 'part_day_V1-4',
       'part_day_V2-1', 'part_day_V2-2', 'part_day_V2-3', 'part_day_V2-4',
       'part_day_V3-1', 'part_day_V3-2', 'part_day_V3-3', 'part_day_V3-4'],
      dtype='object')

# Using MinMax Scaler to scale data from 0 to 1 since numerical features are not normally distributed

In [95]:
scaler = MinMaxScaler()
training['Quiz'] = scaler.fit_transform(training[['Quiz']])
training['A/R_Score'] = scaler.fit_transform(training[['A/R_Score']])
training['S/I_Score'] = scaler.fit_transform(training[['S/I_Score']])
training['Vi/Vb_Score'] = scaler.fit_transform(training[['Vi/Vb_Score']])
training['S/G_Score'] = scaler.fit_transform(training[['S/G_Score']])
training['V1_PercentWatched'] = scaler.fit_transform(training[['V1_PercentWatched']])
training['V2_PercentWatched'] = scaler.fit_transform(training[['V2_PercentWatched']])
training['V3_PercentWatched'] = scaler.fit_transform(training[['V3_PercentWatched']])
training.head()

Unnamed: 0,CGPA_Class,Student_Rating_(V1-1),Student_Rating_(V1-2),Student_Rating_(V1-3),Student_Rating_(V1-4),Overall_Rating_V1,Watch_Status_V1,V1_PercentWatched,Student_Rating_(V2-1),Student_Rating_(V2-2),Student_Rating_(V2-3),Student_Rating_(V2-4),Overall_Rating_V2,Watch_Status_V2,V2_PercentWatched,Student_Rating_(V3-1),Student_Rating_(V3-2),Student_Rating_(V3-3),Student_Rating_(V3-4),Overall_Rating_V3,Watch_Status_V3,V3_PercentWatched,A/R_Score,Active,Reflective,S/I_Score,Sensing,Intuitive,Vi/Vb_Score,Visual,Verbal,S/G_Score,Sequential,Global,Quiz,part_day_V1-1,part_day_V1-2,part_day_V1-3,part_day_V1-4,part_day_V2-1,part_day_V2-2,part_day_V2-3,part_day_V2-4,part_day_V3-1,part_day_V3-2,part_day_V3-3,part_day_V3-4
0,2.67 - 3.32,Excellent,Satisfactory,Very Good,Very Good,Very Good,Completed,0.747748,Very Good,Satisfactory,Very Good,Very Good,Very Good,Completed,0.440789,Excellent,Very Good,Very Good,Very Good,Very Good,Completed,0.632653,0.2,1,0,0.25,1,0,0.6,1,0,0.0,1,0,1.0,night,night,night,night,night,night,night,night,night,night,night,night
1,2.67 - 3.32,Very Good,Very Good,Very Good,Very Good,Very Good,Completed,0.747748,Very Good,Very Good,Very Good,Very Good,Very Good,Completed,0.440789,Very Good,Very Good,Very Good,Very Good,Very Good,Rewatched,0.653061,0.2,0,1,1.0,1,0,0.6,1,0,0.2,0,1,0.75,midnight,night,night,night,night,night,night,night,night,night,night,night
2,2.67 - 3.32,Excellent,Satisfactory,Very Good,Satisfactory,Very Good,Completed,0.747748,Very Good,Satisfactory,Very Good,Excellent,Very Good,Completed,0.440789,Excellent,Very Good,Very Good,Very Good,Very Good,Completed,0.632653,0.0,0,1,0.75,1,0,0.6,1,0,0.2,0,1,0.5,night,night,afternoon,afternoon,night,night,afternoon,afternoon,night,night,afternoon,afternoon
3,3.67 - 4.00,Excellent,Very Good,Excellent,Excellent,Excellent,Completed,0.747748,Excellent,Excellent,Very Good,Excellent,Excellent,Rewatched,0.618421,Excellent,Excellent,Very Good,Excellent,Excellent,Completed,0.632653,0.2,0,1,0.25,1,0,0.6,1,0,0.0,1,0,0.375,midnight,midnight,midnight,afternoon,midnight,midnight,midnight,midnight,midnight,midnight,midnight,midnight
4,2.67 - 3.32,Very Good,Very Good,Satisfactory,Very Good,Very Good,Completed,0.747748,Excellent,Very Good,Very Good,Satisfactory,Very Good,Completed,0.440789,Very Good,Very Good,Excellent,Very Good,Very Good,Completed,0.632653,0.2,1,0,0.0,1,0,0.4,1,0,0.2,0,1,0.75,afternoon,afternoon,afternoon,afternoon,afternoon,afternoon,afternoon,afternoon,afternoon,afternoon,afternoon,afternoon


# Encoding nominal values using label encoder

In [96]:
features =  ["CGPA_Class",
             "Overall_Rating_V1",
             "Overall_Rating_V2",
             "Overall_Rating_V3",
             "Watch_Status_V1",
             "Watch_Status_V2",
             "Watch_Status_V3",
             "part_day_V1-1",
             "part_day_V1-2",
             "part_day_V1-3",
             "part_day_V1-4",
             "part_day_V2-1",
             "part_day_V2-2",
             "part_day_V2-3",
             "part_day_V2-4",
             "part_day_V3-1",
             "part_day_V3-2",
             "part_day_V3-3",
             "part_day_V3-4"]

for i in features:
    training.loc[:,i] = LabelEncoder().fit_transform(training.loc[:,i]) 

### Overall_rating_V1 `(Observer)` for `Active/Reflective` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [97]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V1']]
X = training[['CGPA_Class',
              'Watch_Status_V1',
              'A/R_Score','Quiz','part_day_V1-1', 'part_day_V1-2', 'part_day_V1-3', 'part_day_V1-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_A = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_A

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.530612,0.507043,0.530612,0.50738
1,0.4,69,0.581633,0.548286,0.581633,0.549137
2,0.4,101,0.55102,0.587417,0.55102,0.561452
3,0.3,7,0.472973,0.42018,0.472973,0.440125
4,0.3,69,0.567568,0.52885,0.567568,0.536551
5,0.3,101,0.554054,0.56572,0.554054,0.547942
6,0.2,7,0.469388,0.427438,0.469388,0.440456
7,0.2,69,0.612245,0.585109,0.612245,0.587929
8,0.2,101,0.632653,0.631633,0.632653,0.628118


In [98]:
df_A.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.571429,0.54806,0.571429,0.552168
0.3,0.531532,0.504917,0.531532,0.508206
0.4,0.554422,0.547582,0.554422,0.539323


### Overall_rating_V1 `(Observer)` for `Sensing/Intuitive` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)MVC

In [99]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V1']]
X = training[['CGPA_Class',
              'Watch_Status_V1',
              'S/I_Score','Quiz','part_day_V1-1', 'part_day_V1-2', 'part_day_V1-3', 'part_day_V1-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_B = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_B

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.581633,0.539724,0.581633,0.540838
1,0.4,69,0.55102,0.542083,0.55102,0.542707
2,0.4,101,0.632653,0.564801,0.632653,0.583939
3,0.3,7,0.567568,0.508506,0.567568,0.520457
4,0.3,69,0.527027,0.519789,0.527027,0.516892
5,0.3,101,0.581081,0.578902,0.581081,0.560704
6,0.2,7,0.530612,0.439796,0.530612,0.472998
7,0.2,69,0.530612,0.523834,0.530612,0.521232
8,0.2,101,0.612245,0.604956,0.612245,0.592451


In [100]:
df_B.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.557823,0.522862,0.557823,0.528894
0.3,0.558559,0.535732,0.558559,0.532685
0.4,0.588435,0.548869,0.588435,0.555828


### Overall_rating_V1 `(Observer)` for `Visual/Verbal` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [101]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V1']]
X = training[['CGPA_Class',
              'Watch_Status_V1',
              'Vi/Vb_Score','Quiz','part_day_V1-1', 'part_day_V1-2', 'part_day_V1-3', 'part_day_V1-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_C = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_C

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.591837,0.549201,0.591837,0.543739
1,0.4,69,0.561224,0.464063,0.561224,0.493861
2,0.4,101,0.612245,0.557681,0.612245,0.573198
3,0.3,7,0.540541,0.459406,0.540541,0.482396
4,0.3,69,0.540541,0.450975,0.540541,0.482604
5,0.3,101,0.608108,0.56661,0.608108,0.571889
6,0.2,7,0.530612,0.417811,0.530612,0.452463
7,0.2,69,0.612245,0.589116,0.612245,0.581721
8,0.2,101,0.693878,0.669455,0.693878,0.667032


In [102]:
df_C.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.612245,0.558794,0.612245,0.567072
0.3,0.563063,0.49233,0.563063,0.512296
0.4,0.588435,0.523648,0.588435,0.536933


### Overall_rating_V1 `(Observer)` for `Sequential/Global` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [103]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V1']]
X = training[['CGPA_Class',
              'Watch_Status_V1',
              'S/G_Score','Quiz','part_day_V1-1', 'part_day_V1-2', 'part_day_V1-3', 'part_day_V1-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_D = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_D

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.571429,0.571736,0.571429,0.547399
1,0.4,69,0.622449,0.604163,0.622449,0.585338
2,0.4,101,0.683673,0.666508,0.683673,0.65762
3,0.3,7,0.581081,0.555011,0.581081,0.558397
4,0.3,69,0.567568,0.576577,0.567568,0.543535
5,0.3,101,0.72973,0.722222,0.72973,0.714159
6,0.2,7,0.530612,0.44914,0.530612,0.484292
7,0.2,69,0.612245,0.568867,0.612245,0.579794
8,0.2,101,0.755102,0.764966,0.755102,0.740973


In [104]:
df_D.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.632653,0.594324,0.632653,0.601686
0.3,0.626126,0.617937,0.626126,0.605364
0.4,0.62585,0.614136,0.62585,0.596785


### Overall_rating_V2 `(Strategy)` for `Active/Reflective` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [105]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V2']]
X = training[['CGPA_Class',
              'Watch_Status_V2',
              'A/R_Score','Quiz','part_day_V2-1', 'part_day_V2-2', 'part_day_V2-3', 'part_day_V2-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_E = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_E

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.48,0.463942,0.48,0.466291
1,0.4,69,0.47,0.416922,0.47,0.431877
2,0.4,101,0.51,0.517454,0.51,0.481178
3,0.3,7,0.426667,0.419946,0.426667,0.412603
4,0.3,69,0.466667,0.415823,0.466667,0.430857
5,0.3,101,0.56,0.554022,0.56,0.537045
6,0.2,7,0.42,0.387244,0.42,0.387327
7,0.2,69,0.48,0.47559,0.48,0.466603
8,0.2,101,0.62,0.58581,0.62,0.58976


In [106]:
df_E.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.506667,0.482881,0.506667,0.48123
0.3,0.484444,0.463264,0.484444,0.460168
0.4,0.486667,0.466106,0.486667,0.459782


### Overall_rating_V2 `(Strategy)` for `Sensing/Intuitive` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [107]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V2']]
X = training[['CGPA_Class',
              'Watch_Status_V2',
              'S/I_Score','Quiz','part_day_V2-1', 'part_day_V2-2', 'part_day_V2-3', 'part_day_V2-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_F = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_F

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.51,0.469792,0.51,0.481583
1,0.4,69,0.52,0.484037,0.52,0.490943
2,0.4,101,0.52,0.542755,0.52,0.508347
3,0.3,7,0.493333,0.480036,0.493333,0.452906
4,0.3,69,0.546667,0.539309,0.546667,0.518518
5,0.3,101,0.6,0.564764,0.6,0.563798
6,0.2,7,0.52,0.499053,0.52,0.466203
7,0.2,69,0.62,0.653333,0.62,0.608667
8,0.2,101,0.6,0.563333,0.6,0.572635


In [108]:
df_F.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.58,0.571906,0.58,0.549168
0.3,0.546667,0.528036,0.546667,0.511741
0.4,0.516667,0.498861,0.516667,0.493624


### Overall_rating_V2 `(Strategy)` for `Visual/Verbal` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [109]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V2']]
X = training[['CGPA_Class',
              'Watch_Status_V2',
              'Vi/Vb_Score','Quiz','part_day_V2-1', 'part_day_V2-2', 'part_day_V2-3', 'part_day_V2-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_G = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_G

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.49,0.46336,0.49,0.470658
1,0.4,69,0.47,0.475034,0.47,0.448015
2,0.4,101,0.39,0.401387,0.39,0.369872
3,0.3,7,0.44,0.417869,0.44,0.416721
4,0.3,69,0.426667,0.419338,0.426667,0.411352
5,0.3,101,0.466667,0.454864,0.466667,0.443044
6,0.2,7,0.44,0.425905,0.44,0.396963
7,0.2,69,0.52,0.651765,0.52,0.518016
8,0.2,101,0.52,0.518095,0.52,0.496667


In [110]:
df_G.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.493333,0.531922,0.493333,0.470549
0.3,0.444444,0.43069,0.444444,0.423706
0.4,0.45,0.446594,0.45,0.429515


### Overall_rating_V2 `(Strategy)` for `Sequential/Global` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [111]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V2']]
X = training[['CGPA_Class',
              'Watch_Status_V2',
              'S/G_Score','Quiz','part_day_V2-1', 'part_day_V2-2', 'part_day_V2-3', 'part_day_V2-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_H = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_H

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.54,0.528421,0.54,0.521708
1,0.4,69,0.55,0.573956,0.55,0.537942
2,0.4,101,0.59,0.610476,0.59,0.572316
3,0.3,7,0.493333,0.480254,0.493333,0.46468
4,0.3,69,0.573333,0.617725,0.573333,0.560973
5,0.3,101,0.613333,0.630962,0.613333,0.603704
6,0.2,7,0.52,0.532727,0.52,0.471782
7,0.2,69,0.66,0.690667,0.66,0.645707
8,0.2,101,0.68,0.681615,0.68,0.663058


In [112]:
df_H.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.62,0.635003,0.62,0.593516
0.3,0.56,0.576314,0.56,0.543119
0.4,0.56,0.570951,0.56,0.543989


### Overall_rating_V3 `(MVC)` for `Active/Reflective` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [113]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V3']]
X = training[['CGPA_Class',
              'Watch_Status_V3',
              'A/R_Score','Quiz','part_day_V3-1', 'part_day_V3-2', 'part_day_V3-3', 'part_day_V3-4']]


oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_I = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_I

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.508333,0.543631,0.508333,0.505375
1,0.4,69,0.583333,0.625246,0.583333,0.590008
2,0.4,101,0.508333,0.575121,0.508333,0.514533
3,0.3,7,0.533333,0.601287,0.533333,0.551415
4,0.3,69,0.577778,0.632636,0.577778,0.580975
5,0.3,101,0.566667,0.675562,0.566667,0.574855
6,0.2,7,0.466667,0.597215,0.466667,0.503862
7,0.2,69,0.6,0.665741,0.6,0.60136
8,0.2,101,0.583333,0.660593,0.583333,0.591567


In [114]:
df_I.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.55,0.641183,0.55,0.565597
0.3,0.559259,0.636495,0.559259,0.569082
0.4,0.533333,0.581333,0.533333,0.536639


### Overall_rating_V3 `(MVC)` for `Sensing/Intuitive` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [115]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V3']]
X = training[['CGPA_Class',
              'Watch_Status_V3',
              'S/I_Score','Quiz','part_day_V3-1', 'part_day_V3-2', 'part_day_V3-3', 'part_day_V3-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_J = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_J

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.416667,0.482595,0.416667,0.423511
1,0.4,69,0.575,0.602282,0.575,0.574921
2,0.4,101,0.4,0.411424,0.4,0.387617
3,0.3,7,0.533333,0.646257,0.533333,0.548925
4,0.3,69,0.588889,0.599081,0.588889,0.587903
5,0.3,101,0.422222,0.418139,0.422222,0.394308
6,0.2,7,0.566667,0.751477,0.566667,0.593543
7,0.2,69,0.5,0.541386,0.5,0.496316
8,0.2,101,0.5,0.514859,0.5,0.491576


In [116]:
df_J.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.522222,0.602574,0.522222,0.527145
0.3,0.514815,0.554492,0.514815,0.510379
0.4,0.463889,0.498767,0.463889,0.462016


### Overall_rating_V3 `(MVC)` for `Visual/Verbal` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [117]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V3']]
X = training[['CGPA_Class',
              'Watch_Status_V3',
              'Vi/Vb_Score','Quiz','part_day_V3-1', 'part_day_V3-2', 'part_day_V3-3', 'part_day_V3-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_K = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_K

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.458333,0.482964,0.458333,0.462529
1,0.4,69,0.566667,0.550886,0.566667,0.551332
2,0.4,101,0.55,0.629501,0.55,0.536363
3,0.3,7,0.5,0.530421,0.5,0.492169
4,0.3,69,0.555556,0.58699,0.555556,0.552852
5,0.3,101,0.588889,0.657744,0.588889,0.584486
6,0.2,7,0.516667,0.557015,0.516667,0.498818
7,0.2,69,0.533333,0.557593,0.533333,0.520728
8,0.2,101,0.616667,0.715599,0.616667,0.618463


In [118]:
df_K.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.555556,0.610069,0.555556,0.546003
0.3,0.548148,0.591718,0.548148,0.543169
0.4,0.525,0.55445,0.525,0.516741


### Overall_rating_V3 `(MVC)` for `Sequential/Global` learners (60 Train - 40 Test) | (70 Train - 30 Test) | (80 Train - 20 Test)

In [119]:
temp = []
rand_states = [7,69,101]
y = training[['Overall_Rating_V3']]
X = training[['CGPA_Class',
              'Watch_Status_V3',
              'S/G_Score','Quiz','part_day_V3-1', 'part_day_V3-2', 'part_day_V3-3', 'part_day_V3-4']]

oversample = RandomOverSampler(sampling_strategy='all')
X, y = oversample.fit_resample(X, y)

for i in [0.4,0.3,0.2]:
    for j in rand_states:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=i, random_state=j)
        param_grid = {}

        nb_model = GridSearchCV(GaussianNB(), param_grid, refit=True, cv=10, verbose=1)
        nb_model.fit(X_train, y_train.values.ravel())
        nb_pred = nb_model.predict(X_test)
        
        acc = accuracy_score(y_test,nb_pred)
        # average='weighted' : Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label).
        pre = precision_score(y_test, nb_pred, average='weighted')
        recall = recall_score(y_test, nb_pred, average='weighted')
        f1 = f1_score(y_test, nb_pred, average='weighted')
        
        temp.append([i,j,acc,pre,recall,f1])
        
df_L = pd.DataFrame(columns=['Test Size','Random State','Accuracy','Precision','Recall','F1 Score'],data=temp)
df_L

Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits
Fitting 10 folds for each of 1 candidates, totalling 10 fits


Unnamed: 0,Test Size,Random State,Accuracy,Precision,Recall,F1 Score
0,0.4,7,0.425,0.450937,0.425,0.416121
1,0.4,69,0.5,0.497934,0.5,0.477003
2,0.4,101,0.508333,0.55029,0.508333,0.505817
3,0.3,7,0.544444,0.573277,0.544444,0.545163
4,0.3,69,0.544444,0.558727,0.544444,0.526325
5,0.3,101,0.477778,0.507447,0.477778,0.470001
6,0.2,7,0.533333,0.559206,0.533333,0.532669
7,0.2,69,0.533333,0.630179,0.533333,0.51287
8,0.2,101,0.533333,0.557576,0.533333,0.515757


In [120]:
df_L.groupby('Test Size')[['Accuracy','Precision','Recall','F1 Score']].mean()

Unnamed: 0_level_0,Accuracy,Precision,Recall,F1 Score
Test Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.2,0.533333,0.582321,0.533333,0.520432
0.3,0.522222,0.546484,0.522222,0.51383
0.4,0.477778,0.499721,0.477778,0.466314
