In [None]:
import pandas as pd
import numpy as np

import pandas as pd
from sklearn.metrics import mean_squared_error, classification_report, cohen_kappa_score, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.model_selection import train_test_split

In [None]:
!pip install vecstack

Collecting vecstack
  Downloading vecstack-0.4.0.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: vecstack
  Building wheel for vecstack (setup.py) ... [?25l[?25hdone
  Created wheel for vecstack: filename=vecstack-0.4.0-py3-none-any.whl size=19860 sha256=9bbca0aea32abd10d55ee028dc56ef14595b35e21727867e2c28605f8815ffac
  Stored in directory: /root/.cache/pip/wheels/b8/d8/51/3cf39adf22c522b0a91dc2208db4e9de4d2d9d171683596220
Successfully built vecstack
Installing collected packages: vecstack
Successfully installed vecstack-0.4.0


## Reading Datasets

In [None]:
map_cols = {'av':{'N': 0.0, 'L': 1.0, 'A': 2.0, 'P': 3.0,
                  # 'R':4.0, 'W':5.0, 'D':6.0, 'M':0.7, 'S':0.8   ## The second lines refer to the outliers
                  },
            'ac':{'L': 0.0, 'H': 1.0,
                  # 'P':2.0, 'M':3.0
                  },
            'pr':{'L': 0.0, 'H': 1.0, 'N': 2.0,
                  # 'P':0.3
                  },
            'ui':{'R': 0.0, 'N': 1.0,
                  # 'L':2.0
                  },
            's':{'C': 0.0, 'U': 1.0,
                #  'L':2.0, 'R':3.0
                 },
            'c':{'H': 0.0, 'N': 1.0, 'L': 2.0,
                #  'I':3.0,'R':4.0, 'C':5.0
                 },
            'i':{'H': 0.0, 'N': 1.0, 'L': 2.0,
                #  'C':3.0, 'P':4.0, 'R':5.0, 'A':6.0
                 },
            'a':{'H': 0.0, 'N': 1.0, 'L': 2.0,
                #  'R':3.0, 'P':4.0, 'C':5.0
                 }}

# outliers = {'av':{'R':4.0, 'W':5.0, 'D':6.0, 'M':0.7, 'S':0.8},
#             'ac':{'P':2.0, 'M':3.0},
#             'pr':{'P':0.3},
#             'ui':{'L':2.0},
#             's':{'L':2.0, 'R':3.0},
#             'c':{'I':3.0,'R':4.0, 'C':5.0},
#             'i':{'C':3.0, 'P':4.0, 'R':5.0, 'A':6.0},
#             'a':{'R':3.0, 'P':4.0, 'C':5.0}
#             }

def read_files(path):
  data = pd.read_csv(path)

  for column in map_cols.keys():
      data[column] = data[column].map(map_cols[column])

  return data

In [None]:
gpt4_folder = "/Ensemble LLMs/Results/GPT4/"
gpt35_folder = "/Ensemble LLMs/Results/GPT3.5FineTuned/"

gpt4_files = ['classified_GPT35turbo_gpt-4-0613_6370.csv', 'classified_GPT4_gpt-4-0613_6370.csv', 'classified_Original_gpt-4-0613_6370.csv']
gpt35_files = ['classified_GPT4_FineTunedGPT35turbo_0_6370.csv', 'classified_Original_FineTunedGPT35turbo_0_6370.csv'] ## Issue: Wrong labels

original_labels = '/Ensemble LLMs/Results/formatted_Sample_Original_0_6370.csv'

In [None]:
gpt4_original_df = read_files(gpt4_folder + gpt4_files[2])
gpt4_gpt4_df = read_files(gpt4_folder + gpt4_files[1])
gpt4_gpt35_df = read_files(gpt4_folder + gpt4_files[0])

gpt35_original_df = read_files(gpt35_folder + gpt35_files[1])
gpt35_gpt4_df = read_files(gpt35_folder + gpt35_files[0])

original_df = read_files(original_labels)

#############
gpt4_original_vec = pd.concat([gpt4_original_df[col] for col in list(gpt4_original_df.columns)[-8:]], axis=1)
gpt4_gpt4_vec = pd.concat([gpt4_gpt4_df[col] for col in list(gpt4_gpt4_df.columns)[-8:]], axis=1)
gpt4_gpt35_vec = pd.concat([gpt4_gpt35_df[col] for col in list(gpt4_gpt35_df.columns)[-8:]], axis=1)

gpt35_original_vec = pd.concat([gpt35_original_df[col] for col in list(gpt35_original_df.columns)[-8:]], axis=1)
gpt35_gpt4_vec = pd.concat([gpt35_gpt4_df[col] for col in list(gpt35_gpt4_df.columns)[-8:]], axis=1)

original_vec = pd.concat([original_df[col] for col in list(original_df.columns)[-10:-2]], axis=1)


In [None]:
accuracies = {col:dict() for col in list(gpt4_original_vec.columns)}

data = {'gpt4_original':gpt4_original_vec, 'gpt4_gpt4':gpt4_gpt4_vec, 'gpt4_gpt35':gpt4_gpt35_vec,
        'gpt35_original':gpt35_original_vec, 'gpt35_gpt4': gpt35_gpt4_vec, 'original':original_vec}

for col in list(gpt4_original_vec.columns):
  for gpt in list(data.keys())[0:-1]:
    gpt_data = data[gpt][col]
    original = data['original'][col]

    array = np.array(pd.concat([gpt_data, original], axis=1))
    array = np.array(array)[~np.isnan(array).any(axis=1)]

    y_pred = array[:,0]
    y_true = array[:,1]

    accuracies[col][gpt] = accuracy_score(y_true, y_pred)

  normalization_factor=1.0/sum(accuracies[col].values())
  for k in accuracies[col]:
    accuracies[col][k] = accuracies[col][k]*normalization_factor

  print(col, accuracies[col])

av {'gpt4_original': 0.19429472187383512, 'gpt4_gpt4': 0.19848020907973404, 'gpt4_gpt35': 0.1977826278787509, 'gpt35_original': 0.20380191299453912, 'gpt35_gpt4': 0.20564052817314057}
ac {'gpt4_original': 0.19336080819033244, 'gpt4_gpt4': 0.19364053449512858, 'gpt4_gpt35': 0.19801125800756827, 'gpt35_original': 0.20814221600202154, 'gpt35_gpt4': 0.20684518330494917}
pr {'gpt4_original': 0.19879332819399823, 'gpt4_gpt4': 0.19831189127405313, 'gpt4_gpt35': 0.19658674231091652, 'gpt35_original': 0.20270848827977758, 'gpt35_gpt4': 0.20359954994125448}
ui {'gpt4_original': 0.19625258726505365, 'gpt4_gpt4': 0.198252084982287, 'gpt4_gpt35': 0.19542260708054168, 'gpt35_original': 0.20586741418455867, 'gpt35_gpt4': 0.20420530648755908}
s {'gpt4_original': 0.19125734831851374, 'gpt4_gpt4': 0.1903958287314934, 'gpt4_gpt35': 0.19000096558744242, 'gpt35_original': 0.21540544131405698, 'gpt35_gpt4': 0.21294041604849354}
c {'gpt4_original': 0.1949730046990862, 'gpt4_gpt4': 0.1856337196037723, 'gpt4_g

In [None]:
columns = list(gpt4_original_vec.columns)
Dataset = dict()
weighted = True
num_datasets = 2 ## 2 or 5
vector = False #True

if vector:
  for col in columns:
    if weighted:
      if num_datasets == 2:
        w1 = accuracies[col]['gpt4_original']
        w2 = accuracies[col]['gpt35_original']
        dfs = [w1*gpt4_original_vec[col], w2*gpt35_original_vec[col], original_vec[col]]
      else:
        w1 = accuracies[col]['gpt4_original']
        w2 = accuracies[col]['gpt4_gpt4']
        w3 = accuracies[col]['gpt4_gpt35']
        w4 = accuracies[col]['gpt35_original']
        w5 = accuracies[col]['gpt35_gpt4']
        dfs = [w1*gpt4_original_vec[col], w2*gpt4_gpt4_vec[col], w3*gpt4_gpt35_vec[col],
              w4*gpt35_original_vec[col], w5*gpt35_gpt4_vec[col], original_vec[col]]
    else:
      if num_datasets == 2:
        dfs = [gpt4_original_vec[col], gpt35_original_vec[col], original_vec[col]]
      else:
        dfs = [gpt4_original_vec[col], gpt4_gpt4_vec[col], gpt4_gpt35_vec[col], gpt35_original_vec[col], gpt35_gpt4_vec[col], original_vec[col]]

    data = np.array(pd.concat(dfs, axis=1))
    data = data[~np.isnan(data).any(axis=1)]
    Dataset[col] = data

  X_vectors = dict()
  Y_vectors = dict()
  for col in columns:
    X_vectors[col] = Dataset[col][:,:-1]
    Y_vectors[col] = Dataset[col][:,-1]

else:
  for col in columns:
    if weighted:
      if num_datasets == 2:
        w1 = accuracies[col]['gpt4_original']
        w2 = accuracies[col]['gpt35_original']
        dfs = [(w1*gpt4_original_vec[col] + w2*gpt35_original_vec[col])/2, original_vec[col]]
      else:
        w1 = accuracies[col]['gpt4_original']
        w2 = accuracies[col]['gpt4_gpt4']
        w3 = accuracies[col]['gpt4_gpt35']
        w4 = accuracies[col]['gpt35_original']
        w5 = accuracies[col]['gpt35_gpt4']
        dfs = [(w1*gpt4_original_vec[col]+ w2*gpt4_gpt4_vec[col]+ w3*gpt4_gpt35_vec[col]+
              w4*gpt35_original_vec[col]+ w5*gpt35_gpt4_vec[col])/5, original_vec[col]]
    else:
      if num_datasets == 2:
        dfs = [(gpt4_original_vec[col] + gpt35_original_vec[col])/2, original_vec[col]]
      else:
        dfs = [(gpt4_original_vec[col]+ gpt4_gpt4_vec[col]+ gpt4_gpt35_vec[col]+
                gpt35_original_vec[col]+ gpt35_gpt4_vec[col])/5, original_vec[col]]

    data = np.array(pd.concat(dfs, axis=1))
    data = data[~np.isnan(data).any(axis=1)]
    Dataset[col] = data

  X_vectors = dict()
  Y_vectors = dict()
  for col in columns:
    X_vectors[col] = Dataset[col][:,:-1]
    Y_vectors[col] = Dataset[col][:,-1]

In [None]:
len(dfs)

2

In [None]:
for col in columns:
  print(col, X_vectors[col].shape)

av (6352, 1)
ac (6364, 1)
pr (6366, 1)
ui (6367, 1)
s (6363, 1)
c (6362, 1)
i (6350, 1)
a (6364, 1)


In [None]:
X_vectors['av']

array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]])

In [None]:
# Splitting between train data into training and validation dataset
train_test_df = dict()
train_validation_test = dict()
for col in columns:
  X_train, X_test, y_train, y_test = train_test_split(np.array(X_vectors[col]), np.array(Y_vectors[col]), test_size=0.20)
  train_test_df[col] = (X_train, X_test, y_train, y_test)
  X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.20)
  train_validation_test[col] = (X_train, X_test, X_val, y_train, y_test, y_val)

Output Files

In [None]:
output_dir = '/Ensemble LLMs/'

if weighted:
  #  output_name = f"_results_{len(dfs)} Descs_weighted.csv"
   output_name = f"_results_{num_datasets} Descs_weighted_{'Vector' if vector else 'Averaged'}.csv"
else:
  # output_name = f"_results_{len(dfs)} Descs.csv"
  output_name = f"_results_{num_datasets} Descs_weighted_{'Vector' if vector else 'Averaged'}.csv"
output_name

'_results_2 Descs_weighted_Averaged.csv'

## Stacking

In [None]:
from numpy import mean
from numpy import std
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from vecstack import stacking
import copy

Model_names = ['KNN', 'DT', 'SVC', 'RF', 'XGB', 'GNB']

total_results = []
for model_index in range(len(Model_names)):
  stacking_predictions = dict()
  stacking_MSE = dict()
  stacking_MAE = dict()

  for col in columns:
    X_train, X_test, y_train, y_test = train_test_df[col]
    print(set(y_train))

    if len(set(y_train))>2:
        objective ='multi:softmax'
    elif len(set(y_train))<=2:
        objective='binary:logistic'

    print(objective)
    level_0_models = [KNeighborsClassifier(), DecisionTreeClassifier(), SVC(), RandomForestClassifier(),
                xgb.XGBClassifier(objective=objective), GaussianNB()]

    level_1_models = [KNeighborsClassifier(), DecisionTreeClassifier(), SVC(), RandomForestClassifier(),
                      xgb.XGBClassifier(objective=objective), GaussianNB()]

    s_train, s_test = stacking(level_0_models, X_train=X_train, X_test=X_test, y_train=y_train, regression=False, shuffle=True, n_folds=5)

    level_1_model = level_1_models[model_index]

    final_model = level_1_model.fit(s_train, y_train)
    pred_final = final_model.predict(s_test)

    stacking_MSE[col] = mean_squared_error(y_test, pred_final)
    stacking_MAE[col] = mean_absolute_error(y_test, pred_final)
    stacking_predictions[col] = pred_final
    # print(set(y_test), set(pred_final))
    # print(f"Col: {col}; MSE: {stacking_MSE[col]}; MAE: {stacking_MAE[col]}")

    accuracy = accuracy_score(y_test, pred_final)
    precision_weighted = precision_score(y_test, pred_final, average='weighted')
    precision_macro = precision_score(y_test, pred_final, average='macro')

    recall_weighted = recall_score(y_test, pred_final, average='weighted')
    recall_macro = recall_score(y_test, pred_final, average='macro')

    f1score_weighted = f1_score(y_test, pred_final, average='weighted')
    f1score_macro = f1_score(y_test, pred_final, average='macro')

    # print(classification_report(y_test, pred_final))

    models_0 = ""
    for item in Model_names:
      models_0 += ', '+item

    model_1 = Model_names[model_index]

    result_dict = {'Models_0': models_0,
                   'Model_1' : model_1,
                   'col':col,
                   'MSE': stacking_MSE[col],
                   'MAE': stacking_MAE[col],
                   'Accuracy': accuracy,
                   'W_precision': precision_weighted,
                   'W_recall': recall_weighted,
                   'W_F1score': f1score_weighted,
                   'M_precision': precision_macro,
                   'M_recall': recall_macro,
                   'M_F1score': f1score_macro,
                   }
    print(result_dict)
    total_results.append(result_dict)
    # print("====================")

{0.0, 1.0, 2.0, 3.0}
multi:softmax
{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'KNN', 'col': 'av', 'MSE': 0.1998426435877262, 'MAE': 0.15420928402832415, 'Accuracy': 0.8662470495672698, 'W_precision': 0.8658921828839624, 'W_recall': 0.8662470495672698, 'W_F1score': 0.8586538872156277, 'M_precision': 0.9122237915341364, 'M_recall': 0.5579225300383129, 'M_F1score': 0.6277633464147693}
{0.0, 1.0}
binary:logistic
{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'KNN', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score': 0.7000700458481917}
{0.0, 1.0, 2.0}
multi:softmax
{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'KNN', 'col': 'pr', 'MSE': 0.6459968602825745, 'MAE': 0.34301412872841447, 'Accuracy': 0.8084772370486656, 'W_precision': 0.8083

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'KNN', 'col': 's', 'MSE': 0.1720345640219953, 'MAE': 0.1720345640219953, 'Accuracy': 0.8279654359780048, 'W_precision': 0.6855267631742474, 'W_recall': 0.8279654359780048, 'W_F1score': 0.7500434632753047, 'M_precision': 0.4139827179890024, 'M_recall': 0.5, 'M_F1score': 0.4529437043403524}
{0.0, 1.0, 2.0}
multi:softmax
{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'KNN', 'col': 'c', 'MSE': 0.3904163393558523, 'MAE': 0.24901806755695208, 'Accuracy': 0.821681068342498, 'W_precision': 0.8252235636003082, 'W_recall': 0.821681068342498, 'W_F1score': 0.8122274929739052, 'M_precision': 0.8298838257550188, 'M_recall': 0.739920503374023, 'M_F1score': 0.7682710582668243}
{0.0, 1.0, 2.0}
multi:softmax
{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'KNN', 'col': 'i', 'MSE': 0.3826771653543307, 'MAE': 0.25826771653543307, 'Accuracy': 0.8039370078740158, 'W_precision': 0.8279257359759103, 'W_recall': 0.8039370078740158, 'W_F1s

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'GNB', 'col': 'pr', 'MSE': 0.6797488226059655, 'MAE': 0.3751962323390895, 'Accuracy': 0.7770800627943485, 'W_precision': 0.7184851738101435, 'W_recall': 0.7770800627943485, 'W_F1score': 0.7461771458675482, 'M_precision': 0.49508886462167445, 'M_recall': 0.5252179645653069, 'M_F1score': 0.5093098164305595}
{0.0, 1.0}
binary:logistic
{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'GNB', 'col': 'ui', 'MSE': 0.14521193092621665, 'MAE': 0.14521193092621665, 'Accuracy': 0.8547880690737834, 'W_precision': 0.8536771966540238, 'W_recall': 0.8547880690737834, 'W_F1score': 0.8529617308843465, 'M_precision': 0.8495882407259723, 'M_recall': 0.8324074074074075, 'M_F1score': 0.8395167731493429}
{0.0, 1.0}
binary:logistic
{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB', 'Model_1': 'GNB', 'col': 's', 'MSE': 0.07148468185388845, 'MAE': 0.07148468185388845, 'Accuracy': 0.9285153181461115, 'W_precision': 0.9291959713035559, 'W_recall': 0.9285153

In [None]:
total_results

[{'Models_0': ', KNN, DT, SVC, RF, XGB, GNB',
  'Model_1': 'KNN',
  'col': 'av',
  'MSE': 0.1998426435877262,
  'MAE': 0.15420928402832415,
  'Accuracy': 0.8662470495672698,
  'W_precision': 0.8658921828839624,
  'W_recall': 0.8662470495672698,
  'W_F1score': 0.8586538872156277,
  'M_precision': 0.9122237915341364,
  'M_recall': 0.5579225300383129,
  'M_F1score': 0.6277633464147693},
 {'Models_0': ', KNN, DT, SVC, RF, XGB, GNB',
  'Model_1': 'KNN',
  'col': 'ac',
  'MSE': 0.05813040062843677,
  'MAE': 0.05813040062843677,
  'Accuracy': 0.9418695993715632,
  'W_precision': 0.93155125381858,
  'W_recall': 0.9418695993715632,
  'W_F1score': 0.9338307690706988,
  'M_precision': 0.781527940186386,
  'M_recall': 0.6590972806279787,
  'M_F1score': 0.7000700458481917},
 {'Models_0': ', KNN, DT, SVC, RF, XGB, GNB',
  'Model_1': 'KNN',
  'col': 'pr',
  'MSE': 0.6459968602825745,
  'MAE': 0.34301412872841447,
  'Accuracy': 0.8084772370486656,
  'W_precision': 0.8083956373554094,
  'W_recall': 0.8

In [None]:
total_results_df = pd.DataFrame(total_results)
total_results_df.to_csv(output_dir + 'Stacking'+output_name)

In [None]:
total_results_df

Unnamed: 0,Models_0,Model_1,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,", KNN, DT, SVC, RF, XGB, GNB",KNN,av,0.199843,0.154209,0.866247,0.865892,0.866247,0.858654,0.912224,0.557923,0.627763
1,", KNN, DT, SVC, RF, XGB, GNB",KNN,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007
2,", KNN, DT, SVC, RF, XGB, GNB",KNN,pr,0.645997,0.343014,0.808477,0.808396,0.808477,0.800984,0.828681,0.681811,0.730403
3,", KNN, DT, SVC, RF, XGB, GNB",KNN,ui,0.169545,0.169545,0.830455,0.846295,0.830455,0.819494,0.862415,0.777828,0.797185
4,", KNN, DT, SVC, RF, XGB, GNB",KNN,s,0.172035,0.172035,0.827965,0.685527,0.827965,0.750043,0.413983,0.5,0.452944
5,", KNN, DT, SVC, RF, XGB, GNB",KNN,c,0.390416,0.249018,0.821681,0.825224,0.821681,0.812227,0.829884,0.739921,0.768271
6,", KNN, DT, SVC, RF, XGB, GNB",KNN,i,0.382677,0.258268,0.803937,0.827926,0.803937,0.798082,0.83795,0.769361,0.787055
7,", KNN, DT, SVC, RF, XGB, GNB",KNN,a,0.181461,0.159466,0.851532,0.858586,0.851532,0.842319,0.908541,0.588654,0.616476
8,", KNN, DT, SVC, RF, XGB, GNB",DT,av,0.184107,0.147915,0.869394,0.870912,0.869394,0.863298,0.910718,0.56515,0.631034
9,", KNN, DT, SVC, RF, XGB, GNB",DT,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007


## Bagging (Bootstrapping)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier

Model = [KNeighborsClassifier(), DecisionTreeClassifier(), SVC(), RandomForestClassifier(),
                xgb.XGBClassifier(), GaussianNB()]

Model_names = ['KNN', 'DT', 'SVC', 'RF', 'XGB', 'GNB']

# estimator = Model[0]

total_results_bagging = []
for index, estimator in enumerate(Model):
  estimator_name = Model_names[index]

  bagging_predictions = dict()
  bagging_MSE = dict()
  bagging_MAE = dict()

  for col in columns:
    X_train, X_test, y_train, y_test = train_test_df[col]
    print(set(y_train))

    if estimator_name == 'XGB':
      if len(set(y_train))>2:
          objective ='multi:softmax'
      elif len(set(y_train))<=2:
          objective='binary:logistic'
      print(objective)
      estimator = xgb.XGBClassifier(objective=objective)

    bagging_model = BaggingClassifier(estimator=estimator, n_estimators=100, random_state=0).fit(X_train, y_train)
    pred_final = bagging_model.predict(X_test)

    bagging_MSE[col] = mean_squared_error(y_test, pred_final)
    bagging_MAE[col] = mean_absolute_error(y_test, pred_final)
    bagging_predictions[col] = pred_final


    # print(set(y_test), set(pred_final))

    # print(f"Col: {col}; MSE: {bagging_MSE[col]}; MAE: {bagging_MAE[col]}")

    # print(classification_report(y_test, pred_final))
    # print("====================")

    accuracy = accuracy_score(y_test, pred_final)
    precision_weighted = precision_score(y_test, pred_final, average='weighted')
    precision_macro = precision_score(y_test, pred_final, average='macro')

    recall_weighted = recall_score(y_test, pred_final, average='weighted')
    recall_macro = recall_score(y_test, pred_final, average='macro')

    f1score_weighted = f1_score(y_test, pred_final, average='weighted')
    f1score_macro = f1_score(y_test, pred_final, average='macro')

    # print(classification_report(y_test, pred_final))

    print(estimator)

    result_dict = {'Estimator' : estimator_name,
                   'col':col,
                   'MSE': bagging_MSE[col],
                   'MAE': bagging_MAE[col],
                   'Accuracy': accuracy,
                   'W_precision': precision_weighted,
                   'W_recall': recall_weighted,
                   'W_F1score': f1score_weighted,
                   'M_precision': precision_macro,
                   'M_recall': recall_macro,
                   'M_F1score': f1score_macro,
                   }
    print(result_dict)
    total_results_bagging.append(result_dict)

{0.0, 1.0, 2.0, 3.0}
KNeighborsClassifier()
{'Estimator': 'KNN', 'col': 'av', 'MSE': 0.1966955153422502, 'MAE': 0.15263571990558616, 'Accuracy': 0.8670338316286389, 'W_precision': 0.8624089452543834, 'W_recall': 0.8670338316286389, 'W_F1score': 0.8628054070664513, 'M_precision': 0.7889498432601881, 'M_recall': 0.6025892603668079, 'M_F1score': 0.6655110481997124}
{0.0, 1.0}
KNeighborsClassifier()
{'Estimator': 'KNN', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score': 0.7000700458481917}
{0.0, 1.0, 2.0}
KNeighborsClassifier()
{'Estimator': 'KNN', 'col': 'pr', 'MSE': 0.6459968602825745, 'MAE': 0.34301412872841447, 'Accuracy': 0.8084772370486656, 'W_precision': 0.8083956373554094, 'W_recall': 0.8084772370486656, 'W_F1score': 0.8009843130724408, 'M_precision': 0.82868

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


SVC()
{'Estimator': 'SVC', 'col': 'av', 'MSE': 0.2155782848151062, 'MAE': 0.16679779701022818, 'Accuracy': 0.8544453186467349, 'W_precision': 0.8352594438680944, 'W_recall': 0.8544453186467349, 'W_F1score': 0.8415781948603175, 'M_precision': 0.6571216897338653, 'M_recall': 0.5089445041288455, 'M_F1score': 0.5564978505123974}
{0.0, 1.0}
SVC()
{'Estimator': 'SVC', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score': 0.7000700458481917}
{0.0, 1.0, 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


SVC()
{'Estimator': 'SVC', 'col': 'pr', 'MSE': 0.6797488226059655, 'MAE': 0.3751962323390895, 'Accuracy': 0.7770800627943485, 'W_precision': 0.7184851738101435, 'W_recall': 0.7770800627943485, 'W_F1score': 0.7461771458675482, 'M_precision': 0.49508886462167445, 'M_recall': 0.5252179645653069, 'M_F1score': 0.5093098164305595}
{0.0, 1.0}
SVC()
{'Estimator': 'SVC', 'col': 'ui', 'MSE': 0.1695447409733124, 'MAE': 0.1695447409733124, 'Accuracy': 0.8304552590266876, 'W_precision': 0.84629534702421, 'W_recall': 0.8304552590266876, 'W_F1score': 0.8194939932153206, 'M_precision': 0.8624149659863946, 'M_recall': 0.777828331204768, 'M_F1score': 0.7971845933874795}
{0.0, 1.0}
SVC()
{'Estimator': 'SVC', 'col': 's', 'MSE': 0.07148468185388845, 'MAE': 0.07148468185388845, 'Accuracy': 0.9285153181461115, 'W_precision': 0.9291959713035559, 'W_recall': 0.9285153181461115, 'W_F1score': 0.9288337511593536, 'M_precision': 0.8723614326569522, 'M_recall': 0.8790560855362914, 'M_F1score': 0.8756554379479385}
{

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


GaussianNB()
{'Estimator': 'GNB', 'col': 'av', 'MSE': 0.24862313139260425, 'MAE': 0.18568056648308418, 'Accuracy': 0.8402832415420929, 'W_precision': 0.8266527286352593, 'W_recall': 0.8402832415420929, 'W_F1score': 0.8325698668952356, 'M_precision': 0.48561732395235446, 'M_recall': 0.5635928125312963, 'M_F1score': 0.5112116081158444}
{0.0, 1.0}
GaussianNB()
{'Estimator': 'GNB', 'col': 'ac', 'MSE': 0.13668499607227022, 'MAE': 0.13668499607227022, 'Accuracy': 0.8633150039277298, 'W_precision': 0.9103075477260356, 'W_recall': 0.8633150039277298, 'W_F1score': 0.8832905397822916, 'M_precision': 0.5936809568898902, 'M_recall': 0.6723637310264728, 'M_F1score': 0.6141080139372822}
{0.0, 1.0, 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


GaussianNB()
{'Estimator': 'GNB', 'col': 'pr', 'MSE': 0.6797488226059655, 'MAE': 0.3751962323390895, 'Accuracy': 0.7770800627943485, 'W_precision': 0.7184851738101435, 'W_recall': 0.7770800627943485, 'W_F1score': 0.7461771458675482, 'M_precision': 0.49508886462167445, 'M_recall': 0.5252179645653069, 'M_F1score': 0.5093098164305595}
{0.0, 1.0}
GaussianNB()
{'Estimator': 'GNB', 'col': 'ui', 'MSE': 0.16326530612244897, 'MAE': 0.16326530612244897, 'Accuracy': 0.8367346938775511, 'W_precision': 0.8486703362046998, 'W_recall': 0.8367346938775511, 'W_F1score': 0.8390462410260547, 'M_precision': 0.8240885966499769, 'M_recall': 0.8430661983822904, 'M_F1score': 0.8295248682476944}
{0.0, 1.0}
GaussianNB()
{'Estimator': 'GNB', 'col': 's', 'MSE': 0.07148468185388845, 'MAE': 0.07148468185388845, 'Accuracy': 0.9285153181461115, 'W_precision': 0.9291959713035559, 'W_recall': 0.9285153181461115, 'W_F1score': 0.9288337511593536, 'M_precision': 0.8723614326569522, 'M_recall': 0.8790560855362914, 'M_F1sco

In [None]:
baggin_total_results_df = pd.DataFrame(total_results_bagging)
baggin_total_results_df.to_csv(output_dir + 'Bagging'+output_name)

In [None]:
baggin_total_results_df

Unnamed: 0,Estimator,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,KNN,av,0.196696,0.152636,0.867034,0.862409,0.867034,0.862805,0.78895,0.602589,0.665511
1,KNN,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007
2,KNN,pr,0.645997,0.343014,0.808477,0.808396,0.808477,0.800984,0.828681,0.681811,0.730403
3,KNN,ui,0.145212,0.145212,0.854788,0.853677,0.854788,0.852962,0.849588,0.832407,0.839517
4,KNN,s,0.071485,0.071485,0.928515,0.929196,0.928515,0.928834,0.872361,0.879056,0.875655
5,KNN,c,0.35978,0.230951,0.833464,0.831858,0.833464,0.832463,0.805063,0.793347,0.798961
6,KNN,i,0.362992,0.248031,0.809449,0.823458,0.809449,0.804763,0.830294,0.777004,0.792862
7,KNN,a,0.181461,0.159466,0.851532,0.858586,0.851532,0.842319,0.908541,0.588654,0.616476
8,DT,av,0.196696,0.152636,0.867034,0.862409,0.867034,0.862805,0.78895,0.602589,0.665511
9,DT,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007


In [None]:
bagging_MAE

{'av': 0.18568056648308418,
 'ac': 0.13668499607227022,
 'pr': 0.3751962323390895,
 'ui': 0.16326530612244897,
 's': 0.07148468185388845,
 'c': 0.32050274941084056,
 'i': 0.2614173228346457,
 'a': 0.1633935585231736}

In [None]:
bagging_MSE

{'av': 0.24862313139260425,
 'ac': 0.13668499607227022,
 'pr': 0.6797488226059655,
 'ui': 0.16326530612244897,
 's': 0.07148468185388845,
 'c': 0.4257659073055774,
 'i': 0.36220472440944884,
 'a': 0.18224666142969365}

## Boosting (GradientBoostingClassifier)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier

boosting_predictions = dict()
boosting_MSE = dict()
boosting_MAE = dict()

boosting1_total_results = []

for col in columns:
  X_train, X_test, y_train, y_test = train_test_df[col]
  boosting_model = GradientBoostingClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)
  pred_final = boosting_model.predict(X_test)

  boosting_MSE[col] = mean_squared_error(y_test, pred_final)
  boosting_MAE[col] = mean_absolute_error(y_test, pred_final)
  boosting_predictions[col] = pred_final

  accuracy = accuracy_score(y_test, pred_final)
  precision_weighted = precision_score(y_test, pred_final, average='weighted')
  precision_macro = precision_score(y_test, pred_final, average='macro')

  recall_weighted = recall_score(y_test, pred_final, average='weighted')
  recall_macro = recall_score(y_test, pred_final, average='macro')

  f1score_weighted = f1_score(y_test, pred_final, average='weighted')
  f1score_macro = f1_score(y_test, pred_final, average='macro')

  # print(classification_report(y_test, pred_final))

  result_dict = {'Model' : 'GradientBoostingClassifier',
                  'col':col,
                  'MSE': boosting_MSE[col],
                  'MAE': boosting_MAE[col],
                  'Accuracy': accuracy,
                  'W_precision': precision_weighted,
                  'W_recall': recall_weighted,
                  'W_F1score': f1score_weighted,
                  'M_precision': precision_macro,
                  'M_recall': recall_macro,
                  'M_F1score': f1score_macro,
                  }
  print(result_dict)
  boosting1_total_results.append(result_dict)



  # print(set(y_test), set(pred_final))
  # print(f"Col: {col}; MSE: {boosting_MSE[col]}; MAE: {boosting_MAE[col]}")
  # print(classification_report(y_test, pred_final))
  # print("====================")

{'Model': 'GradientBoostingClassifier', 'col': 'av', 'MSE': 0.1966955153422502, 'MAE': 0.15263571990558616, 'Accuracy': 0.8670338316286389, 'W_precision': 0.8624089452543834, 'W_recall': 0.8670338316286389, 'W_F1score': 0.8628054070664513, 'M_precision': 0.7889498432601881, 'M_recall': 0.6025892603668079, 'M_F1score': 0.6655110481997124}
{'Model': 'GradientBoostingClassifier', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score': 0.7000700458481917}
{'Model': 'GradientBoostingClassifier', 'col': 'pr', 'MSE': 0.6459968602825745, 'MAE': 0.34301412872841447, 'Accuracy': 0.8084772370486656, 'W_precision': 0.8083956373554094, 'W_recall': 0.8084772370486656, 'W_F1score': 0.8009843130724408, 'M_precision': 0.8286805192063255, 'M_recall': 0.6818105571578995, 'M_F1score': 0.

In [None]:
boosting1_total_results_df = pd.DataFrame(boosting1_total_results)
boosting1_total_results_df.to_csv(output_dir + 'GradientBoosting'+output_name)

In [None]:
boosting1_total_results_df

Unnamed: 0,Model,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,GradientBoostingClassifier,av,0.196696,0.152636,0.867034,0.862409,0.867034,0.862805,0.78895,0.602589,0.665511
1,GradientBoostingClassifier,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007
2,GradientBoostingClassifier,pr,0.645997,0.343014,0.808477,0.808396,0.808477,0.800984,0.828681,0.681811,0.730403
3,GradientBoostingClassifier,ui,0.145212,0.145212,0.854788,0.853677,0.854788,0.852962,0.849588,0.832407,0.839517
4,GradientBoostingClassifier,s,0.071485,0.071485,0.928515,0.929196,0.928515,0.928834,0.872361,0.879056,0.875655
5,GradientBoostingClassifier,c,0.35978,0.230951,0.833464,0.831858,0.833464,0.832463,0.805063,0.793347,0.798961
6,GradientBoostingClassifier,i,0.369291,0.251181,0.807874,0.820364,0.807874,0.804212,0.816909,0.787352,0.792702
7,GradientBoostingClassifier,a,0.181461,0.159466,0.851532,0.858586,0.851532,0.842319,0.908541,0.588654,0.616476


## AdaBoost

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier

adaboost_predictions = dict()
adaboost_MSE = dict()
adaboost_MAE = dict()

boosting2_total_results = []
for col in columns:
  X_train, X_test, y_train, y_test = train_test_df[col]
  adaboost_model = AdaBoostClassifier(n_estimators=100, random_state=0).fit(X_train, y_train)
  pred_final = adaboost_model.predict(X_test)

  adaboost_MSE[col] = mean_squared_error(y_test, pred_final)
  adaboost_MAE[col] = mean_absolute_error(y_test, pred_final)
  adaboost_predictions[col] = pred_final

  accuracy = accuracy_score(y_test, pred_final)
  precision_weighted = precision_score(y_test, pred_final, average='weighted')
  precision_macro = precision_score(y_test, pred_final, average='macro')

  recall_weighted = recall_score(y_test, pred_final, average='weighted')
  recall_macro = recall_score(y_test, pred_final, average='macro')

  f1score_weighted = f1_score(y_test, pred_final, average='weighted')
  f1score_macro = f1_score(y_test, pred_final, average='macro')

  # print(classification_report(y_test, pred_final))

  result_dict = {'Model' : 'GradientBoostingClassifier',
                  'col':col,
                  'MSE': adaboost_MSE[col],
                  'MAE': adaboost_MAE[col],
                  'Accuracy': accuracy,
                  'W_precision': precision_weighted,
                  'W_recall': recall_weighted,
                  'W_F1score': f1score_weighted,
                  'M_precision': precision_macro,
                  'M_recall': recall_macro,
                  'M_F1score': f1score_macro,
                  }
  print(result_dict)
  boosting2_total_results.append(result_dict)

  # print(set(y_test), set(pred_final))

  # print(f"Col: {col}; MSE: {adaboost_MSE[col]}; MAE: {adaboost_MAE[col]}")

  # print(classification_report(y_test, pred_final))
  # print("====================")

{'Model': 'GradientBoostingClassifier', 'col': 'av', 'MSE': 0.24626278520849726, 'MAE': 0.16915814319433517, 'Accuracy': 0.8638867033831629, 'W_precision': 0.8648389952291767, 'W_recall': 0.8638867033831629, 'W_F1score': 0.8547137038151168, 'M_precision': 0.7685963917041685, 'M_recall': 0.5827175103272866, 'M_F1score': 0.583389372617533}
{'Model': 'GradientBoostingClassifier', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score': 0.7000700458481917}
{'Model': 'GradientBoostingClassifier', 'col': 'pr', 'MSE': 0.6459968602825745, 'MAE': 0.34301412872841447, 'Accuracy': 0.8084772370486656, 'W_precision': 0.8083956373554094, 'W_recall': 0.8084772370486656, 'W_F1score': 0.8009843130724408, 'M_precision': 0.8286805192063255, 'M_recall': 0.6818105571578995, 'M_F1score': 0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
boosting2_total_results_df = pd.DataFrame(boosting2_total_results)
boosting2_total_results_df.to_csv(output_dir + 'AdaBoost'+output_name)

In [None]:
boosting2_total_results_df

Unnamed: 0,Model,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,GradientBoostingClassifier,av,0.246263,0.169158,0.863887,0.864839,0.863887,0.854714,0.768596,0.582718,0.583389
1,GradientBoostingClassifier,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007
2,GradientBoostingClassifier,pr,0.645997,0.343014,0.808477,0.808396,0.808477,0.800984,0.828681,0.681811,0.730403
3,GradientBoostingClassifier,ui,0.145212,0.145212,0.854788,0.853677,0.854788,0.852962,0.849588,0.832407,0.839517
4,GradientBoostingClassifier,s,0.071485,0.071485,0.928515,0.929196,0.928515,0.928834,0.872361,0.879056,0.875655
5,GradientBoostingClassifier,c,0.35978,0.230951,0.833464,0.831858,0.833464,0.832463,0.805063,0.793347,0.798961
6,GradientBoostingClassifier,i,0.362992,0.248031,0.809449,0.823458,0.809449,0.804763,0.830294,0.777004,0.792862
7,GradientBoostingClassifier,a,0.187745,0.162608,0.849961,0.83938,0.849961,0.838778,0.574577,0.559668,0.562783


## Blending


In [None]:
from numpy import mean
from numpy import std
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from vecstack import stacking

from numpy import hstack
import copy

blending_predictions = dict()
blending_MSE = dict()
blending_MAE = dict()

# Models = [KNeighborsClassifier(), DecisionTreeClassifier(), SVC(), RandomForestClassifier(),
#          xgb.XGBClassifier(), GaussianNB()]
# estimator = copy.deepcopy(Models[5])

Model_names = ['KNN', 'DT', 'SVC', 'RF', 'XGB', 'GNB']

total_results_blending = []
for index, estimator_name in enumerate(Model_names):

  for col in columns:
    X_train, X_test, X_val, y_train, y_test, y_val = train_validation_test[col]

    if len(set(y_train))>2:
        Models = [KNeighborsClassifier(), DecisionTreeClassifier(), SVC(), RandomForestClassifier(),
          xgb.XGBClassifier(objective ='multi:softmax'), GaussianNB()]
    elif len(set(y_train))<=2:
        Models = [KNeighborsClassifier(), DecisionTreeClassifier(), SVC(), RandomForestClassifier(),
          xgb.XGBClassifier(objective='binary:logistic'), GaussianNB()]

    final_model = copy.deepcopy(Models[index])
    print("Final Model:", final_model)

    meta_X_fit = []
    ## Fit Ensemble
    for model in Models:
      model.fit(X_train, y_train)
      yhat = model.predict(X_val)
      yhat = yhat.reshape(len(yhat), 1)
      meta_X_fit.append(yhat)
    meta_X_fit = hstack(meta_X_fit)
    blender = final_model.fit(meta_X_fit, y_val)

    meta_X_test = []
    ## Predict Ensemble
    for model in Models:
      yhat = model.predict(X_test)
      yhat = yhat.reshape(len(yhat), 1)
      meta_X_test.append(yhat)
    meta_X_test = hstack(meta_X_test)
    pred_final  = blender.predict(meta_X_test)

    blending_predictions[col] = pred_final

    blending_MSE[col] = mean_squared_error(y_test, pred_final)
    blending_MAE[col] = mean_absolute_error(y_test, pred_final)

    accuracy = accuracy_score(y_test, pred_final)
    precision_weighted = precision_score(y_test, pred_final, average='weighted')
    precision_macro = precision_score(y_test, pred_final, average='macro')

    recall_weighted = recall_score(y_test, pred_final, average='weighted')
    recall_macro = recall_score(y_test, pred_final, average='macro')

    f1score_weighted = f1_score(y_test, pred_final, average='weighted')
    f1score_macro = f1_score(y_test, pred_final, average='macro')

    result_dict = {'Model': estimator_name,
                   'col':col,
                   'MSE': blending_MSE[col],
                   'MAE': blending_MAE[col],
                   'Accuracy': accuracy,
                   'W_precision': precision_weighted,
                   'W_recall': recall_weighted,
                   'W_F1score': f1score_weighted,
                   'M_precision': precision_macro,
                   'M_recall': recall_macro,
                   'M_F1score': f1score_macro,
                   }
    print(result_dict)
    total_results_blending.append(result_dict)


    # print(f"Col: {col}; MSE: {blending_MSE[col]}; MAE: {blending_MAE[col]}")
    # print(classification_report(y_test, pred))
    # print("====================")

Final Model: KNeighborsClassifier()


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'Model': 'KNN', 'col': 'av', 'MSE': 0.2006294256490952, 'MAE': 0.15342250196695514, 'Accuracy': 0.8670338316286389, 'W_precision': 0.8475245169971412, 'W_recall': 0.8670338316286389, 'W_F1score': 0.8534394935450487, 'M_precision': 0.5888646234809543, 'M_recall': 0.5143815380973625, 'M_F1score': 0.5439135874749985}
Final Model: KNeighborsClassifier()
{'Model': 'KNN', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score': 0.7000700458481917}
Final Model: KNeighborsClassifier()
{'Model': 'KNN', 'col': 'pr', 'MSE': 0.6459968602825745, 'MAE': 0.34301412872841447, 'Accuracy': 0.8084772370486656, 'W_precision': 0.8083956373554094, 'W_recall': 0.8084772370486656, 'W_F1score': 0.8009843130724408, 'M_precision': 0.8286805192063255, 'M_recall': 0.6818105571578995, 'M_F1score':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'Model': 'KNN', 'col': 'a', 'MSE': 0.18538884524744698, 'MAE': 0.1633935585231736, 'Accuracy': 0.8476040848389631, 'W_precision': 0.8351317498424304, 'W_recall': 0.8476040848389631, 'W_F1score': 0.8367585914411261, 'M_precision': 0.570137759904167, 'M_recall': 0.5591312805756431, 'M_F1score': 0.5612760408407348}
Final Model: DecisionTreeClassifier()
{'Model': 'DT', 'col': 'av', 'MSE': 0.1966955153422502, 'MAE': 0.15263571990558616, 'Accuracy': 0.8670338316286389, 'W_precision': 0.8624089452543834, 'W_recall': 0.8670338316286389, 'W_F1score': 0.8628054070664513, 'M_precision': 0.7889498432601881, 'M_recall': 0.6025892603668079, 'M_F1score': 0.6655110481997124}
Final Model: DecisionTreeClassifier()
{'Model': 'DT', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score':

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'Model': 'XGB', 'col': 'a', 'MSE': 0.18538884524744698, 'MAE': 0.1633935585231736, 'Accuracy': 0.8476040848389631, 'W_precision': 0.8351317498424304, 'W_recall': 0.8476040848389631, 'W_F1score': 0.8367585914411261, 'M_precision': 0.570137759904167, 'M_recall': 0.5591312805756431, 'M_F1score': 0.5612760408407348}
Final Model: GaussianNB()
{'Model': 'GNB', 'col': 'av', 'MSE': 0.2022029897718332, 'MAE': 0.16601101494885917, 'Accuracy': 0.8512981904012589, 'W_precision': 0.8501547620223323, 'W_recall': 0.8512981904012589, 'W_F1score': 0.8506148409622196, 'M_precision': 0.603779616331181, 'M_recall': 0.5632708736996226, 'M_F1score': 0.5808778771135993}
Final Model: GaussianNB()
{'Model': 'GNB', 'col': 'ac', 'MSE': 0.05813040062843677, 'MAE': 0.05813040062843677, 'Accuracy': 0.9418695993715632, 'W_precision': 0.93155125381858, 'W_recall': 0.9418695993715632, 'W_F1score': 0.9338307690706988, 'M_precision': 0.781527940186386, 'M_recall': 0.6590972806279787, 'M_F1score': 0.7000700458481917}
Fi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'Model': 'GNB', 'col': 'pr', 'MSE': 0.6797488226059655, 'MAE': 0.3751962323390895, 'Accuracy': 0.7770800627943485, 'W_precision': 0.7184851738101435, 'W_recall': 0.7770800627943485, 'W_F1score': 0.7461771458675482, 'M_precision': 0.49508886462167445, 'M_recall': 0.5252179645653069, 'M_F1score': 0.5093098164305595}
Final Model: GaussianNB()
{'Model': 'GNB', 'col': 'ui', 'MSE': 0.14521193092621665, 'MAE': 0.14521193092621665, 'Accuracy': 0.8547880690737834, 'W_precision': 0.8536771966540238, 'W_recall': 0.8547880690737834, 'W_F1score': 0.8529617308843465, 'M_precision': 0.8495882407259723, 'M_recall': 0.8324074074074075, 'M_F1score': 0.8395167731493429}
Final Model: GaussianNB()
{'Model': 'GNB', 'col': 's', 'MSE': 0.07148468185388845, 'MAE': 0.07148468185388845, 'Accuracy': 0.9285153181461115, 'W_precision': 0.9291959713035559, 'W_recall': 0.9285153181461115, 'W_F1score': 0.9288337511593536, 'M_precision': 0.8723614326569522, 'M_recall': 0.8790560855362914, 'M_F1score': 0.87565543794793

In [None]:
blending_total_results_df = pd.DataFrame(total_results_blending)
blending_total_results_df.to_csv(output_dir + 'Blending'+output_name)

In [None]:
blending_total_results_df

Unnamed: 0,Model,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,KNN,av,0.200629,0.153423,0.867034,0.847525,0.867034,0.853439,0.588865,0.514382,0.543914
1,KNN,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007
2,KNN,pr,0.645997,0.343014,0.808477,0.808396,0.808477,0.800984,0.828681,0.681811,0.730403
3,KNN,ui,0.145212,0.145212,0.854788,0.853677,0.854788,0.852962,0.849588,0.832407,0.839517
4,KNN,s,0.071485,0.071485,0.928515,0.929196,0.928515,0.928834,0.872361,0.879056,0.875655
5,KNN,c,0.363708,0.230165,0.836606,0.834252,0.836606,0.834309,0.815705,0.788601,0.800617
6,KNN,i,0.340945,0.233858,0.819685,0.820906,0.819685,0.820114,0.804568,0.81321,0.808633
7,KNN,a,0.185389,0.163394,0.847604,0.835132,0.847604,0.836759,0.570138,0.559131,0.561276
8,DT,av,0.196696,0.152636,0.867034,0.862409,0.867034,0.862805,0.78895,0.602589,0.665511
9,DT,ac,0.05813,0.05813,0.94187,0.931551,0.94187,0.933831,0.781528,0.659097,0.70007


In [None]:
 blending_MAE

{'av': 0.16601101494885917,
 'ac': 0.05813040062843677,
 'pr': 0.3751962323390895,
 'ui': 0.14521193092621665,
 's': 0.07148468185388845,
 'c': 0.23095051060487037,
 'i': 0.25196850393700787,
 'a': 0.1681068342498036}

In [None]:
 blending_MSE

{'av': 0.2022029897718332,
 'ac': 0.05813040062843677,
 'pr': 0.6797488226059655,
 'ui': 0.14521193092621665,
 's': 0.07148468185388845,
 'c': 0.35978004713275724,
 'i': 0.3700787401574803,
 'a': 0.19638648860958366}

## LLMs Evaluation

* gpt4_original_vec
* gpt4_gpt4_vec
* gpt4_gpt35_vec


* gpt35_original_vec
* gpt35_gpt4_vec


* original_vec

### GPT4_original

In [None]:
MSE_gpt4 = dict()
MAE_gpt4 = dict()
print("GPT4 MSE:")

gpt4_results = []
for col in columns:
    pred_final = np.array(gpt4_original_vec[col])
    y_true = np.array(original_vec[col])
    MSE_gpt4[col] = mean_squared_error(pred_final, y_true)
    MAE_gpt4[col] = mean_absolute_error(pred_final, y_true)

    accuracy = accuracy_score(y_true, pred_final)
    precision_weighted = precision_score(y_true, pred_final, average='weighted')
    precision_macro = precision_score(y_true, pred_final, average='macro')

    recall_weighted = recall_score(y_true, pred_final, average='weighted')
    recall_macro = recall_score(y_true, pred_final, average='macro')

    f1score_weighted = f1_score(y_true, pred_final, average='weighted')
    f1score_macro = f1_score(y_true, pred_final, average='macro')

    result_dict = {'Models': 'GPT4',
                    'Descriptions': 'Original',
                    'col':col,
                    'MSE': MSE_gpt4[col],
                    'MAE': MAE_gpt4[col],
                    'Accuracy': accuracy,
                    'W_precision': precision_weighted,
                    'W_recall': recall_weighted,
                    'W_F1score': f1score_weighted,
                    'M_precision': precision_macro,
                    'M_recall': recall_macro,
                    'M_F1score': f1score_macro,
                    }
    print(result_dict)
    gpt4_results.append(result_dict)

  # print(f"Col: {col}; MSE: {MSE_gpt4[col]}; MAE: {MAE_gpt4[col]}")

  # print(classification_report(np.array(gpt4_vec[col]), np.array(original_vec[col])))
  # print("====================")

GPT4 MSE:
{'Models': 'GPT4', 'Descriptions': 'Original', 'col': 'av', 'MSE': 0.3178391959798995, 'MAE': 0.21105527638190955, 'Accuracy': 0.8310301507537688, 'W_precision': 0.8419058046876521, 'W_recall': 0.8310301507537688, 'W_F1score': 0.8345855355167203, 'M_precision': 0.5740294228240655, 'M_recall': 0.5975162655671705, 'M_F1score': 0.5783786221849773}
{'Models': 'GPT4', 'Descriptions': 'Original', 'col': 'ac', 'MSE': 0.13159547738693467, 'MAE': 0.13159547738693467, 'Accuracy': 0.8684045226130653, 'W_precision': 0.8779937677093579, 'W_recall': 0.8684045226130653, 'W_F1score': 0.8730616546773159, 'M_precision': 0.5624842156386596, 'M_recall': 0.5720087649703427, 'M_F1score': 0.5665996471322429}
{'Models': 'GPT4', 'Descriptions': 'Original', 'col': 'pr', 'MSE': 0.7160804020100503, 'MAE': 0.3866206030150754, 'Accuracy': 0.778109296482412, 'W_precision': 0.7870588768968362, 'W_recall': 0.778109296482412, 'W_F1score': 0.7530394958968494, 'M_precision': 0.7558863096597938, 'M_recall': 0.63

In [None]:
gpt4_total_results_df = pd.DataFrame(gpt4_results)
gpt4_total_results_df.to_csv(output_dir + 'GPT4'+output_name)

In [None]:
gpt4_total_results_df

Unnamed: 0,Models,Descriptions,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,GPT4,Original,av,0.317839,0.211055,0.83103,0.841906,0.83103,0.834586,0.574029,0.597516,0.578379
1,GPT4,Original,ac,0.131595,0.131595,0.868405,0.877994,0.868405,0.873062,0.562484,0.572009,0.5666
2,GPT4,Original,pr,0.71608,0.386621,0.778109,0.787059,0.778109,0.753039,0.755886,0.632021,0.661861
3,GPT4,Original,ui,0.183103,0.183103,0.816897,0.81447,0.816897,0.814812,0.803147,0.78963,0.795357
4,GPT4,Original,s,0.163317,0.163317,0.836683,0.855631,0.836683,0.768432,0.894148,0.518364,0.490948
5,GPT4,Original,c,0.701005,0.404209,0.74419,0.75986,0.74419,0.717056,0.783557,0.61439,0.652849
6,GPT4,Original,i,0.427136,0.330402,0.717965,0.735831,0.717965,0.692126,0.70957,0.624261,0.610003
7,GPT4,Original,a,0.220006,0.187343,0.828989,0.837709,0.828989,0.828221,0.633316,0.62143,0.621445


In [None]:
MAE_gpt4

{'av': 0.21105527638190955,
 'ac': 0.13159547738693467,
 'pr': 0.3866206030150754,
 'ui': 0.18310301507537688,
 's': 0.16331658291457288,
 'c': 0.4042085427135678,
 'i': 0.33040201005025127,
 'a': 0.1873429648241206}

In [None]:
MSE_gpt4

{'av': 0.3178391959798995,
 'ac': 0.13159547738693467,
 'pr': 0.7160804020100503,
 'ui': 0.18310301507537688,
 's': 0.16331658291457288,
 'c': 0.7010050251256281,
 'i': 0.4271356783919598,
 'a': 0.22000628140703518}

### GPT4_GPT4

In [None]:
MSE_gpt4 = dict()
MAE_gpt4 = dict()
print("GPT4 MSE:")

gpt4_gpt4_results = []
for col in columns:
    pred_final = np.array(gpt4_gpt4_vec[col])
    y_true = np.array(original_vec[col])
    MSE_gpt4[col] = mean_squared_error(pred_final, y_true)
    MAE_gpt4[col] = mean_absolute_error(pred_final, y_true)

    accuracy = accuracy_score(y_true, pred_final)
    precision_weighted = precision_score(y_true, pred_final, average='weighted')
    precision_macro = precision_score(y_true, pred_final, average='macro')

    recall_weighted = recall_score(y_true, pred_final, average='weighted')
    recall_macro = recall_score(y_true, pred_final, average='macro')

    f1score_weighted = f1_score(y_true, pred_final, average='weighted')
    f1score_macro = f1_score(y_true, pred_final, average='macro')

    result_dict = {'Models': 'GPT4',
                    'Descriptions': 'GPT4',
                    'col':col,
                    'MSE': MSE_gpt4[col],
                    'MAE': MAE_gpt4[col],
                    'Accuracy': accuracy,
                    'W_precision': precision_weighted,
                    'W_recall': recall_weighted,
                    'W_F1score': f1score_weighted,
                    'M_precision': precision_macro,
                    'M_recall': recall_macro,
                    'M_F1score': f1score_macro,
                    }
    print(result_dict)
    gpt4_gpt4_results.append(result_dict)

  # print(f"Col: {col}; MSE: {MSE_gpt4[col]}; MAE: {MAE_gpt4[col]}")

  # print(classification_report(np.array(gpt4_vec[col]), np.array(original_vec[col])))
  # print("====================")

GPT4 MSE:
{'Models': 'GPT4', 'Descriptions': 'GPT4', 'col': 'av', 'MSE': 0.2506281407035176, 'MAE': 0.18090452261306533, 'Accuracy': 0.8489321608040201, 'W_precision': 0.851477800828032, 'W_recall': 0.8489321608040201, 'W_F1score': 0.8496904201169018, 'M_precision': 0.6461542440821558, 'M_recall': 0.6242272936616557, 'M_F1score': 0.6328337542350086}
{'Models': 'GPT4', 'Descriptions': 'GPT4', 'col': 'ac', 'MSE': 0.13033919597989949, 'MAE': 0.13033919597989949, 'Accuracy': 0.8696608040201005, 'W_precision': 0.8782839911630169, 'W_recall': 0.8696608040201005, 'W_F1score': 0.8738587363341972, 'M_precision': 0.5638872491145218, 'M_recall': 0.5726888057727908, 'M_F1score': 0.5677500502954757}
{'Models': 'GPT4', 'Descriptions': 'GPT4', 'col': 'pr', 'MSE': 0.7250314070351759, 'MAE': 0.3908605527638191, 'Accuracy': 0.7762248743718593, 'W_precision': 0.7813765618091034, 'W_recall': 0.7762248743718593, 'W_F1score': 0.7512878181888799, 'M_precision': 0.7538037591216025, 'M_recall': 0.6227613789223

In [None]:
gpt4_gpt4_total_results_df = pd.DataFrame(gpt4_gpt4_results)
gpt4_gpt4_total_results_df.to_csv(output_dir + 'GPT4_GPT4'+output_name)

In [None]:
gpt4_gpt4_total_results_df

Unnamed: 0,Models,Descriptions,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,GPT4,GPT4,av,0.250628,0.180905,0.848932,0.851478,0.848932,0.84969,0.646154,0.624227,0.632834
1,GPT4,GPT4,ac,0.130339,0.130339,0.869661,0.878284,0.869661,0.873859,0.563887,0.572689,0.56775
2,GPT4,GPT4,pr,0.725031,0.390861,0.776225,0.781377,0.776225,0.751288,0.753804,0.622761,0.657108
3,GPT4,GPT4,ui,0.17478,0.17478,0.82522,0.824238,0.82522,0.824643,0.809692,0.805631,0.807561
4,GPT4,GPT4,s,0.167085,0.167085,0.832915,0.860894,0.832915,0.759148,0.916273,0.506494,0.467132
5,GPT4,GPT4,c,0.830402,0.471106,0.708543,0.731235,0.708543,0.650746,0.759903,0.548779,0.557691
6,GPT4,GPT4,i,0.669598,0.40892,0.72142,0.740526,0.72142,0.674488,0.759514,0.598415,0.587285
7,GPT4,GPT4,a,0.226759,0.198178,0.816112,0.807674,0.816112,0.810379,0.635665,0.573098,0.585735


### GPT4_GPT3.5turbo

In [None]:
MSE_gpt4 = dict()
MAE_gpt4 = dict()
print("GPT4 GPT3.5turbo")

gpt4_gpt35_results = []
for col in columns:
    pred_final = np.array(gpt4_gpt35_vec[col])
    y_true = np.array(original_vec[col])
    MSE_gpt4[col] = mean_squared_error(pred_final, y_true)
    MAE_gpt4[col] = mean_absolute_error(pred_final, y_true)

    accuracy = accuracy_score(y_true, pred_final)
    precision_weighted = precision_score(y_true, pred_final, average='weighted')
    precision_macro = precision_score(y_true, pred_final, average='macro')

    recall_weighted = recall_score(y_true, pred_final, average='weighted')
    recall_macro = recall_score(y_true, pred_final, average='macro')

    f1score_weighted = f1_score(y_true, pred_final, average='weighted')
    f1score_macro = f1_score(y_true, pred_final, average='macro')

    result_dict = {'Models': 'GPT4',
                    'Descriptions': 'GPT35turbo',
                    'col':col,
                    'MSE': MSE_gpt4[col],
                    'MAE': MAE_gpt4[col],
                    'Accuracy': accuracy,
                    'W_precision': precision_weighted,
                    'W_recall': recall_weighted,
                    'W_F1score': f1score_weighted,
                    'M_precision': precision_macro,
                    'M_recall': recall_macro,
                    'M_F1score': f1score_macro,
                    }
    print(result_dict)
    gpt4_gpt35_results.append(result_dict)

  # print(f"Col: {col}; MSE: {MSE_gpt4[col]}; MAE: {MAE_gpt4[col]}")

  # print(classification_report(np.array(gpt4_vec[col]), np.array(original_vec[col])))
  # print("====================")

GPT4 GPT3.5turbo
{'Models': 'GPT4', 'Descriptions': 'GPT35turbo', 'col': 'av', 'MSE': 0.2749685929648241, 'MAE': 0.18985552763819097, 'Accuracy': 0.8459484924623115, 'W_precision': 0.847800278662938, 'W_recall': 0.8459484924623115, 'W_F1score': 0.8468133978472561, 'M_precision': 0.60653637814692, 'M_recall': 0.6225605127425724, 'M_F1score': 0.6141898455642005}
{'Models': 'GPT4', 'Descriptions': 'GPT35turbo', 'col': 'ac', 'MSE': 0.11070979899497488, 'MAE': 0.11070979899497488, 'Accuracy': 0.8892902010050251, 'W_precision': 0.879376503662618, 'W_recall': 0.8892902010050251, 'W_F1score': 0.8841264687822399, 'M_precision': 0.5788466673725694, 'M_recall': 0.5653822151139768, 'M_F1score': 0.5710534454771976}
{'Models': 'GPT4', 'Descriptions': 'GPT35turbo', 'col': 'pr', 'MSE': 0.7525125628140703, 'MAE': 0.4045226130653266, 'Accuracy': 0.7694723618090452, 'W_precision': 0.7683553827675964, 'W_recall': 0.7694723618090452, 'W_F1score': 0.7462540882009358, 'M_precision': 0.738297559025339, 'M_rec

In [None]:
gpt4_gpt35_results_df = pd.DataFrame(gpt4_gpt35_results)
gpt4_gpt35_results_df.to_csv(output_dir + 'GPT4_GPT35turbo'+output_name)

In [None]:
gpt4_gpt35_results_df

Unnamed: 0,Models,Descriptions,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,GPT4,GPT35turbo,av,0.274969,0.189856,0.845948,0.8478,0.845948,0.846813,0.606536,0.622561,0.61419
1,GPT4,GPT35turbo,ac,0.11071,0.11071,0.88929,0.879377,0.88929,0.884126,0.578847,0.565382,0.571053
2,GPT4,GPT35turbo,pr,0.752513,0.404523,0.769472,0.768355,0.769472,0.746254,0.738298,0.622116,0.65453
3,GPT4,GPT35turbo,ui,0.186558,0.186558,0.813442,0.81547,0.813442,0.814273,0.795342,0.80065,0.797784
4,GPT4,GPT35turbo,s,0.168813,0.168813,0.831187,0.825928,0.831187,0.755332,0.815606,0.501761,0.457568
5,GPT4,GPT35turbo,c,0.841394,0.474246,0.709328,0.742703,0.709328,0.64831,0.780471,0.546412,0.553939
6,GPT4,GPT35turbo,i,0.721106,0.429648,0.71608,0.747411,0.71608,0.667865,0.77484,0.588231,0.580813
7,GPT4,GPT35turbo,a,0.246388,0.214981,0.800722,0.800189,0.800722,0.789894,0.622791,0.548247,0.564322


### GPT3.5Turbo_Original

In [None]:
MSE_gpt35turbo = dict()
MAE_gpt35turbo = dict()
print("GPT35 Original")

gpt35_original_results = []
for col in columns:
    array = np.array(pd.concat([gpt35_original_vec[col], original_vec[col]], axis=1))
    array = np.array(array)[~np.isnan(array).any(axis=1)]

    pred_final = array[:,0]
    y_true = array[:,1]

    MSE_gpt35turbo[col] = mean_squared_error(pred_final, y_true)
    MAE_gpt35turbo[col] = mean_absolute_error(pred_final, y_true)

    accuracy = accuracy_score(y_true, pred_final)
    precision_weighted = precision_score(y_true, pred_final, average='weighted')
    precision_macro = precision_score(y_true, pred_final, average='macro')

    recall_weighted = recall_score(y_true, pred_final, average='weighted')
    recall_macro = recall_score(y_true, pred_final, average='macro')

    f1score_weighted = f1_score(y_true, pred_final, average='weighted')
    f1score_macro = f1_score(y_true, pred_final, average='macro')

    result_dict = {'Models': 'GPT35turbo',
                    'Descriptions': 'Original',
                    'col':col,
                    'MSE': MSE_gpt4[col],
                    'MAE': MAE_gpt4[col],
                    'Accuracy': accuracy,
                    'W_precision': precision_weighted,
                    'W_recall': recall_weighted,
                    'W_F1score': f1score_weighted,
                    'M_precision': precision_macro,
                    'M_recall': recall_macro,
                    'M_F1score': f1score_macro,
                    }
    print(result_dict)
    gpt35_original_results.append(result_dict)

  # print(f"Col: {col}; MSE: {MSE_gpt4[col]}; MAE: {MAE_gpt4[col]}")

  # print(classification_report(np.array(gpt4_vec[col]), np.array(original_vec[col])))
  # print("====================")

GPT35 Original
{'Models': 'GPT35turbo', 'Descriptions': 'Original', 'col': 'av', 'MSE': 0.2506281407035176, 'MAE': 0.18090452261306533, 'Accuracy': 0.8716939546599496, 'W_precision': 0.8688085502904845, 'W_recall': 0.8716939546599496, 'W_F1score': 0.8693198980624951, 'M_precision': 0.6453319864019631, 'M_recall': 0.6376646689151425, 'M_F1score': 0.6397012099359399}
{'Models': 'GPT35turbo', 'Descriptions': 'Original', 'col': 'ac', 'MSE': 0.13033919597989949, 'MAE': 0.13033919597989949, 'Accuracy': 0.9347894406033941, 'W_precision': 0.923900116559698, 'W_recall': 0.9347894406033941, 'W_F1score': 0.925877565400667, 'M_precision': 0.7897973451174674, 'M_recall': 0.6648611254806993, 'M_F1score': 0.706376858352677}
{'Models': 'GPT35turbo', 'Descriptions': 'Original', 'col': 'pr', 'MSE': 0.7250314070351759, 'MAE': 0.3908605527638191, 'Accuracy': 0.7934338674206723, 'W_precision': 0.7874810377254362, 'W_recall': 0.7934338674206723, 'W_F1score': 0.7883846830836603, 'M_precision': 0.700939681441

In [None]:
gpt35_original_results_df = pd.DataFrame(gpt35_original_results)
gpt35_original_results_df.to_csv(output_dir + 'GPT35_Original'+output_name)

In [None]:
gpt35_original_results_df

Unnamed: 0,Models,Descriptions,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,GPT35turbo,Original,av,0.250628,0.180905,0.871694,0.868809,0.871694,0.86932,0.645332,0.637665,0.639701
1,GPT35turbo,Original,ac,0.130339,0.130339,0.934789,0.9239,0.934789,0.925878,0.789797,0.664861,0.706377
2,GPT35turbo,Original,pr,0.725031,0.390861,0.793434,0.787481,0.793434,0.788385,0.70094,0.67565,0.686229
3,GPT35turbo,Original,ui,0.17478,0.17478,0.856918,0.855533,0.856918,0.85517,0.849466,0.832771,0.839861
4,GPT35turbo,Original,s,0.167085,0.167085,0.942323,0.941792,0.942323,0.94203,0.900435,0.89238,0.896337
5,GPT35turbo,Original,c,0.830402,0.471106,0.838887,0.83717,0.838887,0.83738,0.820232,0.799863,0.809267
6,GPT35turbo,Original,i,0.669598,0.40892,0.810866,0.816966,0.810866,0.807378,0.811441,0.785776,0.792139
7,GPT35turbo,Original,a,0.226759,0.198178,0.829195,0.821901,0.829195,0.824142,0.642585,0.589425,0.603776


### Gpt3.5turbo GPT4

In [None]:
MSE_gpt35turbo = dict()
MAE_gpt35turbo = dict()
print("GPT35 Original")

gpt35_gpt4_results = []
for col in columns:
    array = np.array(pd.concat([gpt35_gpt4_vec[col], original_vec[col]], axis=1))
    array = np.array(array)[~np.isnan(array).any(axis=1)]

    pred_final = array[:,0]
    y_true = array[:,1]

    MSE_gpt35turbo[col] = mean_squared_error(pred_final, y_true)
    MAE_gpt35turbo[col] = mean_absolute_error(pred_final, y_true)

    accuracy = accuracy_score(y_true, pred_final)
    precision_weighted = precision_score(y_true, pred_final, average='weighted')
    precision_macro = precision_score(y_true, pred_final, average='macro')

    recall_weighted = recall_score(y_true, pred_final, average='weighted')
    recall_macro = recall_score(y_true, pred_final, average='macro')

    f1score_weighted = f1_score(y_true, pred_final, average='weighted')
    f1score_macro = f1_score(y_true, pred_final, average='macro')

    result_dict = {'Models': 'GPT35turbo',
                    'Descriptions': 'GPT4',
                    'col':col,
                    'MSE': MSE_gpt4[col],
                    'MAE': MAE_gpt4[col],
                    'Accuracy': accuracy,
                    'W_precision': precision_weighted,
                    'W_recall': recall_weighted,
                    'W_F1score': f1score_weighted,
                    'M_precision': precision_macro,
                    'M_recall': recall_macro,
                    'M_F1score': f1score_macro,
                    }
    print(result_dict)
    gpt35_gpt4_results.append(result_dict)

  # print(f"Col: {col}; MSE: {MSE_gpt4[col]}; MAE: {MAE_gpt4[col]}")

  # print(classification_report(np.array(gpt4_vec[col]), np.array(original_vec[col])))
  # print("====================")

GPT35 Original
{'Models': 'GPT35turbo', 'Descriptions': 'GPT4', 'col': 'av', 'MSE': 0.2506281407035176, 'MAE': 0.18090452261306533, 'Accuracy': 0.8795580110497238, 'W_precision': 0.8767184791922867, 'W_recall': 0.8795580110497238, 'W_F1score': 0.8776668934859669, 'M_precision': 0.6504030832482728, 'M_recall': 0.6441763816674115, 'M_F1score': 0.6434544451230574}
{'Models': 'GPT35turbo', 'Descriptions': 'GPT4', 'col': 'ac', 'MSE': 0.13033919597989949, 'MAE': 0.13033919597989949, 'Accuracy': 0.928964325003929, 'W_precision': 0.914456972371902, 'W_recall': 0.928964325003929, 'W_F1score': 0.9176830222794341, 'M_precision': 0.7540139761525406, 'M_recall': 0.630908015380065, 'M_F1score': 0.6681535517708317}
{'Models': 'GPT35turbo', 'Descriptions': 'GPT4', 'col': 'pr', 'MSE': 0.7250314070351759, 'MAE': 0.3908605527638191, 'Accuracy': 0.7969216271399403, 'W_precision': 0.7899611820111915, 'W_recall': 0.7969216271399403, 'W_F1score': 0.7897885538452081, 'M_precision': 0.7111293692672048, 'M_reca

In [None]:
gpt35_gpt4_results_df = pd.DataFrame(gpt35_gpt4_results)
gpt35_gpt4_results_df.to_csv(output_dir + 'GPT35_GPT4'+output_name)

In [None]:
gpt35_gpt4_results_df

Unnamed: 0,Models,Descriptions,col,MSE,MAE,Accuracy,W_precision,W_recall,W_F1score,M_precision,M_recall,M_F1score
0,GPT35turbo,GPT4,av,0.250628,0.180905,0.879558,0.876718,0.879558,0.877667,0.650403,0.644176,0.643454
1,GPT35turbo,GPT4,ac,0.130339,0.130339,0.928964,0.914457,0.928964,0.917683,0.754014,0.630908,0.668154
2,GPT35turbo,GPT4,pr,0.725031,0.390861,0.796922,0.789961,0.796922,0.789789,0.711129,0.671696,0.687593
3,GPT35turbo,GPT4,ui,0.17478,0.17478,0.85,0.849334,0.85,0.849611,0.836968,0.833493,0.835165
4,GPT35turbo,GPT4,s,0.167085,0.167085,0.931539,0.931126,0.931539,0.931322,0.880159,0.87537,0.877738
5,GPT35turbo,GPT4,c,0.830402,0.471106,0.815938,0.815717,0.815938,0.811186,0.807513,0.753227,0.774256
6,GPT35turbo,GPT4,i,0.669598,0.40892,0.808316,0.814908,0.808316,0.804793,0.819727,0.772438,0.789526
7,GPT35turbo,GPT4,a,0.226759,0.198178,0.823899,0.818938,0.823899,0.817357,0.649997,0.583648,0.601789
