In [1]:
import pandas as pd
import numpy as np
import random
import time
from scipy.stats import mode
import sys
import os
import csv

sys.path.append(os.path.join("..", ".."))

# Word Embedding
from sentence_transformers import SentenceTransformer

random.seed(0)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from hydra import initialize, compose

with initialize(version_base=None, config_path="../../conf"):
    cfg=compose(config_name="main.yaml")



In [3]:
def set_labels(matrix, classes):
    topks = np.argsort(matrix, axis=1)[:,::-1][:,:5]
    
    return classes[topks]

# E5

In [4]:
import gensim
import spacy
from functions.Vectorize import to_vector


# Data import
# Relative Path of the dataset, change for your dataset
dataset_name = "cpn120"
# Options are "cpn27" and "cpn120"

type_standardization = "normalize"
# options are "raw", "normalize", "normalize_wo_stop", "lemmatize", and "lemmatize_wo_stop"

# import of the data
data = pd.read_csv(cfg.path[type_standardization][dataset_name], delimiter=",")
data = data.fillna(value='')

# Import of the model
model = SentenceTransformer('intfloat/e5-base-v2')



# Embedding matrix

start = time.time()

# Creation of a matrix full of 0 to save the vectors of each feature
descriptions_matrix = np.zeros( # creatign an empty matrix
    (
        len(data),          # the number of data points
        cfg.params.vector_length.sentence_embedding       # the number of components of the word embedding
    )
)

# Change to the name of the descriptions of your dataset.
for i,description in enumerate(data.iloc[:,1]):
    vector = model.encode(description)
    descriptions_matrix[i,] = vector

# Concatenate the matrix with the data of each observation
data_matrix = np.concatenate([descriptions_matrix,data], axis=1)

# Remove of the 'Nan' data
data_matrix = data_matrix[~pd.isnull(data_matrix[:,:cfg.params.vector_length.sentence_embedding]).any(axis=1)]

end = time.time()
print("Execution time:", end-start)

Execution time: 854.6068270206451


In [5]:
name_file = f"../../../data/experiment-top5/experiment_2/resultados_exp_2_{dataset_name}_E5_top5.csv"

fields = ["model","mean_acc_top1","std_top1","mean_acc_top3","std_top3","mean_acc_top5","std_top5","E-type"]

if not os.path.isfile(name_file):
    with open(name_file, 'w', newline='') as f:
        csvwriter = csv.DictWriter(f, fieldnames = fields)
        csvwriter.writeheader() 
        f.close()

# Classification Models

## Original

In [6]:
# Classification Model
from functions.AC_PLT import AC_PLT
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold


accuracies_tops_test = np.zeros((5,3))
accuracies_tops_train = np.zeros((5,3))
tops = (1, 3, 5)


cross_validation = StratifiedKFold(n_splits = 5)

X = data_matrix[:, :cfg.params.vector_length.sentence_embedding] 
y = data_matrix[:, cfg.params.vector_length.sentence_embedding+2]

temp_test_acc = np.zeros(5)

for c, (train_index, test_index) in enumerate(cross_validation.split(X, y)):

    X_train = X[train_index, :]
    y_train = y[train_index]

    X_test = X[test_index, :]
    y_test = y[test_index]
    
    ## MODIFICAR ESTE HIPERPARAMETRO
    classification_model = AC_PLT(n_clusters=cfg.params.kmeans.n_cluster[dataset_name])

    classification_model.fit(X_train, y_train)

    pred_ranking_test = classification_model.suggestions(X_test, n_codes=5)
    pred_ranking_train = classification_model.suggestions(X_train, n_codes=5)

    top1_acc_test = np.zeros(len(y_test))
    top3_acc_test = np.zeros(len(y_test))
    top5_acc_test = np.zeros(len(y_test))

    
    for j in range(len(y_test)):
        top1_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[0]])
        top3_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[1]])
        top5_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[2]])


    accuracies_tops_test[c] = [np.mean(top1_acc_test), np.mean(top3_acc_test), np.mean(top5_acc_test)]



    top1_acc_train = np.zeros(len(y_train))
    top3_acc_train = np.zeros(len(y_train))
    top5_acc_train = np.zeros(len(y_train))

    
    for j in range(len(y_train)):
        top1_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[0]])
        top3_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[1]])
        top5_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[2]])
    
    accuracies_tops_train[c] = [np.mean(top1_acc_train), np.mean(top3_acc_train), np.mean(top5_acc_train)]


train_mean = accuracies_tops_train.mean(axis=0)
train_std = accuracies_tops_train.std(axis=0)

row_train = [classification_model.__class__.__name__, 
            train_mean[0], train_std[0],
            train_mean[1], train_std[1],
            train_mean[2], train_std[2],
            "Train"
            ]

test_mean = accuracies_tops_test.mean(axis=0)
test_std = accuracies_tops_test.std(axis=0)

row_test = [classification_model.__class__.__name__, 
            test_mean[0], test_std[0],
            test_mean[1], test_std[1],
            test_mean[2], test_std[2],
            "Test"
            ]

with open(name_file, 'a', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(row_train)
    csvwriter.writerow(row_test)
    f.close()


print(accuracies_tops_train.mean(axis=0))
print(accuracies_tops_test.mean(axis=0))




[0.59902243 0.63469749 0.64643487]
[0.54497264 0.59057276 0.60588784]


## Naïve Bayes

In [7]:
# Classification Model
from functions.AC_PLT import AC_PLT
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import GaussianNB



accuracies_tops_test = np.zeros((5,3))
accuracies_tops_train = np.zeros((5,3))
tops = (1, 3, 5)


cross_validation = StratifiedKFold(n_splits = 5)

X = data_matrix[:, :cfg.params.vector_length.sentence_embedding] 
y = data_matrix[:, cfg.params.vector_length.sentence_embedding+2]


temp_test_acc = np.zeros(5)


for c, (train_index, test_index) in enumerate(cross_validation.split(X, y)):

    X_train = X[train_index, :]
    y_train = y[train_index]

    X_test = X[test_index, :]
    y_test = y[test_index]


    classification_model = GaussianNB(var_smoothing=40)

    classification_model.fit(X_train, y_train)
    pred = classification_model.predict_proba(X_test)

    pred_ranking_test = set_labels(classification_model.predict_proba(X_test), classification_model.classes_)
    pred_ranking_train = set_labels(classification_model.predict_proba(X_train), classification_model.classes_)
    
    top1_acc_test = np.zeros(len(y_test))
    top3_acc_test = np.zeros(len(y_test))
    top5_acc_test = np.zeros(len(y_test))

    
    for j in range(len(y_test)):
        top1_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[0]])
        top3_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[1]])
        top5_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[2]])


    accuracies_tops_test[c] = [np.mean(top1_acc_test), np.mean(top3_acc_test), np.mean(top5_acc_test)]



    top1_acc_train = np.zeros(len(y_train))
    top3_acc_train = np.zeros(len(y_train))
    top5_acc_train = np.zeros(len(y_train))

    
    for j in range(len(y_train)):
        top1_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[0]])
        top3_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[1]])
        top5_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[2]])
    
    accuracies_tops_train[c] = [np.mean(top1_acc_train), np.mean(top3_acc_train), np.mean(top5_acc_train)]
    
    # break

print(accuracies_tops_train.mean(axis=0))
print(accuracies_tops_test.mean(axis=0))

train_mean = accuracies_tops_train.mean(axis=0)
train_std = accuracies_tops_train.std(axis=0)

row_train = [classification_model.__class__.__name__, 
            train_mean[0], train_std[0],
            train_mean[1], train_std[1],
            train_mean[2], train_std[2],
            "Train"
            ]

test_mean = accuracies_tops_test.mean(axis=0)
test_std = accuracies_tops_test.std(axis=0)

row_test = [classification_model.__class__.__name__, 
            test_mean[0], test_std[0],
            test_mean[1], test_std[1],
            test_mean[2], test_std[2],
            "Test"
            ]

with open(name_file, 'a', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(row_train)
    csvwriter.writerow(row_test)
    f.close()




[0.5686747  0.69204276 0.73176628]
[0.50423693 0.61982219 0.65428114]


## KNN

In [8]:
# Classification Model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier



accuracies_tops_test = np.zeros((5,3))
accuracies_tops_train = np.zeros((5,3))
tops = (1, 3, 5)


cross_validation = StratifiedKFold(n_splits = 5)

X = data_matrix[:, :cfg.params.vector_length.sentence_embedding] 
y = data_matrix[:, cfg.params.vector_length.sentence_embedding+2]


temp_test_acc = np.zeros(5)


for c, (train_index, test_index) in enumerate(cross_validation.split(X, y)):

    X_train = X[train_index, :]
    y_train = y[train_index]

    X_test = X[test_index, :]
    y_test = y[test_index]


    classification_model = KNeighborsClassifier(n_neighbors = 10)

    classification_model.fit(X_train, y_train)
    pred = classification_model.predict_proba(X_test)

    pred_ranking_test = set_labels(classification_model.predict_proba(X_test), classification_model.classes_)
    pred_ranking_train = set_labels(classification_model.predict_proba(X_train), classification_model.classes_)
    
    top1_acc_test = np.zeros(len(y_test))
    top3_acc_test = np.zeros(len(y_test))
    top5_acc_test = np.zeros(len(y_test))

    
    for j in range(len(y_test)):
        top1_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[0]])
        top3_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[1]])
        top5_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[2]])


    accuracies_tops_test[c] = [np.mean(top1_acc_test), np.mean(top3_acc_test), np.mean(top5_acc_test)]



    top1_acc_train = np.zeros(len(y_train))
    top3_acc_train = np.zeros(len(y_train))
    top5_acc_train = np.zeros(len(y_train))

    
    for j in range(len(y_train)):
        top1_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[0]])
        top3_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[1]])
        top5_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[2]])
    
    accuracies_tops_train[c] = [np.mean(top1_acc_train), np.mean(top3_acc_train), np.mean(top5_acc_train)]
    
    # break

print(accuracies_tops_train.mean(axis=0))
print(accuracies_tops_test.mean(axis=0))

train_mean = accuracies_tops_train.mean(axis=0)
train_std = accuracies_tops_train.std(axis=0)

row_train = [classification_model.__class__.__name__, 
            train_mean[0], train_std[0],
            train_mean[1], train_std[1],
            train_mean[2], train_std[2],
            "Train"
            ]

test_mean = accuracies_tops_test.mean(axis=0)
test_std = accuracies_tops_test.std(axis=0)

row_test = [classification_model.__class__.__name__, 
            test_mean[0], test_std[0],
            test_mean[1], test_std[1],
            test_mean[2], test_std[2],
            "Test"
            ]

with open(name_file, 'a', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(row_train)
    csvwriter.writerow(row_test)
    f.close()




[0.6598748  0.89227656 0.95752261]
[0.6084299  0.7614867  0.79738931]


## Decision Tree

In [9]:
# Classification Model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier



accuracies_tops_test = np.zeros((5,3))
accuracies_tops_train = np.zeros((5,3))
tops = (1, 3, 5)


cross_validation = StratifiedKFold(n_splits = 5)

X = data_matrix[:, :cfg.params.vector_length.sentence_embedding] 
y = data_matrix[:, cfg.params.vector_length.sentence_embedding+2]


temp_test_acc = np.zeros(5)


for c, (train_index, test_index) in enumerate(cross_validation.split(X, y)):

    X_train = X[train_index, :]
    y_train = y[train_index]

    X_test = X[test_index, :]
    y_test = y[test_index]


    classification_model = DecisionTreeClassifier(max_leaf_nodes=1000, criterion='gini')

    classification_model.fit(X_train, y_train)
    pred = classification_model.predict_proba(X_test)

    pred_ranking_test = set_labels(classification_model.predict_proba(X_test), classification_model.classes_)
    pred_ranking_train = set_labels(classification_model.predict_proba(X_train), classification_model.classes_)
    
    top1_acc_test = np.zeros(len(y_test))
    top3_acc_test = np.zeros(len(y_test))
    top5_acc_test = np.zeros(len(y_test))

    
    for j in range(len(y_test)):
        top1_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[0]])
        top3_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[1]])
        top5_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[2]])


    accuracies_tops_test[c] = [np.mean(top1_acc_test), np.mean(top3_acc_test), np.mean(top5_acc_test)]



    top1_acc_train = np.zeros(len(y_train))
    top3_acc_train = np.zeros(len(y_train))
    top5_acc_train = np.zeros(len(y_train))

    
    for j in range(len(y_train)):
        top1_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[0]])
        top3_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[1]])
        top5_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[2]])
    
    accuracies_tops_train[c] = [np.mean(top1_acc_train), np.mean(top3_acc_train), np.mean(top5_acc_train)]
    
    # break

print(accuracies_tops_train.mean(axis=0))
print(accuracies_tops_test.mean(axis=0))

train_mean = accuracies_tops_train.mean(axis=0)
train_std = accuracies_tops_train.std(axis=0)

row_train = [classification_model.__class__.__name__, 
            train_mean[0], train_std[0],
            train_mean[1], train_std[1],
            train_mean[2], train_std[2],
            "Train"
            ]

test_mean = accuracies_tops_test.mean(axis=0)
test_std = accuracies_tops_test.std(axis=0)

row_test = [classification_model.__class__.__name__, 
            test_mean[0], test_std[0],
            test_mean[1], test_std[1],
            test_mean[2], test_std[2],
            "Test"
            ]

with open(name_file, 'a', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(row_train)
    csvwriter.writerow(row_test)
    f.close()




[0.41156165 0.47849455 0.50826953]
[0.36947685 0.41369613 0.4315533 ]


## SVC

In [10]:
# Classification Model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC



accuracies_tops_test = np.zeros((5,3))
accuracies_tops_train = np.zeros((5,3))
tops = (1, 3, 5)


cross_validation = StratifiedKFold(n_splits = 5)

X = data_matrix[:, :cfg.params.vector_length.sentence_embedding] 
y = data_matrix[:, cfg.params.vector_length.sentence_embedding+2]


temp_test_acc = np.zeros(5)


for c, (train_index, test_index) in enumerate(cross_validation.split(X, y)):

    X_train = X[train_index, :]
    y_train = y[train_index]

    X_test = X[test_index, :]
    y_test = y[test_index]


    classification_model = SVC(kernel='linear', C=2, probability=True, gamma='auto')

    classification_model.fit(X_train, y_train)

    pred_ranking_test = set_labels(classification_model.predict_proba(X_test), classification_model.classes_)
    pred_ranking_train = set_labels(classification_model.predict_proba(X_train), classification_model.classes_)
    
    top1_acc_test = np.zeros(len(y_test))
    top3_acc_test = np.zeros(len(y_test))
    top5_acc_test = np.zeros(len(y_test))

    
    for j in range(len(y_test)):
        top1_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[0]])
        top3_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[1]])
        top5_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[2]])


    accuracies_tops_test[c] = [np.mean(top1_acc_test), np.mean(top3_acc_test), np.mean(top5_acc_test)]



    top1_acc_train = np.zeros(len(y_train))
    top3_acc_train = np.zeros(len(y_train))
    top5_acc_train = np.zeros(len(y_train))

    
    for j in range(len(y_train)):
        top1_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[0]])
        top3_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[1]])
        top5_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[2]])
    
    accuracies_tops_train[c] = [np.mean(top1_acc_train), np.mean(top3_acc_train), np.mean(top5_acc_train)]
    
    # break

print(accuracies_tops_train.mean(axis=0))
print(accuracies_tops_test.mean(axis=0))

train_mean = accuracies_tops_train.mean(axis=0)
train_std = accuracies_tops_train.std(axis=0)

row_train = [classification_model.__class__.__name__, 
            train_mean[0], train_std[0],
            train_mean[1], train_std[1],
            train_mean[2], train_std[2],
            "Train"
            ]

test_mean = accuracies_tops_test.mean(axis=0)
test_std = accuracies_tops_test.std(axis=0)

row_test = [classification_model.__class__.__name__, 
            test_mean[0], test_std[0],
            test_mean[1], test_std[1],
            test_mean[2], test_std[2],
            "Test"
            ]

with open(name_file, 'a', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(row_train)
    csvwriter.writerow(row_test)
    f.close()




[0.58920729 0.7235203  0.76540925]
[0.49899587 0.62088886 0.6630682 ]


## Random Forest

In [11]:
cod = pd.DataFrame(data_matrix).iloc[:,cfg.params.vector_length.sentence_embedding+2].value_counts()
reduce_cod = cod[cod<5]
n=5
data_fill = pd.DataFrame(data_matrix).copy()

for key, value in reduce_cod.items():
    m=np.abs(n-value)
    nrow = np.zeros(cfg.params.vector_length.sentence_embedding)
    nrow = np.concatenate([nrow, np.array(['', '', key])])
    for i in range(m): data_fill = np.vstack([data_fill,nrow])


y = data_fill[:, cfg.params.vector_length.sentence_embedding+2]

labels = np.unique(y)
i=0
idx2class = {}
class2idx= {}
for tp in labels:
    idx2class[i] = tp
    class2idx[tp] = i
    i += 1

y_label = np.vectorize(class2idx.get)(y)

In [12]:
# Classification Model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier




accuracies_tops_test = np.zeros((5,3))
accuracies_tops_train = np.zeros((5,3))
tops = (1, 3, 5)


cross_validation = StratifiedKFold(n_splits = 5)

X = data_fill[:, :cfg.params.vector_length.sentence_embedding]
y = y_label

temp_test_acc = np.zeros(5)


for c, (train_index, test_index) in enumerate(cross_validation.split(X, y)):

    X_train = X[train_index, :]
    y_train = y[train_index]

    X_test = X[test_index, :]
    y_test = y[test_index]


    classification_model = RandomForestClassifier(random_state=0, max_depth=50)

    classification_model.fit(X_train, y_train)
    pred = classification_model.predict_proba(X_test)

    pred_ranking_test = set_labels(classification_model.predict_proba(X_test), classification_model.classes_)
    pred_ranking_train = set_labels(classification_model.predict_proba(X_train), classification_model.classes_)
    
    top1_acc_test = np.zeros(len(y_test))
    top3_acc_test = np.zeros(len(y_test))
    top5_acc_test = np.zeros(len(y_test))

    
    for j in range(len(y_test)):
        top1_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[0]])
        top3_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[1]])
        top5_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[2]])


    accuracies_tops_test[c] = [np.mean(top1_acc_test), np.mean(top3_acc_test), np.mean(top5_acc_test)]



    top1_acc_train = np.zeros(len(y_train))
    top3_acc_train = np.zeros(len(y_train))
    top5_acc_train = np.zeros(len(y_train))

    
    for j in range(len(y_train)):
        top1_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[0]])
        top3_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[1]])
        top5_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[2]])
    
    accuracies_tops_train[c] = [np.mean(top1_acc_train), np.mean(top3_acc_train), np.mean(top5_acc_train)]
    
    # break

print(accuracies_tops_train.mean(axis=0))
print(accuracies_tops_test.mean(axis=0))

train_mean = accuracies_tops_train.mean(axis=0)
train_std = accuracies_tops_train.std(axis=0)

row_train = [classification_model.__class__.__name__, 
            train_mean[0], train_std[0],
            train_mean[1], train_std[1],
            train_mean[2], train_std[2],
            "Train"
            ]

test_mean = accuracies_tops_test.mean(axis=0)
test_std = accuracies_tops_test.std(axis=0)

row_test = [classification_model.__class__.__name__, 
            test_mean[0], test_std[0],
            test_mean[1], test_std[1],
            test_mean[2], test_std[2],
            "Test"
            ]

with open(name_file, 'a', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(row_train)
    csvwriter.writerow(row_test)
    f.close()


[0.56172175 0.6014358  0.60652639]
[0.40792616 0.43430876 0.44115529]


## XGBoost

In [13]:
# Classification Model
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier



accuracies_tops_test = np.zeros((5,3))
accuracies_tops_train = np.zeros((5,3))
tops = (1, 3, 5)


cross_validation = StratifiedKFold(n_splits = 5)

X = data_fill[:, :cfg.params.vector_length.sentence_embedding]
y = y_label

temp_test_acc = np.zeros(5)


for c, (train_index, test_index) in enumerate(cross_validation.split(X, y)):

    X_train = X[train_index, :]
    y_train = y[train_index]

    X_test = X[test_index, :]
    y_test = y[test_index]


    classification_model = XGBClassifier(learning_rate=1, objective='multi:softprob', random_state=0, max_depth=5, n_estimators=14)

    classification_model.fit(X_train, y_train)
    pred = classification_model.predict_proba(X_test)

    pred_ranking_test = set_labels(classification_model.predict_proba(X_test), classification_model.classes_)
    pred_ranking_train = set_labels(classification_model.predict_proba(X_train), classification_model.classes_)
    
    top1_acc_test = np.zeros(len(y_test))
    top3_acc_test = np.zeros(len(y_test))
    top5_acc_test = np.zeros(len(y_test))

    
    for j in range(len(y_test)):
        top1_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[0]])
        top3_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[1]])
        top5_acc_test[j] = int(y_test[j] in pred_ranking_test[j, :tops[2]])


    accuracies_tops_test[c] = [np.mean(top1_acc_test), np.mean(top3_acc_test), np.mean(top5_acc_test)]



    top1_acc_train = np.zeros(len(y_train))
    top3_acc_train = np.zeros(len(y_train))
    top5_acc_train = np.zeros(len(y_train))

    
    for j in range(len(y_train)):
        top1_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[0]])
        top3_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[1]])
        top5_acc_train[j] = int(y_train[j] in pred_ranking_train[j, :tops[2]])
    
    accuracies_tops_train[c] = [np.mean(top1_acc_train), np.mean(top3_acc_train), np.mean(top5_acc_train)]
    
    # break

print(accuracies_tops_train.mean(axis=0))
print(accuracies_tops_test.mean(axis=0))

train_mean = accuracies_tops_train.mean(axis=0)
train_std = accuracies_tops_train.std(axis=0)

row_train = [classification_model.__class__.__name__, 
            train_mean[0], train_std[0],
            train_mean[1], train_std[1],
            train_mean[2], train_std[2],
            "Train"
            ]

test_mean = accuracies_tops_test.mean(axis=0)
test_std = accuracies_tops_test.std(axis=0)

row_test = [classification_model.__class__.__name__, 
            test_mean[0], test_std[0],
            test_mean[1], test_std[1],
            test_mean[2], test_std[2],
            "Test"
            ]

with open(name_file, 'a', newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(row_train)
    csvwriter.writerow(row_test)
    f.close()


[0.11037738 0.11945928 0.12466053]
[0.09290009 0.10207795 0.10815716]
