# Classical ML Runs
## Multiclass Performance with Single-Label Training & Prediction

In [85]:
# Imports
import numpy as np
import pandas as pd
import time

In [86]:
# scikit-learn imports: data generation, preprocessing, metrics, and classifiers
from sklearn.datasets import make_classification, make_multilabel_classification
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier

In [87]:
# Train/test splitting helper from scikit-learn
from sklearn.model_selection import train_test_split
# Seed holder from qiskit_algorithms; the value set here is reused as
# random_state when the synthetic dataset is generated
from qiskit_algorithms.utils import algorithm_globals
algorithm_globals.random_seed = 123
# Wildcard import; presumably the source of setup / create_model /
# predict_model / pull used in pycaret_training -- TODO confirm
from pycaret.classification import *

In [100]:
class classicalMultiLabelAlgoTraining:
    """Benchmark single-label classifiers on synthetic multilabel data.

    The multilabel indicator rows produced by scikit-learn are collapsed into
    one integer class per sample (the row read as a binary number). A
    single-label classifier is trained on those integers and its predictions
    are expanded back into indicator rows for scoring.

    Each training method appends one row per trained model to the
    module-level ``results`` dict (column name -> list of values); that dict
    must exist before a training method is called.

    Args:
        no_of_features: Number of input features to generate.
        no_of_samples: Number of samples to generate.
        no_of_classes: Number of binary labels, i.e. the indicator row width.
        no_of_labels: Forwarded to ``make_multilabel_classification`` as
            ``n_labels``.
        models: PyCaret model identifiers trained by ``pycaret_training``.
            Defaults to ``['rf']``.
    """

    def __init__(self, no_of_features, no_of_samples, no_of_classes, no_of_labels, models=None):
        self.no_of_features = no_of_features
        self.no_of_samples = no_of_samples
        self.no_of_labels = no_of_labels
        self.no_of_classes = no_of_classes
        # Build the default per instance so instances never share one list.
        self.models = ['rf'] if models is None else models
        print(f"the no of classes is {self.no_of_classes}")

    def data_generation(self):
        """Generate a synthetic multilabel dataset.

        Returns:
            A tuple ``(X, y, y_new)``: the feature matrix, the multilabel
            indicator matrix, and a 1-D array holding the integer encoding of
            every row of ``y`` (see ``conv_to_int``).
        """
        print(self.no_of_samples)
        X, y = make_multilabel_classification(
            n_samples=self.no_of_samples,
            n_features=self.no_of_features,
            n_classes=self.no_of_classes,
            n_labels=self.no_of_labels,
            random_state=algorithm_globals.random_seed,
        )
        y_new = np.array([self.conv_to_int(row) for row in y])
        return X, y, y_new

    def int_to_hot(self, x):
        """Return ``x`` as an array of 0/1 bits, zero-padded to ``no_of_classes`` digits."""
        bits = f"{x:0{self.no_of_classes}b}"
        return np.array([int(bit) for bit in bits])

    def get_mapping(self):
        """Return a dict mapping every integer in ``[0, 2**no_of_classes)`` to its bit array."""
        return {value: self.int_to_hot(value) for value in range(2 ** self.no_of_classes)}

    def conv_to_int(self, vector):
        """Read a sequence of 0/1 values as a binary number (first element most significant)."""
        digits = "".join(np.asarray(vector).astype(str))
        return int(digits, 2)

    def _record_result(self, model_name, accuracy, f1, time_elapsed):
        """Append one benchmark row to the module-level ``results`` dict."""
        row = {
            'Model': model_name,
            'No of features': self.no_of_features,
            'No of samples': self.no_of_samples,
            'No of classes': self.no_of_classes,
            'No of labels': self.no_of_labels,
            'Accuracy': accuracy,
            'F1': f1,
            'Time taken': time_elapsed,
        }
        for column, value in row.items():
            results.setdefault(column, []).append(value)

    def pycaret_training(self):
        """Train every model in ``self.models`` with PyCaret and record its scores.

        Scores are computed on the full generated dataset, which includes the
        samples PyCaret trained on.
        """
        X, y, y_new = self.data_generation()
        print(X.shape, y.shape, y_new.shape)
        # Train on the integer-encoded classes: the target has to be 1-D, and
        # the predictions are mapped back to indicator rows below.
        setup(data=X, target=y_new, train_size=0.8)
        mapping = self.get_mapping()
        for model in self.models:
            start = time.time()
            model_created = create_model(model, verbose=False)
            time_elapsed = time.time() - start
            # Called for its side effect: it produces the score grid pulled below.
            predict_model(model_created)
            # NOTE(review): if PyCaret label-encodes the target internally, the
            # raw estimator may return encoded indices rather than the original
            # integer classes -- confirm before trusting these scores.
            predictions = model_created.predict(X)
            short_results = pull(model_created)
            # Expand integer predictions back to indicator rows so they can be
            # compared with the multilabel ground truth.
            predictions_labels_final = np.array([mapping[val] for val in predictions])
            accuracy_scores = accuracy_score(y, predictions_labels_final)
            f1_scores = f1_score(y, predictions_labels_final, average='weighted')
            self._record_result(
                short_results['Model'].values[0],
                accuracy_scores,
                f1_scores,
                time_elapsed,
            )

    def cikit_training(self):
        """Train a scikit-learn random forest on the integer classes and record its F1.

        The classifier is fitted and evaluated on the same samples. Accuracy
        is recorded as ``'NA'``.
        """
        X, y, y_new = self.data_generation()
        # Other imported classifiers (KNeighborsClassifier, svm.SVC,
        # QuadraticDiscriminantAnalysis, GradientBoostingClassifier) can be
        # substituted here; the recorded model name follows the class used.
        clf = RandomForestClassifier(random_state=0)
        start = time.time()
        clf.fit(X, y_new)
        time_elapsed = time.time() - start
        mapping = self.get_mapping()
        predictions = clf.predict(X)
        predictions_labels_final = np.array([mapping[val] for val in predictions])
        f1_scores = f1_score(y, predictions_labels_final, average='weighted')
        self._record_result(type(clf).__name__, 'NA', f1_scores, time_elapsed)

In [101]:
# Accumulator for benchmark rows (column name -> list of values); the
# training methods append to it.
results = dict()

# Samples per generated dataset.
no_of_samples = 5000

# Feature counts to sweep; presumably bounded by what the quantum-side
# experiments can handle -- TODO confirm.
no_of_features = [2, 4, 8, 12]

# Class counts to sweep (chosen to line up with the quantum-side runs).
no_of_classes = [3, 4, 5, 6, 8, 12]

# Label counts to sweep; the driver loop only uses values that do not exceed
# the current class count.
no_of_labels = list(no_of_classes)

In [102]:
# Expand the sweep into (features, classes, labels) triples, keeping only the
# combinations whose label count does not exceed the class count.
sweep = [
    (feature_count, class_count, label_count)
    for feature_count in no_of_features
    for class_count in no_of_classes
    for label_count in no_of_labels
    if label_count <= class_count
]

# Run the scikit-learn benchmark once per configuration; each run appends its
# row to the shared results dict. (pycaret_training is the alternative entry
# point on the same object.)
for no_feature, no_class, no_label in sweep:
    training_object = classicalMultiLabelAlgoTraining(no_feature, no_of_samples, no_class, no_label)
    training_object.cikit_training()

the no of classes is 3
5000
the no of classes is 4
5000
the no of classes is 4
5000
the no of classes is 5
5000
the no of classes is 5
5000
the no of classes is 5
5000
the no of classes is 6
5000
the no of classes is 6
5000
the no of classes is 6
5000
the no of classes is 6
5000
the no of classes is 8
5000
the no of classes is 8
5000
the no of classes is 8
5000
the no of classes is 8
5000
the no of classes is 8
5000
the no of classes is 12
5000
the no of classes is 12
5000
the no of classes is 12
5000
the no of classes is 12
5000
the no of classes is 12
5000
the no of classes is 12
5000
the no of classes is 3
5000
the no of classes is 4
5000
the no of classes is 4
5000
the no of classes is 5
5000
the no of classes is 5
5000
the no of classes is 5
5000
the no of classes is 6
5000
the no of classes is 6
5000
the no of classes is 6
5000
the no of classes is 6
5000
the no of classes is 8
5000
the no of classes is 8
5000
the no of classes is 8
5000
the no of classes is 8
5000
the no of clas

In [103]:
from pathlib import Path

# Collect the accumulated benchmark rows into a table and write it out as CSV.
df = pd.DataFrame(results)
output_path = Path('results/Final/MC-ML-experiments.csv')
# to_csv does not create missing directories, so make sure the target folder
# exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)