# Decision theory project

In [1]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import pdist, squareform
from sklearn.model_selection import train_test_split

In [2]:
# Load the labelled samples and build reproducible train / test datasets.
# A fixed seed makes the split, and everything computed from it further down
# (distance matrix layout, gamma coefficients, neighbours), identical after
# every "Restart Kernel -> Run All".
RANDOM_STATE = 42

data = pd.read_csv("./PlasticsTrain.csv", sep=";")

# Target column, and the features without the target and without the
# `line`, `column` and `object` columns.
y = data["class"]
X = data.drop(["class", "line", "column", "object"], axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=RANDOM_STATE
)

In [3]:
def fit_gamma(train_data, train_label, dist, classes):
    """Fit one gamma coefficient per class from intra-class distances.

    For each class, gamma is the inverse of the mean pairwise distance between
    the training samples carrying that class label.

    Parameters
    ----------
    train_data : any
        Unused; kept so existing calls keep working.
    train_label : pandas.Series or array-like
        Class label of each training sample. Labels are read by position:
        entry ``t`` is the label of row/column ``t`` of ``dist``.
    dist : 2-D array-like
        Pairwise distance matrix; ``dist[a][b]`` is the distance between
        samples ``a`` and ``b``.
    classes : sequence
        Class labels to fit, in the order the coefficients are returned.

    Returns
    -------
    list
        One value per entry of ``classes``: ``1 / mean(pairwise distances)``,
        or ``np.nan`` when the class has fewer than two samples (no pair).
    """
    # np.asarray drops any pandas index, so labels are matched by position.
    labels = np.asarray(train_label)
    dist = np.asarray(dist)

    gamma = []
    for cls in classes:
        members = np.flatnonzero(labels == cls)
        if members.size < 2:
            # No pair of samples -> no intra-class distance to average.
            gamma.append(np.nan)
            continue
        # Every unordered pair (a, b) with a < b, in row-major order, pulled
        # out of the matrix in a single vectorised indexing operation.
        first, second = np.triu_indices(members.size, k=1)
        pair_dist = dist[members[first], members[second]]
        gamma.append(1 / np.mean(pair_dist))
    return gamma

In [4]:
# Pairwise distances between all samples.
# Training rows are stacked first and test rows after them, so positions
# 0 .. len(X_train) - 1 of the distance matrix are the training samples.
newDF = (
    pd.concat([X_train, X_test])
    # Turn decimal commas into decimal points, then convert every column to float
    .replace(',', '.', regex=True)
    .astype(float)
)

# Condensed Euclidean distances, expanded into a full square matrix
distances = pdist(newDF.to_numpy(), metric='euclidean')
dist_matrix = squareform(distances)

In [5]:
label = X.columns
# Sort the class names: `unique()` returns them in order of first appearance in
# the shuffled training labels, which would make the position of each class in
# `coeff_gamma` depend on the split. Sorted, the order is always the same
# (ABS, HiPS, PE, PP for the labels in this dataset).
classe = np.sort(y_train.unique())

# One gamma coefficient per class, in the order of `classe`
coeff_gamma = fit_gamma(X_train, y_train, dist_matrix, classe)


In [7]:
# Display the fitted gamma coefficients: one value per class, in the order of `classe`.
coeff_gamma

[0.19069967085541129,
 0.2154706603038034,
 0.3829252745653152,
 0.37767173121898445]

In [12]:
# Define the coefficient phi; alpha defaults to 0.95.
def phi(gamma, d, alpha=0.95):
    """Evaluate ``alpha * exp(-gamma_i * d**2)`` for every coefficient in ``gamma``.

    Parameters
    ----------
    gamma : sequence of float
        Gamma coefficients.
    d : float
        Distance at which the function is evaluated.
    alpha : float, default 0.95
        Multiplicative factor applied to every value.

    Returns
    -------
    list
        One value per entry of ``gamma``, in the same order.
    """
    d_squared = d**2
    values = []
    for g in gamma:
        values.append(alpha * np.exp(-g * d_squared))
    return values

In [13]:
# Sanity check: evaluate phi for every fitted gamma at distance d = 4 (default alpha).
phi(coeff_gamma, 4)

[0.0449372580313906,
 0.03023292068987073,
 0.002074370204169698,
 0.0022562728829849956]

In [34]:
def voisins(indice, train_size, y, k=5, dist=None):
    """Find the nearest training samples of one row of the distance matrix.

    Parameters
    ----------
    indice : int
        Row of the distance matrix whose neighbours are wanted.
    train_size : int
        Only rows whose position is strictly below this value are eligible
        neighbours (the training rows come first in the matrix).
    y : pandas.Series or array-like
        Labels, read by position: entry ``p`` is used as the label of matrix
        row ``p``.
        NOTE(review): the neighbour positions refer to rows of the distance
        matrix, so ``y`` has to be in the same row order as that matrix (the
        training labels, in training order) for the returned classes to be
        the neighbours' own classes.
    k : int, default 5
        Maximum number of neighbours to return.
    dist : 2-D array-like, optional
        Pairwise distance matrix. Defaults to the notebook-level ``dist_matrix``.

    Returns
    -------
    indice_voisins : list of int
        Positions of the neighbours, nearest first. Shorter than ``k`` when
        fewer than ``k`` training rows are available.
    distance_voisins : numpy.ndarray
        Distance from ``indice`` to each neighbour, in the same order.
    classes : pandas.Series or numpy.ndarray
        Entries of ``y`` at the neighbour positions.
    """
    if dist is None:
        dist = dist_matrix
    dist = np.asarray(dist)

    by_distance = np.argsort(dist[indice])
    # Keep training rows only, and never the query itself. Filtering on the
    # index (instead of skipping the first sorted entry) stays correct when
    # another sample is at distance 0 from the query.
    eligible = by_distance[(by_distance < train_size) & (by_distance != indice)]
    nearest = eligible[:k]

    indice_voisins = nearest.tolist()
    distance_voisins = dist[indice, nearest]
    # Positional lookup: the neighbour ids are row positions, not index labels.
    if hasattr(y, "iloc"):
        classes = y.iloc[indice_voisins]
    else:
        classes = np.asarray(y)[nearest]
    return indice_voisins, distance_voisins, classes

In [35]:
# Example: nearest training neighbours of row 10000 of the distance matrix
# (rows follow the stacked [X_train, X_test] order).
# NOTE(review): `y` is the full label column in file order, whereas the neighbour
# indices are row positions in the split data; the training labels (`y_train`,
# read by position) look like what belongs here. Confirm before relying on the
# classes shown below.
voisins(10000, len(X_train), y)

([223, 5249, 7220, 4724, 4894],
 array([0.42876488, 0.57613252, 0.58430865, 0.59136781, 0.59287058]),
 223      ABS
 5249      PE
 7220      PE
 4724    HiPS
 4894      PE
 Name: class, dtype: object)

In [36]:
# Distinct class labels present in the data, in order of first appearance.
y.unique()

array(['ABS', 'HiPS', 'PE', 'PP'], dtype=object)

In [43]:
def m_j(indice, train_size, y):
    """Per-class mass contributed by the nearest training neighbours of a sample.

    The neighbours returned by ``voisins`` are grouped by class label. For each
    class q listed in the notebook-level ``classe``, the result is

        m_q = 1 - prod_i (1 - phi_q(d_i))

    where the product runs over the neighbours labelled q, ``d_i`` is the
    distance to neighbour i and ``phi_q`` is ``phi`` evaluated with the gamma
    fitted for class q (looked up by class name through ``classe`` and
    ``coeff_gamma``, so the result does not depend on the order of ``classe``).

    Parameters
    ----------
    indice : int
        Row of the distance matrix to evaluate.
    train_size : int
        Number of training rows, forwarded to ``voisins``.
    y : pandas.Series or array-like
        Labels, forwarded to ``voisins``.

    Returns
    -------
    dict
        Maps each class label of ``classe`` to its mass. A class with no
        neighbour among those returned gets 0. Neighbours whose label is not
        in ``classe`` are ignored.

    NOTE(review): the original cell stopped after computing the ABS and HiPS
    values and returned nothing; returning all per-class masses is the assumed
    intent. No combination across classes or normalisation is performed here.
    Confirm against the intended decision rule.
    """
    _, distance_voisins, classes = voisins(indice, train_size, y)
    # Drop any pandas index so neighbours are addressed by position, not by label.
    classes = np.asarray(classes)

    gamma_by_class = dict(zip(classe, coeff_gamma))
    # Running product of (1 - phi_q(d_i)) over the neighbours of each class q.
    remaining = {name: 1.0 for name in gamma_by_class}
    for name, d in zip(classes, distance_voisins):
        if name in remaining:
            remaining[name] *= 1 - phi([gamma_by_class[name]], d)[0]

    return {name: 1 - product for name, product in remaining.items()}
    
        




SyntaxError: incomplete input (2251965070.py, line 6)

In [42]:
# Example: evaluate m_j for row 10000 of the distance matrix.
# NOTE(review): `y` is the full label column in file order, whereas the neighbour
# indices used inside are row positions in the split data; the training labels
# (`y_train`, read by position) look like what belongs here. Confirm.
m_j(10000, len(X_train), y)

ABS
PE
PE
HiPS
PE
