In [19]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [20]:
file_path = '/content/drive/MyDrive/S3_MLDM/ML/projet_1'

Projet MLDM 27/09/2023, MOUDJAHED Mohamed


In [21]:
pip install pot



In [22]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np

import plotly.graph_objects as go

import scipy.io
from scipy.spatial.distance import cdist

from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

import ot

# 0 - Introduction

0.1 - Importation des données

In [23]:
#'/office_caltech/surf/webcam.mat'
#'/office_caltech/surf/dslr.mat'
#'/office_caltech/surf/amazon.mat'
#'/office_caltech/surf/caltech.mat'
#W->D

data_source = scipy.io.loadmat(file_path+'/office_caltech/surf/webcam.mat')

X_source=data_source['fts']
y_source=data_source['labels']

data_target = scipy.io.loadmat(file_path+'/office_caltech/surf/dslr.mat')

X_target=data_target['fts']
y_target=data_target['labels']

print('X_source :', X_source.shape)
print('X_target :',X_target.shape)

print('y_source :', y_source.shape)
print('y_target :', y_target.shape)

X_source : (295, 800)
X_target : (157, 800)
y_source : (295, 1)
y_target : (157, 1)


0.2 - Normalisation des données

In [24]:
scaler = StandardScaler()

S = scaler.fit_transform(X_source)
T = scaler.fit_transform(X_target)

print('S :', S.shape)
print('T :', T.shape)

S : (295, 800)
T : (157, 800)


# Test sans adaptation de domaine

In [25]:
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(S, y_source.ravel())
predictions = knn.predict(T)

print('Accuracy, classification 1-NN sans adaption de domaine :', accuracy_score(y_target, predictions))

report = classification_report(y_target, predictions)
print(report)

Accuracy, classification 1-NN sans adaption de domaine : 0.3057324840764331
              precision    recall  f1-score   support

           1       0.32      0.50      0.39        12
           2       1.00      0.19      0.32        21
           3       1.00      0.08      0.15        12
           4       1.00      0.23      0.38        13
           5       0.24      0.80      0.37        10
           6       1.00      0.04      0.08        24
           7       0.41      0.55      0.47        22
           8       0.17      0.92      0.29        12
           9       1.00      0.12      0.22         8
          10       1.00      0.04      0.08        23

    accuracy                           0.31       157
   macro avg       0.71      0.35      0.27       157
weighted avg       0.75      0.31      0.26       157



# Exercice 1

Analyse des composantes sélectionnées

In [26]:

def analyse_PCA(X, nb):
    # Création de l'objet PCA
    n = X.shape[1]
    pca = PCA(n_components=n, random_state=42)
    pca.fit_transform(X)

    variance_explicative = pca.explained_variance_ratio_

    variance_cumulative = np.cumsum(variance_explicative) * 100

    fig = go.Figure(data=go.Scatter(x=np.arange(1, len(variance_cumulative) + 1), y=variance_cumulative, mode='lines+markers', name='Variance cumulée'))

    fig.add_trace(go.Scatter(x=[nb], y=[variance_cumulative[nb-1]], mode='markers', marker=dict(color='red', size=8), name='Nombre de composantes maximal choisi'))

    fig.update_layout(
        xaxis_title='Nombre de composants',
        yaxis_title='Pourcentage cumulé de variance expliquée'
    )

    fig.show()

    fig = go.Figure(data=go.Bar(x=np.arange(1, len(variance_cumulative) + 1), y=variance_explicative))

    fig.update_layout(
        xaxis_title='Composant',
        yaxis_title="Taux d'information (%)"
    )

    fig.show()

print("SOURCE")
analyse_PCA(np.transpose(S), 80)

print("TARGET")
analyse_PCA(np.transpose(T), 80)


SOURCE


TARGET


In [27]:
def subspace_alignment(S, T, d):

    def PCA(X, d):
      cov_X = np.cov(X, rowvar=False)

      eigvals_X, eigvecs_X = np.linalg.eigh(cov_X)

      # Tri des vecteurs propres et des valeurs propres
      sorted_indices = np.argsort(eigvals_X)[::-1]
      eigvecs_X = eigvecs_X[:, sorted_indices]

      return eigvecs_X[:, :d]


    #1.1
    X_s = PCA(S, d)
    X_t = PCA(T, d)

    #1.2
    M = X_s.T @ X_t
    X_a = X_s @ M

    #1.3
    S_a = S @ X_a
    T_a = T @ X_t

    #1.4
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(S_a, y_source.ravel())
    predictions = knn.predict(T_a)

    return S_a, T_a, predictions

accuracies = []

for d in range(1,min(S.shape[0]+1, T.shape[0]+1)) :

    y_pred = subspace_alignment(S, T, d)[2]
    accuracy = accuracy_score(y_target, y_pred)
    accuracies.append(accuracy)


fig = go.Figure(data=go.Scatter(x=list(range(1, min(S.shape[0] + 1, T.shape[0] + 1))), y=accuracies, mode='lines'))
fig.update_layout(
    title='Accuracy en fonction de d',
    xaxis=dict(title='d'),
    yaxis=dict(title='Accuracy')
)

fig.show()

à d = 62 on a l'accuracy la plus élevée

In [28]:
y_pred = subspace_alignment(S, T, 62)[2]
report = classification_report(y_target, y_pred)
print(report)

              precision    recall  f1-score   support

           1       0.92      1.00      0.96        12
           2       1.00      1.00      1.00        21
           3       1.00      1.00      1.00        12
           4       0.93      1.00      0.96        13
           5       0.89      0.80      0.84        10
           6       0.92      0.92      0.92        24
           7       0.88      0.95      0.91        22
           8       1.00      0.92      0.96        12
           9       0.88      0.88      0.88         8
          10       0.95      0.87      0.91        23

    accuracy                           0.94       157
   macro avg       0.94      0.93      0.93       157
weighted avg       0.94      0.94      0.94       157



# Exercice 2

In [29]:
def entropic_ot_transport(S, T, rege):
    #2.1
    ns = len(S)
    nt = len(T)
    a = np.ones(ns) / ns
    b = np.ones(nt) / nt

    #2.2
    M = cdist(S, T)
    M = M / np.max(M)

    #2.3
    gamma = ot.sinkhorn(a, b, M, rege)

    #2.4
    Sa = np.dot(gamma, T)

    #2.5
    knn = KNeighborsClassifier(n_neighbors=1)
    knn.fit(Sa, y_source.ravel())
    predictions = knn.predict(T)

    return predictions

debut_rege = 0
fin_rege = 1.5
pas_rege = 0.01
rege_values = np.arange(debut_rege, fin_rege + pas_rege, pas_rege).tolist()

accuracies=[]

for rege in rege_values:
    y_pred_2 = entropic_ot_transport(S, T, rege)
    accuracy = accuracy_score(y_target, y_pred_2.ravel())
    accuracies.append(accuracy)

fig = go.Figure(data=go.Scatter(x=rege_values, y=accuracies, mode='lines+markers'))
fig.update_layout(title='Accuracy en fonction de rege',
                  xaxis_title='Valeur de rege', yaxis_title='Accuracy')

fig.show()

In [30]:
y_pred = entropic_ot_transport(S, T, 0.1)

report = classification_report(y_target, y_pred)
print(report)

              precision    recall  f1-score   support

           1       1.00      0.33      0.50        12
           2       0.83      0.95      0.89        21
           3       1.00      1.00      1.00        12
           4       0.63      0.92      0.75        13
           5       0.91      1.00      0.95        10
           6       1.00      0.62      0.77        24
           7       0.64      0.95      0.76        22
           8       0.82      0.75      0.78        12
           9       0.86      0.75      0.80         8
          10       0.81      0.74      0.77        23

    accuracy                           0.80       157
   macro avg       0.85      0.80      0.80       157
weighted avg       0.84      0.80      0.79       157

