In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [2]:
df = pd.read_csv('./final_dataset.csv')

In [3]:
# Nombre de classes
n_classes = df['activityID'].nunique()
n_total = 1000000
# Taille de l'échantillon par classe
n_per_class = n_total // n_classes

# Échantillonnage uniforme
sample = df.groupby('activityID', group_keys=False).apply(
    lambda x: x.sample(n=min(len(x), n_per_class), random_state=42)
)

# Réinitialiser les indices
sample = sample.reset_index(drop=True)


  sample = df.groupby('activityID', group_keys=False).apply(


In [4]:
sample['activityID'].value_counts()

activityID
0     52631
1     52631
19    52631
18    52631
17    52631
16    52631
13    52631
12    52631
11    52631
10    52631
9     52631
7     52631
6     52631
5     52631
4     52631
3     52631
2     52631
24    49360
20    46915
Name: count, dtype: int64

In [5]:
X = sample.drop(columns=["activityID", "timestamp"])  # Supprimer les colonnes inutiles
y = sample["activityID"]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [7]:
X_test[:5]

Unnamed: 0,heart_rate,IMU_hand_1,IMU_hand_2,IMU_hand_3,IMU_hand_4,IMU_hand_5,IMU_hand_6,IMU_hand_7,IMU_hand_8,IMU_hand_9,...,IMU_ankle_8,IMU_ankle_9,IMU_ankle_10,IMU_ankle_11,IMU_ankle_12,IMU_ankle_13,IMU_ankle_14,IMU_ankle_15,IMU_ankle_16,IMU_ankle_17
790028,74.0,33.0625,-0.471641,6.16827,4.78465,-0.429769,6.08095,5.38696,-0.433044,-1.43559,...,0.264466,-0.334172,0.170506,-43.4722,7.92984,41.2428,0.080497,0.723002,0.132753,0.673175
340951,127.0,31.3125,-6.24867,4.67112,6.24267,-6.86246,4.07569,7.76876,0.421383,0.338674,...,0.292395,0.02849,1.05861,-19.4965,43.739,16.3232,0.502748,0.715163,0.261555,0.409116
415355,130.0,34.75,-1.20298,7.64628,-0.273134,-0.529832,8.20631,0.263248,0.214352,0.549867,...,-0.95708,1.38523,5.70704,-43.154,-14.7887,26.5197,0.124154,0.576958,0.596086,0.544414
140258,79.0,35.0,-1.80413,5.52362,7.88435,-1.78326,5.625,8.01688,0.512128,0.103416,...,0.04243,-0.017714,0.005101,-35.8101,2.23863,30.9226,0.044804,-0.762631,0.310686,-0.565562
542453,97.0,34.5,-1.67675,7.96842,5.60725,-1.48178,7.82568,5.79194,-0.029716,0.011717,...,0.011285,0.019302,0.01797,5.04097,5.4457,-21.6971,1.0,0.0,0.0,0.0


In [8]:

svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42, verbose=True)
svm_model.fit(X_train, y_train)

[LibSVM]..
*.
*
optimization finished, #iter = 3388
obj = -3341.741371, rho = -0.048353
nSV = 3860, nBSV = 3778
.....
*.*
optimization finished, #iter = 6185
obj = -6129.508046, rho = -7.082853
nSV = 7572, nBSV = 7498
..........
*.
*
optimization finished, #iter = 11720
obj = -11248.962913, rho = -10.168933
nSV = 14433, nBSV = 14335
.................
*..*
optimization finished, #iter = 19740
obj = -21521.491418, rho = -4.403375
nSV = 25799, nBSV = 25619
..........
*.*
optimization finished, #iter = 11741
obj = -9292.629019, rho = 4.423298
nSV = 11819, nBSV = 11615
......
*.
*.*
optimization finished, #iter = 8012
obj = -7358.490527, rho = -6.350421
nSV = 8553, nBSV = 8437
.................
*.*.*
optimization finished, #iter = 18790
obj = -20146.912762, rho = -3.546150
nSV = 25049, nBSV = 24863
.
*
optimization finished, #iter = 1442
obj = -1328.747645, rho = -3.250322
nSV = 1892, nBSV = 1854
...
*.
*
optimization finished, #iter = 4260
obj = -2947.920442, rho = -3.149214
nSV = 3766, nB

In [9]:
# Prédictions sur les données de test
y_pred = svm_model.predict(X_test)

In [13]:
# Évaluation du modèle
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

Accuracy: 90.84%

Classification Report:

              precision    recall  f1-score   support

           0       0.57      0.51      0.54     10522
           1       0.99      0.98      0.98     10689
           2       0.98      0.95      0.97     10560
           3       0.90      0.98      0.94     10418
           4       0.84      0.90      0.87     10639
           5       0.96      0.93      0.95     10527
           6       0.95      0.97      0.96     10435
           7       0.89      0.92      0.90     10718
           9       1.00      1.00      1.00     10657
          10       1.00      0.99      0.99     10392
          11       0.99      1.00      1.00     10552
          12       0.79      0.78      0.79     10463
          13       0.81      0.76      0.78     10493
          16       0.84      0.88      0.86     10534
          17       0.93      0.95      0.94     10497
          18       0.94      0.96      0.95     10490
          19       0.94      0.89      

Test du modèle

In [14]:
sample = df.sample(n=1)
y = sample['activityID']
x = sample.drop(columns=['timestamp', 'activityID'])

print(svm_model.predict(x))
print(pd.array(y))

[ 0 16  9 18  4  4 19 13  0 13]
<NumpyExtensionArray>
[20, 12, 9, 18, 4, 4, 19, 0, 0, 13]
Length: 10, dtype: int64


In [12]:
import joblib
joblib.dump(svm_model, 'svm_model_last.joblib')

['svm_model_last.joblib']