# Visualize coin data to spot specifications

In [92]:
import os
import os.path
import time
import h5py
from util.settings import Dtype, Settings
import numpy as np
import argparse
import matplotlib.pyplot as plt
from datafileviewer_template import DataFileViewer
from datafilereader import DataFileReader
import matplotlib
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


### Load data

In [93]:
folder = "./data/Groupe5/dataSetAGF-bobine3/"

coinNames = ["5_CTS", "10_CTS", "20_CTS", "50_CTS", "1_CHF", "2_CHF", "5_CHF"]

# Get the frequency axis from the first coin file (assumed to be the same for every coin).
dataset = DataFileReader(folder + coinNames[0] + ".h5")
f, _ = dataset.get_all_mesurements()

# Get Z for all coins.
# Iterate over the names themselves so the loop always matches the list length
# (no hard-coded coin count to keep in sync by hand).
# Each Z is wrapped in a one-element list because later cells read coins_Z[i][0].
coins_Z = []
for coin_name in coinNames:
    dataset = DataFileReader(folder + coin_name + ".h5")
    _, Z = dataset.get_all_mesurements()
    coins_Z.append([Z])


### Compute the mean for each coin

In [94]:
coins_mean = []

# For every coin: split the impedance into its real part (R) and its imaginary
# part divided by 2*pi*f (L), remove the calibration row, then average over
# the remaining measurements.
for i, coin_entry in enumerate(coins_Z):
    Z_coin = coin_entry[0]
    N = len(Z_coin)  # number of measurements for this coin
    R = np.real(Z_coin)
    L = np.imag(Z_coin) / (2 * np.pi * f)
    # Row 0 is used as the calibration: subtract it from every other row.
    R = R[1:, :] - R[0, :]
    L = L[1:, :] - L[0, :]
    # Mean over the measurement axis -> one value per frequency.
    R_mean = R.mean(axis=0)
    L_mean = L.mean(axis=0)
    coins_mean.append([R_mean, L_mean])

    

### Search for the optimal features (frequencies) for the classification

In [95]:

def _most_separating_indices(mean_curves):
    """Return the frequency indices that best separate each pair of coins.

    For every unordered pair of curves, the index where the absolute
    difference between the two curves is largest is selected. Each index is
    kept only once, in order of first appearance.

    Parameters
    ----------
    mean_curves : sequence of 1-D arrays
        One mean curve per coin, all sampled on the same frequency axis.

    Returns
    -------
    list of int
        Unique indices of maximal pairwise difference.
    """
    selected = []
    for first in range(len(mean_curves)):
        for second in range(first + 1, len(mean_curves)):
            gap = np.abs(mean_curves[first] - mean_curves[second])
            best = int(np.argmax(gap))
            if best not in selected:
                selected.append(best)
    return selected


# Each entry of coins_mean is [R_mean, L_mean]; run the same search on both.
# search best frequencies for R
featureListR = _most_separating_indices([coin[0] for coin in coins_mean])

# search best frequencies for L
featureListL = _most_separating_indices([coin[1] for coin in coins_mean])



### Merge the data from all groups and train the SVM

In [96]:
# Function to load and merge data from all groups for each coin, using the optimal features
def load_and_merge_data_with_features(groups, coins, data_dir, feature_indices_R, feature_indices_L, random_state=42):
    """Build a shuffled feature matrix and label vector from all available files.

    For every ``coin`` file name and every ``group`` folder, the file
    ``data_dir/group/coin`` is read if it exists (missing files are skipped).
    Each measurement row yields one sample: the real part of Z at
    ``feature_indices_R`` followed by imag(Z) / (2*pi*frequency) at
    ``feature_indices_L``. The label is the coin file name without ``.h5``.

    Parameters
    ----------
    groups : iterable of str
        Sub-folder names inside ``data_dir``.
    coins : iterable of str
        Measurement file names (e.g. ``"1_CHF.h5"``).
    data_dir : str
        Root directory containing the group folders.
    feature_indices_R, feature_indices_L : sequence of int
        Frequency indices used for the real-part and the imaginary-part features.
    random_state : int or None, default 42
        Seed for the row shuffle. A fixed seed makes the returned order (and
        therefore any seeded train/test split applied afterwards) reproducible;
        pass ``None`` for a different order on every call.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        Features (one row per measurement) and the matching labels, shuffled
        with the same permutation. Both are empty when no file was found.
    """
    idx_R = list(feature_indices_R)
    idx_L = list(feature_indices_L)

    feature_blocks = []
    all_labels = []
    for coin in coins:
        coin_name = coin.replace('.h5', '')
        for group in groups:
            file_path = os.path.join(data_dir, group, coin)
            if not os.path.isfile(file_path):
                continue  # this group did not measure this coin

            reader = DataFileReader(file_path)
            frequency, Z = reader.get_all_mesurements()

            # Separate the real part and the frequency-normalised imaginary part
            resistance = np.real(Z)
            reactance = np.imag(Z) / (2 * np.pi * frequency)

            # NOTE(review): every row is used as a sample, including row 0, and no
            # calibration is subtracted, whereas the feature-selection cell treats
            # row 0 as a calibration measurement - confirm which is intended.
            # Select the chosen frequencies for all rows of this file at once.
            block = np.hstack([resistance[:, idx_R], reactance[:, idx_L]])
            feature_blocks.append(block)
            all_labels.extend([coin_name] * block.shape[0])

    if not feature_blocks:
        # Nothing found: return correctly shaped, empty containers.
        empty = np.empty((0, len(idx_R) + len(idx_L)))
        return pd.DataFrame(empty), pd.Series([], dtype=object)

    data = np.vstack(feature_blocks)
    labels = np.array(all_labels)

    # Apply one seeded permutation to both arrays so rows and labels stay aligned.
    order = np.random.default_rng(random_state).permutation(len(labels))
    return pd.DataFrame(data[order]), pd.Series(labels[order])

# Where the per-group measurement folders live, and which files to look for
data_dir = "./data"
num_groups = 11
groups = [f"Groupe{i}" for i in range(1, num_groups + 1)]
coins = ["1_CHF.h5", "2_CHF.h5", "5_CHF.h5", "10_CTS.h5", "20_CTS.h5", "50_CTS.h5", "EUR_1.h5"]

# Feature indices found by the frequency search, in ascending order
feature_indices_R = sorted(featureListR)  # real-part features
feature_indices_L = sorted(featureListL)  # imaginary-part features

# Build the full dataset from every group
X, y = load_and_merge_data_with_features(
    groups,
    coins,
    data_dir,
    feature_indices_R,
    feature_indices_L,
)

# 60 % training; the remaining 40 % is halved into validation and test
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Standardize: fit the scaler on the training set only, then apply it everywhere
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val, X_test = scaler.transform(X_val), scaler.transform(X_test)


In [97]:


# Fit an RBF-kernel SVM on the standardized training set
# (fit() returns the estimator itself, so it can be chained).
svm_model = SVC(kernel='rbf', probability=True).fit(X_train, y_train)

# Score the model on the validation set: per-class metrics, then the confusion matrix
y_val_pred = svm_model.predict(X_val)
val_report = classification_report(y_val, y_val_pred)
val_confusion = confusion_matrix(y_val, y_val_pred)
print("Validation Set Report")
print(val_report)
print(val_confusion)


Validation Set Report
              precision    recall  f1-score   support

      10_CTS       0.74      0.63      0.68        27
       1_CHF       0.65      0.91      0.76        33
      20_CTS       1.00      0.60      0.75        30
       2_CHF       1.00      0.95      0.97        39
      50_CTS       0.93      1.00      0.96        27
       5_CHF       0.93      1.00      0.96        41

    accuracy                           0.86       197
   macro avg       0.88      0.85      0.85       197
weighted avg       0.88      0.86      0.86       197

[[17  8  0  0  2  0]
 [ 2 30  0  0  0  1]
 [ 4  8 18  0  0  0]
 [ 0  0  0 37  0  2]
 [ 0  0  0  0 27  0]
 [ 0  0  0  0  0 41]]


In [98]:
# Final check on the held-out test set: per-class metrics, then the confusion matrix
y_test_pred = svm_model.predict(X_test)
test_report = classification_report(y_test, y_test_pred)
test_confusion = confusion_matrix(y_test, y_test_pred)
print("Test Set Report")
print(test_report)
print(test_confusion)


Test Set Report
              precision    recall  f1-score   support

      10_CTS       0.69      0.55      0.61        20
       1_CHF       0.67      0.71      0.69        31
      20_CTS       0.87      0.77      0.82        35
       2_CHF       1.00      0.93      0.97        30
      50_CTS       0.90      0.96      0.93        27
       5_CHF       0.90      1.00      0.95        54

    accuracy                           0.85       197
   macro avg       0.84      0.82      0.83       197
weighted avg       0.85      0.85      0.85       197

[[11  5  1  0  3  0]
 [ 4 22  3  0  0  2]
 [ 1  6 27  0  0  1]
 [ 0  0  0 28  0  2]
 [ 0  0  0  0 26  1]
 [ 0  0  0  0  0 54]]


: 