In [4]:
# Import pandas and numpy for data manipulation
import pandas as pd
import numpy as np
import time

# Import modules from Scikit-learn
from sklearn.svm import SVC                            # Import SVM model using guassian
from sklearn.model_selection import train_test_split   # Import train_test_split function
from sklearn import metrics                            # import metrics modules for accuracy calculation
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from tqdm import tqdm

# Parameter Ablation Analysis 

In [5]:
# Read data
PATH = "../../my_data/identification-dataset/my_custom_data/anblock-error-dataset.csv"
df = pd.read_csv(PATH)

# Drop uncomplete rows
df.dropna(inplace=True)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59535 entries, 0 to 59534
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   tdoa12            59535 non-null  float64
 1   tdoa21            59535 non-null  float64
 2   snr_an1           59535 non-null  float64
 3   power_dif_an1     59535 non-null  float64
 4   snr_an2           59535 non-null  float64
 5   power_dif_an2     59535 non-null  float64
 6   an1_rx_snr        59535 non-null  float64
 7   an1_rx_powerdif   59535 non-null  float64
 8   an1_tof           59535 non-null  float64
 9   an2_rx_snr        59535 non-null  float64
 10  an2_rx_powerdif   59535 non-null  float64
 11  an2_tof           59535 non-null  float64
 12  material          59535 non-null  object 
 13  encoded_material  59535 non-null  int64  
 14  err_an1           59535 non-null  float64
 15  err_an2           59535 non-null  float64
dtypes: float64(14), int64(1), object(1)
memo

In [7]:
# Set training data
train_df = df.drop('material', axis=1)

# Extracted features 
X = train_df.drop('encoded_material', axis=1)
y = train_df['encoded_material'] # Labels

In [8]:
# Split dataset into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3) # 70% training and 30% test

In [9]:
# c values
# Number of `C` values to consider
num_c_values = 10

# Generate logarithmically spaced `C` values
c_values = np.logspace(-3, 3, num=num_c_values)
c_values

array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03])

In [10]:
# Create an empty list to store results
def trainer(c_values):
    results = []
    for c in c_values:
        # Create a pipeline object for our model SVM using rbf kernel
        pipe_SVM = make_pipeline(StandardScaler(),
                                 SVC(kernel='rbf',
                                     C=c, 
                                     cache_size=2000,
                                     verbose=0)
                                )
        
        # Measure training time
        start_train = time.time()
        pipe_SVM.fit(X_train, y_train)
        end_train = time.time()
        train_time_per_sample = (end_train - start_train) / len(X_train)

        # Measure test time
        start_test = time.time()
        y_pred = pipe_SVM.predict(X_test)
        end_test = time.time()
        test_time_per_sample = (end_test - start_test) / len(X_test)
        
        # Evaluate the pipeline and store the results
        accuracy = metrics.accuracy_score(y_test, y_pred)
        precision = metrics.precision_score(y_test, y_pred, average="macro")
        recall = metrics.recall_score(y_test, y_pred, average="macro")
        f1 = metrics.recall_score(y_test, y_pred, average="macro")


        results.append({
            'c_value': c,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'train_time_per_sample': train_time_per_sample,
            'test_time_per_sample': test_time_per_sample 
        })
    return results

In [11]:
results = []
for x in tqdm(np.arange(1)): # should be 10
    r = trainer(c_values)
    results.append(r)

100%|██████████| 1/1 [17:21<00:00, 1041.23s/it]


In [16]:
# show one of 10 samples
results_df = pd.DataFrame(results[0])
results_df

Unnamed: 0,c_value,accuracy,precision,recall,f1,train_time_per_sample,test_time_per_sample
0,0.001,0.56587,0.648284,0.562425,0.562425,0.003614,0.004322
1,0.004642,0.672975,0.667052,0.672785,0.672785,0.002337,0.003587
2,0.021544,0.717709,0.713942,0.717791,0.717791,0.001528,0.003088
3,0.1,0.748558,0.746148,0.748866,0.748866,0.001148,0.002683
4,0.464159,0.781367,0.779544,0.781928,0.781928,0.000944,0.00228
5,2.154435,0.828845,0.827537,0.829551,0.829551,0.000779,0.002035
6,10.0,0.894127,0.893801,0.894618,0.894618,0.00082,0.001781
7,46.415888,0.942109,0.94269,0.942267,0.942267,0.000928,0.001335
8,215.443469,0.966911,0.966989,0.966921,0.966921,0.00122,0.000959
9,1000.0,0.98074,0.980687,0.980751,0.980751,0.001899,0.000709


In [17]:
# Round the columns to the 4th decimal place
results_df['accuracy'] = results_df['accuracy'].round(2)
results_df['precision'] = results_df['precision'].round(2)
results_df['recall'] = results_df['recall'].round(2)
results_df['f1'] = results_df['f1'].round(2)

# Scale from seconds to milliseconds
results_df['train_time_per_sample'] = results_df['train_time_per_sample'] * 1000 # to millisec. 
results_df['train_time_per_sample'] = results_df['train_time_per_sample'].round(2)

results_df['test_time_per_sample'] = results_df['test_time_per_sample'] * 1000 # to millisec.
results_df['test_time_per_sample'] = results_df['test_time_per_sample'].round(2)

results_df

Unnamed: 0,c_value,accuracy,precision,recall,f1,train_time_per_sample,test_time_per_sample
0,0.001,0.57,0.65,0.56,0.56,3.61,4.32
1,0.004642,0.67,0.67,0.67,0.67,2.34,3.59
2,0.021544,0.72,0.71,0.72,0.72,1.53,3.09
3,0.1,0.75,0.75,0.75,0.75,1.15,2.68
4,0.464159,0.78,0.78,0.78,0.78,0.94,2.28
5,2.154435,0.83,0.83,0.83,0.83,0.78,2.03
6,10.0,0.89,0.89,0.89,0.89,0.82,1.78
7,46.415888,0.94,0.94,0.94,0.94,0.93,1.34
8,215.443469,0.97,0.97,0.97,0.97,1.22,0.96
9,1000.0,0.98,0.98,0.98,0.98,1.9,0.71


In [18]:
# Convert DataFrame to LaTeX table
latex_table = results_df.to_latex(index=False)
print(latex_table)

\begin{tabular}{rrrrrrr}
\toprule
    c\_value &  accuracy &  precision &  recall &   f1 &  train\_time\_per\_sample &  test\_time\_per\_sample \\
\midrule
   0.001000 &      0.57 &       0.65 &    0.56 & 0.56 &                   3.61 &                  4.32 \\
   0.004642 &      0.67 &       0.67 &    0.67 & 0.67 &                   2.34 &                  3.59 \\
   0.021544 &      0.72 &       0.71 &    0.72 & 0.72 &                   1.53 &                  3.09 \\
   0.100000 &      0.75 &       0.75 &    0.75 & 0.75 &                   1.15 &                  2.68 \\
   0.464159 &      0.78 &       0.78 &    0.78 & 0.78 &                   0.94 &                  2.28 \\
   2.154435 &      0.83 &       0.83 &    0.83 & 0.83 &                   0.78 &                  2.03 \\
  10.000000 &      0.89 &       0.89 &    0.89 & 0.89 &                   0.82 &                  1.78 \\
  46.415888 &      0.94 &       0.94 &    0.94 & 0.94 &                   0.93 &                  1.34

  latex_table = results_df.to_latex(index=False)
