In [1]:
import os
import pandas as pd
import numpy as np
from scipy.signal import hilbert
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.rocket import MiniRocketMultivariateVariable, MiniRocketMultivariate
from sklearn.linear_model import RidgeClassifierCV
from module import preprocess_bluetooth_signals, my_improved_bayesian_change_point_detection

In [2]:
# Function to load data
def load_data(root_dir):
    """Load every ``.txt`` recording found under ``root_dir`` into a DataFrame.

    The directory tree is walked recursively. Each ``.txt`` file is read as
    one float per line (blank lines are ignored) and becomes one row.

    Parameters
    ----------
    root_dir : str or os.PathLike
        Top-level directory to search.

    Returns
    -------
    pandas.DataFrame
        Two columns:
        ``signal`` - a ``pd.Series`` of floats holding the file's samples;
        ``label``  - the containing folder's path relative to ``root_dir``
        (``'.'`` for files that sit directly in ``root_dir``). The path uses
        the platform's separator, so labels differ between Windows and POSIX.
        Rows are ordered by folder path, then by file name.

    Raises
    ------
    ValueError
        If a non-blank line cannot be parsed as a float.
    """
    X = []
    y = []
    for root, dirs, files in os.walk(root_dir):
        # os.walk yields entries in arbitrary, filesystem-dependent order.
        # Sorting `dirs` in place fixes the traversal order so the row order
        # (and any seeded split that depends on it) is the same on every machine.
        dirs.sort()
        # Every file in this folder shares the same label; compute it once.
        label = os.path.relpath(root, root_dir)
        for file in sorted(files):
            if not file.endswith('.txt'):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r') as f:
                # Skip blank lines (e.g. a trailing newline at end of file)
                # instead of crashing on float('').
                values = np.array(
                    [float(line.strip()) for line in f if line.strip()],
                    dtype=float,
                )
            X.append(pd.Series(values))
            y.append(label)

    df = pd.DataFrame({'signal': X, 'label': y})
    return df

In [3]:
# Dataset location, resolved relative to the current working directory:
# <cwd>/Bluetooth Datasets/Dataset 10 Gsps
root_directory = os.path.join(os.getcwd(), 'Bluetooth Datasets', 'Dataset 10 Gsps')

In [4]:
# Load the data: one row per .txt recording found under root_directory,
# with the raw samples in 'signal' and the relative folder path in 'label'.
data = load_data(root_directory)

In [5]:
# Run the project's preprocessing helper on the raw 'signal' column.
# Judging by the preview further down, the result carries extra columns
# (filtered_signal, normalized_signal, analytic_signal, I_data, Q_data).
# NOTE(review): dataset='B' presumably selects dataset-specific settings - confirm in `module`.
preprocessed_data = preprocess_bluetooth_signals(data, signal_column='signal', dataset='B')

Preprocessing 2398 signals from dataset B...
Processing signal 1/2398...
Processing signal 1001/2398...
Processing signal 2001/2398...
Preprocessing complete!


In [6]:
# Drop the leading portion of every recording. The change-point detector is
# run on the analytic signal, and the first value it returns is used as the
# slice start on the raw signal. The other returned values are not used, so
# the tail of each recording is kept.
trimmed_signals = []
for _, record in preprocessed_data.iterrows():
    onset, _end, _, _ = my_improved_bayesian_change_point_detection(
        record['analytic_signal'],
        window_size=1200,
        overlap=0.65,
        start_threshold=10,
    )
    trimmed_signals.append(record['signal'][onset:])

preprocessed_data['noise_removed'] = trimmed_signals

In [7]:
# The shortest trimmed recording sets the common length for all of them.
min_len = min(map(len, preprocessed_data['noise_removed']))

# Keep only the first `min_len` samples of each recording so every row
# ends up with a series of identical length.
truncated = [series.head(min_len) for series in preprocessed_data['noise_removed']]
preprocessed_data['truncated'] = truncated


In [8]:
# Preview the first rows, including the derived 'noise_removed' and 'truncated' columns.
preprocessed_data.head()

Unnamed: 0,signal,label,filtered_signal,normalized_signal,analytic_signal,I_data,Q_data,noise_removed,truncated
0,0 0.0035 1 0.0021 2 0.004...,Iphone\4s\013004004984503_oguz_guler,"[1.4530537899018204e-05, 3.152383494460062e-05...","[0.003675272541188227, 0.007973461531157001, -...","[(0.00367527254118819-0.3088209019854309j), (0...","[0.00367527254118819, 0.007973461531157074, -0...","[-0.3088209019854309, 0.1006619132058073, -0.0...",26880 0.0039 26881 0.0027 26882 0.003...,26880 0.0039 26881 0.0027 26882 0.003...
1,0 0.0039 1 0.0019 2 0.003...,Iphone\4s\013004004984503_oguz_guler,"[1.662943702275284e-05, -2.6273821287584196e-0...","[0.004024434297083603, -0.006358439396386452, ...","[(0.004024434297083448+0.44494557981805255j), ...","[0.004024434297083448, -0.00635843939638646, -...","[0.44494557981805255, 0.052186358528965485, 0....",30240 0.0039 30241 0.0025 30242 0.003...,30240 0.0039 30241 0.0025 30242 0.003...
2,0 0.0021 1 0.0045 2 0.001...,Iphone\4s\013004004984503_oguz_guler,"[-0.00010070339870325695, -0.00015392280460987...","[-0.0246819690972796, -0.03772581616577233, 0....","[(-0.02468196909727891+0.14546014623329678j), ...","[-0.02468196909727891, -0.03772581616577299, 0...","[0.14546014623329678, 0.09519909928888141, -0....",34440 0.0015 34441 0.0045 34442 0.001...,34440 0.0015 34441 0.0045 34442 0.001...
3,0 0.0023 1 0.0035 2 0.001...,Iphone\4s\013004004984503_oguz_guler,"[-0.00010071881868998701, -0.00027686485288218...","[-0.025061753265929604, -0.06889197789639094, ...","[(-0.025061753265929534-0.384899755988887j), (...","[-0.025061753265929534, -0.0688919778963903, 0...","[-0.384899755988887, -0.08053463958582961, -0....",41160 0.0013 41161 0.0041 41162 0.002...,41160 0.0013 41161 0.0041 41162 0.002...
4,0 0.0033 1 0.0023 2 0.003...,Iphone\4s\013004004984503_oguz_guler,"[3.1252974599294536e-05, 1.7284553472946328e-0...","[0.007830822356315405, 0.0004330860325785836, ...","[(0.007830822356315626-0.2084549451810903j), (...","[0.007830822356315626, 0.00043308603257894525,...","[-0.2084549451810903, -0.10389299448975445, -0...",36540 0.0035 36541 0.0017 36542 0.003...,36540 0.0035 36541 0.0017 36542 0.003...


In [9]:
# Build the two model channels from each truncated recording: the real and
# imaginary parts of its analytic signal (scipy.signal.hilbert).
real_parts = []
imag_parts = []

# Iterate over the column values directly. Looking rows up with
# `.loc[i, ...]` and a positional counter only works while the index is
# exactly 0..n-1, and breaks (or silently misaligns) once rows have been
# filtered or reordered upstream.
for signal in preprocessed_data['truncated']:
    analytic = hilbert(signal.values)
    # Wrapping in a fresh pd.Series gives every channel a default 0..len-1
    # index instead of the offset index left over from slicing.
    real_parts.append(pd.Series(np.real(analytic)))
    imag_parts.append(pd.Series(np.imag(analytic)))

# List assignment is positional, so the parts line up with the rows they
# were computed from.
preprocessed_data['real'] = real_parts
preprocessed_data['imaginary'] = imag_parts

In [10]:
# Number of recordings per label, to check class balance before splitting.
print(preprocessed_data['label'].value_counts())

label
Iphone\4s\013004004984503_oguz_guler              150
Iphone\4s\013051002641007_koray_oktem             150
Iphone\7\356563081643675_cuneyt_buyukkilic        150
Iphone\7\359206078179249_ismet_buyukkilic         150
Iphone\7plus\355373083202269_akin_yavuz           150
Iphone\7plus\359179071432156_melisa_topcu         150
Lg\V20\3521162080037221_ozan_erbasan              150
Lg\V20\352162080046735_melis_yilmaz               150
Samsung\J7\352952078845142_omerfaruk_altuntas     150
Samsung\J7\359947076663425_onder_sahin            150
Samsung\Note2\355056057465265_yesim_balci         150
Samsung\S7edge\357220078785725_melisa_oktem       150
Xiaomi\Mi6\867391032321064_berat_serefoglu        150
Xiaomi\Mi6\864890030385966_umut_erkan             150
Samsung\Note2\356261053336200_ismet_buyukkilic    149
Samsung\S7edge\359948079357437_baris_ozgunduz     149
Name: count, dtype: int64


In [11]:
# Placeholders for the train/test partitions; the split cell below populates
# them with 30 held-out examples per label in the test set.
train_data, test_data = pd.DataFrame(), pd.DataFrame()

In [12]:
# Hold out exactly 30 recordings per label for testing; the rest go to training.
# The pieces are collected in local lists and concatenated once, so this cell
# rebuilds train_data/test_data from scratch every time it runs. Appending to
# frames created in another cell would duplicate every row on a re-run, and
# concatenating inside the loop copies the accumulated data on each iteration.
train_parts = []
test_parts = []
for label, group in preprocessed_data[['label', 'real', 'imaginary']].groupby('label'):
    # Same seed for every group keeps the split reproducible.
    train, test = train_test_split(group, test_size=30, random_state=0)
    train_parts.append(train)
    test_parts.append(test)

train_data = pd.concat(train_parts)
test_data = pd.concat(test_parts)

In [13]:
# Features are the two per-recording channels ('real', 'imaginary');
# targets are the label strings, kept both as Series and as plain arrays.
X_train, X_test = (split[['real', 'imaginary']] for split in (train_data, test_data))
y_train_series, y_test_series = train_data['label'], test_data['label']
y_train, y_test = y_train_series.values, y_test_series.values

In [14]:
# Per-label sample counts in the training split.
y_train_series.value_counts()

label
Iphone\4s\013004004984503_oguz_guler              120
Iphone\4s\013051002641007_koray_oktem             120
Iphone\7\356563081643675_cuneyt_buyukkilic        120
Iphone\7\359206078179249_ismet_buyukkilic         120
Iphone\7plus\355373083202269_akin_yavuz           120
Iphone\7plus\359179071432156_melisa_topcu         120
Lg\V20\3521162080037221_ozan_erbasan              120
Lg\V20\352162080046735_melis_yilmaz               120
Samsung\J7\352952078845142_omerfaruk_altuntas     120
Samsung\J7\359947076663425_onder_sahin            120
Samsung\Note2\355056057465265_yesim_balci         120
Samsung\S7edge\357220078785725_melisa_oktem       120
Xiaomi\Mi6\867391032321064_berat_serefoglu        120
Xiaomi\Mi6\864890030385966_umut_erkan             120
Samsung\Note2\356261053336200_ismet_buyukkilic    119
Samsung\S7edge\359948079357437_baris_ozgunduz     119
Name: count, dtype: int64

In [15]:
# Per-label sample counts in the test split (the split requests 30 per label).
y_test_series.value_counts()

label
Iphone\4s\013004004984503_oguz_guler              30
Iphone\4s\013051002641007_koray_oktem             30
Iphone\7\356563081643675_cuneyt_buyukkilic        30
Iphone\7\359206078179249_ismet_buyukkilic         30
Iphone\7plus\355373083202269_akin_yavuz           30
Iphone\7plus\359179071432156_melisa_topcu         30
Lg\V20\3521162080037221_ozan_erbasan              30
Lg\V20\352162080046735_melis_yilmaz               30
Samsung\J7\352952078845142_omerfaruk_altuntas     30
Samsung\J7\359947076663425_onder_sahin            30
Samsung\Note2\355056057465265_yesim_balci         30
Samsung\Note2\356261053336200_ismet_buyukkilic    30
Samsung\S7edge\357220078785725_melisa_oktem       30
Samsung\S7edge\359948079357437_baris_ozgunduz     30
Xiaomi\Mi6\864890030385966_umut_erkan             30
Xiaomi\Mi6\867391032321064_berat_serefoglu        30
Name: count, dtype: int64

In [16]:
# Peek at the training features: each cell holds a whole pd.Series
# (one channel of one recording), not a scalar.
X_train.head()

Unnamed: 0,real,imaginary
137,0 0.0019 1 0.0047 2 0.001...,0 -0.009563 1 0.004906 2 -0...
84,0 0.0043 1 0.0013 2 0.003...,0 0.008017 1 -0.004958 2 0...
27,0 0.0049 1 0.0029 2 0.003...,0 0.007653 1 -0.005444 2 0...
127,0 0.0039 1 0.0019 2 0.004...,0 0.006889 1 -0.006798 2 0...
132,0 0.0041 1 0.0029 2 0.004...,0 0.008060 1 -0.006273 2 0...


In [17]:
# Same preview for the held-out test features.
X_test.head()

Unnamed: 0,real,imaginary
114,0 0.0025 1 0.0051 2 0.003...,0 -0.009551 1 0.005242 2 -0...
62,0 0.0013 1 0.0041 2 0.000...,0 -0.005609 1 0.006007 2 -0...
33,0 0.0033 1 0.0027 2 0.004...,0 0.004083 1 -0.005499 2 0...
107,0 0.0011 1 0.0031 2 0.001...,0 -0.008820 1 0.004926 2 -0...
7,0 0.0037 1 0.0023 2 0.003...,0 0.003429 1 -0.004639 2 0...


In [18]:
# Two-stage classifier: a MiniRocket transform over the two channels feeding
# a ridge classifier that picks its regularisation strength by cross-validation.
# NOTE(review): sktime's MiniRocket examples put a StandardScaler(with_mean=False)
# between these two steps - worth confirming whether it was left out on purpose.
rocket_transform = MiniRocketMultivariate(
    max_dilations_per_kernel=32,
    n_jobs=-1,
    random_state=42,  # fixed seed for a reproducible transform
)

# Candidate alphas: 10 values log-spaced between 1e-3 and 1e3.
ridge_classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, num=10))

bluetooth_pipeline = make_pipeline(rocket_transform, ridge_classifier)

In [19]:
# Fit the pipeline (MiniRocket transform, then ridge classifier) on the training split.
bluetooth_pipeline.fit(X_train, y_train)

In [20]:
# Evaluate the pipeline on the held-out test split and report the score
# to three decimal places.
accuracy = bluetooth_pipeline.score(X_test, y_test)
print(f"Test Accuracy: {accuracy:.3f}")

Test Accuracy: 0.981


In [None]:
# Predict labels for the test set
y_pred = bluetooth_pipeline.predict(X_test)

# Pair true and predicted labels. y_test and y_pred are plain arrays, so
# without an explicit index the frame would be numbered 0..n-1 and a
# misclassified row could not be traced back to its recording. Reusing
# X_test's index keeps the original row ids from preprocessed_data.
results = pd.DataFrame(
    {
        'True Label': y_test,
        'Predicted Label': y_pred,
    },
    index=X_test.index,
)

# Keep only the rows where the prediction disagrees with the ground truth.
incorrect_predictions = results[results['True Label'] != results['Predicted Label']]

# Print incorrect predictions
print("Incorrect Predictions:")
print(incorrect_predictions)

Incorrect Predictions:
                                          True Label  \
1195             Lg/V20/352162080046735_melis_yilmaz   
1536       Samsung/Note2/355056057465265_yesim_balci   
1686  Samsung/Note2/356261053336200_ismet_buyukkilic   
1659  Samsung/Note2/356261053336200_ismet_buyukkilic   
2078   Samsung/S7edge/359948079357437_baris_ozgunduz   

                                     Predicted Label  
1195       Samsung/Note2/355056057465265_yesim_balci  
1536  Samsung/Note2/356261053336200_ismet_buyukkilic  
1686       Samsung/Note2/355056057465265_yesim_balci  
1659       Samsung/Note2/355056057465265_yesim_balci  
2078           Iphone/4s/013051002641007_koray_oktem  
