In [1]:
import os
import pandas as pd
import numpy as np
from scipy.signal import hilbert
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.rocket import MiniRocketMultivariateVariable, MiniRocketMultivariate
from sklearn.linear_model import RidgeClassifierCV
from module import preprocess_bluetooth_signals, my_improved_bayesian_change_point_detection

In [2]:
# Function to load data
def load_data(root_dir):
    """Load every ``.txt`` signal file found under ``root_dir`` into a DataFrame.

    Each file is read as one numeric sample per line. The label of a signal is
    the path of its containing folder relative to ``root_dir``.

    Directories and files are visited in sorted order so that the row order
    (and therefore any seeded split performed on the result) is reproducible;
    ``os.walk`` on its own gives no ordering guarantee. Blank lines are skipped
    instead of raising ``ValueError`` from ``float('')``.

    Parameters
    ----------
    root_dir : str or path-like
        Directory to search recursively.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``signal`` (one ``pd.Series`` of floats per row) and
        ``label`` (str). Empty if no ``.txt`` file is found.

    Raises
    ------
    ValueError
        If a non-blank line cannot be parsed as a float.
    """
    signals = []
    labels = []
    for root, dirs, files in os.walk(root_dir):
        # Sorting in place steers the (top-down) walk into a deterministic order
        dirs.sort()
        # Use folder name (relative to root_dir) as label
        label = os.path.relpath(root, root_dir)
        for file in sorted(files):
            if not file.endswith('.txt'):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r') as f:
                values = np.array(
                    [float(line.strip()) for line in f if line.strip()],
                    dtype=float,
                )
            signals.append(pd.Series(values))
            labels.append(label)

    return pd.DataFrame({'signal': signals, 'label': labels})

In [3]:
# Dataset location, resolved relative to the current working directory
root_directory = os.path.join(os.getcwd(), 'Bluetooth Datasets', 'Dataset 5 Gsps')

In [4]:
# Read every signal file below the dataset root into a DataFrame
data = load_data(root_dir=root_directory)

In [5]:
# Run the project's preprocessing helper on the raw 'signal' column (dataset 'A')
preprocessed_data = preprocess_bluetooth_signals(
    data,
    signal_column='signal',
    dataset='A',
)

Preprocessing 2548 signals from dataset A...
Processing signal 1/2548...
Processing signal 1001/2548...
Processing signal 2001/2548...
Preprocessing complete!


In [6]:
# Trim each raw signal so it begins at the start index reported by the
# change-point detector. Detector settings are named here so they can be
# tuned in one place.
CPD_PARAMS = dict(window_size=600, overlap=0.65, start_threshold=10, end_threshold=2)

noise_removed = []
for analytic_signal, original_signal in zip(
    preprocessed_data['analytic_signal'], preprocessed_data['signal']
):
    # The detector returns four values; only the start index is used here
    start_idx, _, _, _ = my_improved_bayesian_change_point_detection(
        analytic_signal, **CPD_PARAMS
    )
    # .iloc makes the slice explicitly positional
    noise_removed.append(original_signal.iloc[start_idx:])

# One column assignment instead of per-row .at writes inside iterrows():
# faster, and re-running the cell simply rebuilds the column.
preprocessed_data['noise_removed'] = noise_removed

In [7]:
# The shortest trimmed signal sets the common length for all signals
trimmed_signals = preprocessed_data['noise_removed']
min_len = min(map(len, trimmed_signals))

# Cut every signal down to that length and store the list as a new column
truncated = [series.iloc[:min_len] for series in trimmed_signals]
preprocessed_data['truncated'] = truncated


In [8]:
# Preview the first five rows, including the newly added columns
preprocessed_data.head(n=5)

Unnamed: 0,signal,label,filtered_signal,normalized_signal,analytic_signal,I_data,Q_data,noise_removed,truncated
0,0 0.0033 1 0.0037 2 0.002...,Iphone/5/013409009258565_gamze_uyuk,"[0.0033, 0.0037, 0.0029, 0.0033, 0.0031, 0.003...","[0.18857142857142856, 0.2114285714285714, 0.16...","[(0.18857142857142867-0.7898769431704035j), (0...","[0.18857142857142867, 0.21142857142857138, 0.1...","[-0.7898769431704035, 0.4880644065043696, -0.4...",15120 0.0015 15121 0.0033 15122 0.001...,15120 0.0015 15121 0.0033 15122 0.001...
1,0 0.0031 1 0.0037 2 0.001...,Iphone/5/013409009258565_gamze_uyuk,"[0.0031, 0.0037, 0.0017, 0.0041, 0.0041, 0.003...","[0.19254658385093168, 0.2298136645962733, 0.10...","[(0.19254658385093146-0.7767565119435179j), (0...","[0.19254658385093146, 0.22981366459627312, 0.1...","[-0.7767565119435179, 0.47318468485819104, -0....",16380 0.0025 16381 0.0021 16382 0.003...,16380 0.0025 16381 0.0021 16382 0.003...
2,0 0.0033 1 0.0041 2 0.002...,Iphone/5/013409009258565_gamze_uyuk,"[0.0033, 0.0041, 0.0029, 0.0033, 0.0027, 0.003...","[0.19075144508670522, 0.2369942196531792, 0.16...","[(0.19075144508670502-0.6742843875958302j), (0...","[0.19075144508670502, 0.23699421965317927, 0.1...","[-0.6742843875958302, 0.21127063040912833, -0....",14700 0.0041 14701 0.0031 14702 0.002...,14700 0.0041 14701 0.0031 14702 0.002...
3,0 0.0035 1 0.0035 2 0.003...,Iphone/5/013409009258565_gamze_uyuk,"[0.0035, 0.0035, 0.0035, 0.0041, 0.0045, 0.003...","[0.20710059171597636, 0.20710059171597636, 0.2...","[(0.20710059171597586+0.10583288895493895j), (...","[0.20710059171597586, 0.20710059171597647, 0.2...","[0.10583288895493895, 0.0967974458841826, -0.0...",13440 0.0051 13441 0.0027 13442 0.005...,13440 0.0051 13441 0.0027 13442 0.005...
4,0 0.0039 1 0.0039 2 0.003...,Iphone/5/013409009258565_gamze_uyuk,"[0.0039, 0.0039, 0.0033, 0.0037, 0.0035, 0.004...","[0.2392638036809816, 0.2392638036809816, 0.202...","[(0.2392638036809817-0.7355591243003309j), (0....","[0.2392638036809817, 0.2392638036809814, 0.202...","[-0.7355591243003309, 0.2847687808493694, -0.3...",16590 0.0063 16591 0.0017 16592 0.006...,16590 0.0063 16591 0.0017 16592 0.006...


In [9]:
# Recompute the analytic signal on the truncated data and store its real and
# imaginary parts as two separate columns.
real_parts = []
imag_parts = []

# Iterate the column itself rather than .loc[i] with a positional counter,
# which only works when the index happens to be exactly 0..n-1.
for signal in preprocessed_data['truncated']:
    analytic = hilbert(signal.values)
    real_parts.append(pd.Series(analytic.real))
    imag_parts.append(pd.Series(analytic.imag))

preprocessed_data['real'] = real_parts
preprocessed_data['imaginary'] = imag_parts

In [10]:
# Number of signals available for each label
label_counts = preprocessed_data['label'].value_counts()
print(label_counts)

label
Iphone/5/013409009258565_gamze_uyuk                150
Iphone/5/013737001703349_makbule_guclu             150
Iphone/5s/352053069423260_mustafa_gungor           150
Iphone/6/354427066558690_mustafa_guclu             150
Iphone/6/355401074766578_berat_serefoglu           150
Iphone/6s/355694077593599_deniz_yilmaz             150
Iphone/6s/353308076325778_tugce_ozkan              150
LG/G4/352334073607175_mert_kilic                   150
LG/G4/352334072148270_mertcan_yurtseven            150
Samsung/S5/353812060996205_melisa_oktem            150
Samsung/S5/353812063461314_ismet_buyukkilic        150
Samsung/note3/359542051326328_gulden_vatansever    150
Samsung/note3/359543054106238_burak_uyuk           150
Sony/XperiaM5/354188070809491_firat_vural          150
Sony/XperiaM5/354188076543367_leman_cetindere      150
Iphone/6s/355396082974273_melisa_oktem             149
Iphone/5s/359261061140526_melisa_topcu             149
Name: count, dtype: int64


In [11]:
# Empty containers for the per-label train/test split
# (the split reserves 30 examples per label for the test set)
train_data, test_data = pd.DataFrame(), pd.DataFrame()

In [12]:
# Split each label's rows separately so that exactly 30 rows per label end up
# in the test set; the fixed random_state keeps the split repeatable.
train_parts = []
test_parts = []
for label, group in preprocessed_data[['label', 'real', 'imaginary']].groupby('label'):
    train, test = train_test_split(group, test_size=30, random_state=42)
    train_parts.append(train)
    test_parts.append(test)

# Build both frames from scratch with a single concat each. Appending to
# frames created in another cell duplicated every row whenever this cell
# was re-run, and concatenating inside the loop was quadratic.
train_data = pd.concat(train_parts)
test_data = pd.concat(test_parts)

In [13]:
# Features are the two signal columns; targets are the label strings
feature_cols = ['real', 'imaginary']
X_train, X_test = train_data[feature_cols], test_data[feature_cols]
y_train_series, y_test_series = train_data['label'], test_data['label']
# Plain arrays of labels for the estimator
y_train, y_test = y_train_series.values, y_test_series.values

In [14]:
# Training rows per label
train_label_counts = y_train_series.value_counts()
train_label_counts

label
Iphone/5/013409009258565_gamze_uyuk                120
Iphone/5/013737001703349_makbule_guclu             120
Iphone/5s/352053069423260_mustafa_gungor           120
Iphone/6/354427066558690_mustafa_guclu             120
Iphone/6/355401074766578_berat_serefoglu           120
Iphone/6s/355694077593599_deniz_yilmaz             120
Iphone/6s/353308076325778_tugce_ozkan              120
LG/G4/352334073607175_mert_kilic                   120
LG/G4/352334072148270_mertcan_yurtseven            120
Samsung/note3/359542051326328_gulden_vatansever    120
Samsung/note3/359543054106238_burak_uyuk           120
Samsung/S5/353812060996205_melisa_oktem            120
Samsung/S5/353812063461314_ismet_buyukkilic        120
Sony/XperiaM5/354188070809491_firat_vural          120
Sony/XperiaM5/354188076543367_leman_cetindere      120
Iphone/6s/355396082974273_melisa_oktem             119
Iphone/5s/359261061140526_melisa_topcu             119
Name: count, dtype: int64

In [15]:
# Test rows per label
test_label_counts = y_test_series.value_counts()
test_label_counts

label
Iphone/5/013409009258565_gamze_uyuk                30
Iphone/5/013737001703349_makbule_guclu             30
Iphone/5s/352053069423260_mustafa_gungor           30
Iphone/5s/359261061140526_melisa_topcu             30
Iphone/6/354427066558690_mustafa_guclu             30
Iphone/6/355401074766578_berat_serefoglu           30
Iphone/6s/353308076325778_tugce_ozkan              30
Iphone/6s/355396082974273_melisa_oktem             30
Iphone/6s/355694077593599_deniz_yilmaz             30
LG/G4/352334072148270_mertcan_yurtseven            30
LG/G4/352334073607175_mert_kilic                   30
Samsung/S5/353812060996205_melisa_oktem            30
Samsung/S5/353812063461314_ismet_buyukkilic        30
Samsung/note3/359542051326328_gulden_vatansever    30
Samsung/note3/359543054106238_burak_uyuk           30
Sony/XperiaM5/354188070809491_firat_vural          30
Sony/XperiaM5/354188076543367_leman_cetindere      30
Name: count, dtype: int64

In [16]:
# Preview the first five training rows
X_train.head(n=5)

Unnamed: 0,real,imaginary
22,0 0.0011 1 0.0049 2 0.001...,0 0.016763 1 -0.008991 2 0...
15,0 0.0043 1 0.0013 2 0.005...,0 -0.013240 1 0.006514 2 -0...
65,0 0.0049 1 0.0005 2 0.005...,0 -0.016405 1 0.008252 2 -0...
11,0 0.0017 1 0.0063 2 0.001...,0 -0.011224 1 0.008671 2 -0...
42,0 0.0013 1 0.0061 2 0.001...,0 0.005455 1 0.000437 2 0...


In [17]:
# Preview the first five test rows
X_test.head(n=5)

Unnamed: 0,real,imaginary
73,0 0.0013 1 0.0047 2 0.002...,0 0.014008 1 -0.009183 2 0...
18,0 0.0047 1 0.0005 2 0.005...,0 0.011846 1 -0.004341 2 0...
118,0 0.0041 1 0.0013 2 0.003...,0 0.000898 1 -0.002902 2 0...
78,0 0.0029 1 0.0037 2 0.004...,0 -0.006577 1 -0.000396 2 -0...
76,0 0.0011 1 0.0065 2 0.001...,0 -0.000554 1 -0.005045 2 0...


In [18]:
# Setup pipeline: MiniRocket transform followed by a cross-validated ridge classifier
ridge_alphas = np.logspace(-3, 3, 10)
minirocket = MiniRocketMultivariate(
    max_dilations_per_kernel=32,
    n_jobs=-1,
    random_state=42,
)
ridge_classifier = RidgeClassifierCV(alphas=ridge_alphas)
bluetooth_pipeline = make_pipeline(minirocket, ridge_classifier)

In [19]:
# Train the transform and the classifier on the training split
bluetooth_pipeline.fit(X=X_train, y=y_train)

In [20]:
# Score the fitted pipeline on the held-out test split
accuracy = bluetooth_pipeline.score(X=X_test, y=y_test)
print(f"Test Accuracy: {accuracy:.3f}")

Test Accuracy: 0.939


In [21]:
# Predict labels for the test set
y_pred = bluetooth_pipeline.predict(X_test)

# Side-by-side table of true and predicted labels
results = pd.DataFrame({'True Label': y_test, 'Predicted Label': y_pred})

# Keep only the rows where the prediction disagrees with the true label
is_wrong = results['True Label'].ne(results['Predicted Label'])
incorrect_predictions = results.loc[is_wrong]

# Print incorrect predictions
print("Incorrect Predictions:")
print(incorrect_predictions)

Incorrect Predictions:
                                          True Label  \
12               Iphone/5/013409009258565_gamze_uyuk   
13               Iphone/5/013409009258565_gamze_uyuk   
36            Iphone/5/013737001703349_makbule_guclu   
145           Iphone/6/354427066558690_mustafa_guclu   
148           Iphone/6/354427066558690_mustafa_guclu   
162         Iphone/6/355401074766578_berat_serefoglu   
165         Iphone/6/355401074766578_berat_serefoglu   
169         Iphone/6/355401074766578_berat_serefoglu   
238           Iphone/6s/355396082974273_melisa_oktem   
266           Iphone/6s/355694077593599_deniz_yilmaz   
278          LG/G4/352334072148270_mertcan_yurtseven   
282          LG/G4/352334072148270_mertcan_yurtseven   
283          LG/G4/352334072148270_mertcan_yurtseven   
290          LG/G4/352334072148270_mertcan_yurtseven   
292          LG/G4/352334072148270_mertcan_yurtseven   
294          LG/G4/352334072148270_mertcan_yurtseven   
361      Samsung/S5/35381