In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression

In [3]:
### Process Data
# 1. Normalize all data using sliding window normalization for each set
# 2. Split the data into windows
# 3. Extract features from each window
# 4. Split the data into train and test

In [5]:
data_files = ["..\data\LK_pressed_with_states.csv"]

# Getting a pandas with normalized signal and state info!

raw_data = pd.read_csv("..\data\LK_pressed_with_states.csv", delimiter=',')
raw_data = raw_data.dropna()
raw_data = raw_data[raw_data.columns[raw_data.columns.isin(['Time', 'Signal', 'State'])]]
raw_signal = raw_data['Signal'].to_numpy()

# def sliding_window_normalization(signal, L_norm):
#     signal = np.array(signal)
#     normalized_signal = np.zeros_like(signal)
    
#     for t in range(len(signal)):
#         if t >= L_norm - 1:
#             # the sliding window
#             window = signal[t - L_norm + 1:t + 1]
#             mean = np.mean(window)
#             std = np.std(window)
            
#             # Normalize the current value
#             normalized_signal[t] = (signal[t] - mean) / std if std != 0 else 0
#         else:
#             # For the initial values, normalization is skipped or handled differently
#             normalized_signal[t] = None  

#     return normalized_signal

normalized_signal = (raw_signal - np.mean(raw_signal)) / np.std(raw_signal) #sliding_window_normalization(raw_signal, 250)
raw_data["Signal"] = normalized_signal
df = raw_data.dropna()


In [9]:
# Splitting into windows and getting features!

def percentile20(data):
    return np.percentile(data,20)

def skew(data):
    mean = np.mean(data)
    std = np.std(data)
    N = np.size(data)
    top = np.sum((data - mean)**3)
    bottom = (N-1)*(std**3)
    total = top/(bottom + 0.0000001)
    return total

def kurtosis(data):
    mean = np.mean(data)
    top = np.sum((data - mean)**4)
    bottom = np.sum((data - (mean)**2)**2)
    total = top/(bottom + 0.00000001)
    return total

def stdfd(data):
    difference = np.diff(data)
    std = np.std(difference)
    return std

slice_length = 10

mean_list = []
max_list = []
min_list = []
range_list = []
last_list = []
second_list = []
perc_list = []
skew_list = []
kurt_list = []
stdiff_list = []
state_list = []

# Take slice_length number of signal points
for i in range(0, len(df) - slice_length + 1):
    current_frame = df.iloc[i: i + slice_length - 1]
        
    # Standard features
    mean_list.append(current_frame[ "Signal" ].mean())
    max_list.append(current_frame[ "Signal" ].max())
    min_list.append(current_frame[ "Signal" ].min())
    range_list.append(current_frame[ "Signal" ].max() - current_frame[ "Signal" ].min())
    
    perc_list.append(percentile20(current_frame[ "Signal" ]))
    skew_list.append(skew(current_frame[ "Signal" ]))
    kurt_list.append(kurtosis(current_frame[ "Signal" ]))
    stdiff_list.append(stdfd(current_frame["Signal"]))
    
    # Other features of interest
    last_list.append(current_frame.iloc[-1]["Signal"])
    second_list.append(current_frame.iloc[-2]["Signal"])
    
    # State, Y
    state_list.append(current_frame.iloc[-1]["State"])
    
# Make feature df for training
feature_df = pd.DataFrame({"Mean": mean_list, 
                           "Max": max_list, 
                           "Min": min_list, 
                           "Range": range_list,
                           "Last Val": last_list, 
                           "2nd Last Val": second_list, 
                           "Percentile": perc_list,
                           "Skew": skew_list,
                           "Kurtosis": kurt_list,
                           "Stdiff": stdiff_list,
                           "State": state_list
                          })

In [10]:
dataset = feature_df.to_numpy()
X = dataset[:, :-1]
y = dataset[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [11]:
# Train logistic regression model

logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

y_pred = logistic_model.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

#Extract coefficients and intercept
coefficients = logistic_model.coef_[0]
intercept = logistic_model.intercept_[0]

print(coefficients)
print(intercept)

Classification Report:
              precision    recall  f1-score   support

         0.0       0.94      0.96      0.95       237
         1.0       0.97      0.96      0.96       331

    accuracy                           0.96       568
   macro avg       0.96      0.96      0.96       568
weighted avg       0.96      0.96      0.96       568

[ 1.62966612  0.23891854  0.7910973  -0.55217876  2.0875778   0.233883
  0.14496411  0.15553272  0.08675103 -0.75360599]
1.9566517716945058
