<a href="https://colab.research.google.com/github/RishabhVenkat/Fz_SVM_Classif/blob/main/Combined_Fz_svm_classif.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the Dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import accuracy_score

Data Collection and Processing

In [2]:
# loading the data from csv file to a PAndas DataFrame
coma_data = pd.read_csv('/content/Combined_Fz(AutoRecovered).csv')

In [3]:
# printing the first 5 rows of the dataframe
coma_data.head()

Unnamed: 0,Fz,Label
0,0.829079,0
1,0.822159,0
2,0.830567,0
3,0.849762,0
4,0.823495,0


In [4]:
# number of rows and columns in the dataset
coma_data.shape

(24065, 2)

In [5]:
# getting more information about the dataset
coma_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24065 entries, 0 to 24064
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Fz      24065 non-null  float64
 1   Label   24065 non-null  int64  
dtypes: float64(1), int64(1)
memory usage: 376.1 KB


In [6]:
# checking for missing values in each column
coma_data.isnull().sum()

Fz       0
Label    0
dtype: int64

In [7]:
missing_values = coma_data.isnull()

print(missing_values)

          Fz  Label
0      False  False
1      False  False
2      False  False
3      False  False
4      False  False
...      ...    ...
24060  False  False
24061  False  False
24062  False  False
24063  False  False
24064  False  False

[24065 rows x 2 columns]


In [8]:
rows_with_missing = coma_data[coma_data.isnull().any(axis=1)]
print(rows_with_missing)

Empty DataFrame
Columns: [Fz, Label]
Index: []


In [9]:
# getting some statistical measures about the data
coma_data.describe()

Unnamed: 0,Fz,Label
count,24065.0,24065.0
mean,0.845245,1.987534
std,0.034438,0.143605
min,0.72375,0.0
25%,0.823874,2.0
50%,0.847279,2.0
75%,0.868666,2.0
max,0.956149,2.0


In [10]:
# distribution of target variable
coma_data['Label'].value_counts()

Label
2    23865
0      100
1      100
Name: count, dtype: int64

0 -> MCS

1 -> VS

2 -> Healthy

In [11]:
 # grouping the data based on the target variable
 coma_data.groupby('Label').mean()

Unnamed: 0_level_0,Fz
Label,Unnamed: 1_level_1
0,0.837951
1,0.811293
2,0.845418


Data Preprocessing

In [12]:
#from sklearn.preprocessing import LabelEncoder

# Assuming 'Label' column is categorical
#label_encoder = LabelEncoder()
#coma_data['Label'] = label_encoder.fit_transform(coma_data['Label'])

In [13]:
#from sklearn.preprocessing import StandardScaler

# Standardize the Fz column
#scaler = StandardScaler()
#coma_data['Fz'] = scaler.fit_transform(coma_data[['Fz']])

In [14]:
from scipy.signal import butter, filtfilt

def butter_bandpass(lowcut, highcut, fs, order=5):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return b, a

def bandpass_filter(data, lowcut=0.5, highcut=50.0, fs=256.0, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
    y = filtfilt(b, a, data)
    return y

# Apply bandpass filter to the Fz column
fs = 256.0  # Sampling frequency, adjust according to your data
coma_data['Fz'] = bandpass_filter(coma_data['Fz'], fs=fs)


In [15]:
#from scipy.stats import kurtosis, skew

# Example feature extraction
#coma_data['mean'] = coma_data['Fz'].rolling(window=128).mean()
#coma_data['std'] = coma_data['Fz'].rolling(window=128).std()
#coma_data['kurtosis'] = coma_data['Fz'].rolling(window=128).apply(kurtosis)
#coma_data['skew'] = coma_data['Fz'].rolling(window=128).apply(skew)

In [16]:
# Drop rows with NaN values generated by rolling
coma_data = coma_data.dropna()

In [17]:
# Features and labels
X = coma_data['Fz']
y = coma_data['Label']

Separating the features and Target

In [18]:
X = coma_data.drop(columns = ['Label'], axis = 1)
Y = coma_data['Label']

In [19]:
print(X)

             Fz
0      0.001519
1      0.004535
2      0.006580
3      0.005792
4      0.001887
...         ...
24060  0.020981
24061  0.023662
24062  0.021062
24063  0.011587
24064 -0.002415

[24065 rows x 1 columns]


In [20]:
print(Y)

0        0
1        0
2        0
3        0
4        0
        ..
24060    2
24061    2
24062    2
24063    2
24064    2
Name: Label, Length: 24065, dtype: int64


Splitting the data to training data and Test data

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2, random_state = 2)

In [22]:
print(X.shape, X_train.shape, X_test.shape)

(24065, 1) (19252, 1) (4813, 1)


Data Standardization

In [23]:
scaler = StandardScaler()

In [24]:
scaler.fit(X_train)

In [25]:
X_train = scaler.transform(X_train)

X_test = scaler.transform(X_test)

In [26]:
print(X_train)

[[-7.67387546e-01]
 [-1.27688661e-01]
 [ 6.99987603e-01]
 ...
 [ 9.03144094e-04]
 [ 1.65603850e+00]
 [ 7.62367026e-02]]


Model Training

Support Vector Machine Model

In [27]:
model = svm.SVC(kernel = 'linear')

In [28]:
# training the SVM model with training data
model.fit(X_train, y_train)

Model Evaluation

Accuracy Score

In [29]:
# accuracy score on training data
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_train, X_train_prediction)

In [30]:
print('Accuracy score of training data :', training_data_accuracy)

Accuracy score of training data : 0.9917930604612508


In [31]:
# accuracy score on testing data
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(y_test, X_test_prediction)

In [32]:
print('Accuracy score of test data :', test_data_accuracy)

Accuracy score of test data : 0.9912736339081654


Building a Predictive System

In [33]:
input_data = (0.819187179)

# changing input data to a numpy array
input_data_as_numpy_array = np.asarray(input_data)

# reshaping the numpy array
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# standardize the data
std_data = scaler.transform(input_data_reshaped)

prediction = model.predict(std_data)
print(prediction)

[2]


