<a href="https://colab.research.google.com/github/JayKarhade/Breath-Classification/blob/main/Breath_Detection_DTRF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import tensorflow as tf
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from sklearn import preprocessing
from sklearn import tree
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pylab as plt
import pandas as pd
import sklearn
from keras.utils import to_categorical

In [None]:
# Load the signal table and the label table from CSV files on Google Drive.
# NOTE(review): the absolute /content/drive/... paths presumably need Drive to be
# mounted beforehand; no mount call is visible in this notebook section -- confirm.
df1 = pd.read_csv('/content/drive/MyDrive/breath_detect/dataset.csv')
df2 = pd.read_csv('/content/drive/MyDrive/breath_detect/class_labels.csv')
# Keep columns 1..389 of the signal table (column 0 is skipped -- presumably an
# index column, TODO confirm) and multiply every value by 1e5.
x = df1.to_numpy()[:,1:390]*100000
# Labels are read from column 1 of the label table.
y = df2.to_numpy()[:,1]
# Sanity check: both arrays should have the same number of rows.
print(x.shape,y.shape)

(455, 389) (455,)


In [None]:
# Shuffle data with a fixed seed. The previous unseeded np.random.shuffle produced
# a different ordering on every run, so the train/val/test partition (and every
# score computed from it) was not reproducible even though train_test_split
# itself was given a fixed random_state.
SPLIT_SEED = 1
split_rng = np.random.default_rng(SPLIT_SEED)
indices = split_rng.permutation(x.shape[0])
# x and y are rebound to their permuted versions; re-running this cell without
# re-running the loading cell permutes the already-permuted arrays again.
x = x[indices]
y = y[indices]

## Train-Test split
from sklearn.model_selection import train_test_split
# 25% of the samples are held out for testing ...
x_train_raw, x_test_raw, y_train_raw, y_test_raw = train_test_split(
    x, y, test_size=0.25, random_state=SPLIT_SEED)
# ... and 2/8 of the remaining samples are held out for validation.
x_train_raw, x_val_raw, y_train_raw, y_val_raw = train_test_split(
    x_train_raw, y_train_raw, test_size=2/8, random_state=SPLIT_SEED)

# Display names handed to classification_report as target_names.
# NOTE(review): the order presumably matches the sorted label values in y -- confirm.
label_names = ['Normal', 'Slow', 'Fast', 'Deep']

In [None]:
import scipy.stats as st
from scipy.fftpack import fft, fftfreq 
from scipy.signal import argrelextrema
import operator

def stat_area_features(x, Te=1.0):
    """Compute ten per-row summary statistics for a batch of 1-D signals.

    Parameters
    ----------
    x : array-like, shape (n_signals, n_samples)
        One signal per row; every statistic is taken along axis 1.
    Te : float, default 1.0
        Sample spacing, used as ``dx`` for the two trapezoidal integrals.

    Returns
    -------
    numpy.ndarray, shape (n_signals, 10)
        Columns, in order: mean, max, min, standard deviation (``np.std``),
        skewness, kurtosis (``scipy.stats`` defaults), interquartile range,
        median absolute deviation, area under the curve, and area under the
        squared curve.
    """
    # Accept lists / nested sequences as well as arrays.
    x = np.asarray(x)

    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; prefer the
    # new name when it exists and fall back to the old one on older NumPy.
    integrate = getattr(np, "trapezoid", None) or np.trapz

    # Median absolute deviation: median of |x - row median|. np.median orders the
    # values itself, so no explicit sort is needed beforehand.
    row_median = np.median(x, axis=1, keepdims=True)
    mad_ts = np.median(np.abs(x - row_median), axis=1)

    return np.column_stack((
        np.mean(x, axis=1),             # mean
        np.amax(x, axis=1),             # max
        np.amin(x, axis=1),             # min
        np.std(x, axis=1),              # std
        st.skew(x, axis=1),             # skew
        st.kurtosis(x, axis=1),         # kurtosis
        st.iqr(x, axis=1),              # interquartile range
        mad_ts,                         # median absolute deviation
        integrate(x, axis=1, dx=Te),        # area under curve
        integrate(x ** 2, axis=1, dx=Te),   # area under curve ** 2
    ))

def frequency_domain_features(x, Te=1.0):
    """Extract spectral features from each row of a batch of 1-D signals.

    Parameters
    ----------
    x : array-like, shape (n_signals, n_samples)
        One signal per row.
    Te : float, default 1.0
        Sample spacing, forwarded to ``fftfreq`` as ``d``.

    Returns
    -------
    numpy.ndarray, shape (n_signals, 26) when at least 5 spectrum bins are kept
        Columns, in order:
        * 10 ``stat_area_features`` of the DFT magnitudes (called with Te=1.0),
        * 1 magnitude-weighted mean frequency (0.0 when the whole kept
          spectrum of a row is zero),
        * the first 5 DFT magnitudes,
        * the 5 largest local maxima of the DFT magnitudes (zero-padded when a
          row has fewer than 5 local maxima),
        * the frequencies of those 5 maxima (zero-padded likewise).
    """
    x = np.asarray(x)
    n_rows, n_samples = x.shape

    # The DFT magnitudes of a real signal are symmetrical around the middle of the
    # array, so only the leading part is kept.
    # NOTE(review): for an odd sample count this keeps n//2 - 1 bins, i.e. it
    # drops the two highest non-negative-frequency bins; kept as-is so existing
    # feature values do not change -- confirm whether n//2 + 1 was intended.
    if n_samples % 2 == 0:
        N = n_samples // 2
    else:
        N = n_samples // 2 - 1

    freqs = fftfreq(n_samples, d=Te)[:N]   # frequencies, shared by every row
    dft = np.abs(fft(x, axis=1))[:, :N]    # DFT magnitudes

    # statistical and area features of the spectrum
    dft_features = stat_area_features(dft, Te=1.0)

    # Weighted mean frequency. np.average raises ZeroDivisionError when a row's
    # weights sum to zero (e.g. an all-zero signal), so divide explicitly and
    # leave 0.0 for such rows.
    weight_sum = dft.sum(axis=1)
    weighted_sum = (dft * freqs).sum(axis=1)
    dft_weighted_mean_f = np.divide(
        weighted_sum, weight_sum,
        out=np.zeros_like(weighted_sum), where=weight_sum != 0,
    ).reshape(-1, 1)

    # 5 first DFT coefficients
    dft_first_coef = dft[:, :5]

    # 5 biggest local maxima of the DFT magnitudes and their frequencies
    dft_max_coef = np.zeros((n_rows, 5))
    dft_max_coef_f = np.zeros((n_rows, 5))
    for row in range(n_rows):
        spectrum = dft[row]
        # indexes of all strict local maxima
        peaks = argrelextrema(spectrum, np.greater)[0]
        # Order peaks by decreasing magnitude; the stable sort keeps the
        # lower-frequency peak first on ties.
        top = peaks[np.argsort(-spectrum[peaks], kind="stable")[:5]]
        dft_max_coef[row, :top.size] = spectrum[top]
        dft_max_coef_f[row, :top.size] = freqs[top]

    return np.concatenate((dft_features, dft_weighted_mean_f, dft_first_coef,
                           dft_max_coef, dft_max_coef_f), axis=1)

def make_feature_vector(x, Te=1.0):
    """Build the full feature matrix for a batch of raw signals.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_signals, n_samples)
        One raw signal per row.
    Te : float, default 1.0
        Sample spacing of the signals.

    Returns
    -------
    numpy.ndarray, shape (n_signals, n_features)
        Horizontal concatenation of, in order: time-domain features of the raw
        signals, time-domain features of the jerk signals, frequency-domain
        features of the raw signals, frequency-domain features of the jerk
        signals.
    """
    # Jerk signal: first difference between consecutive samples divided by Te.
    jerk = (x[:, 1:] - x[:, :-1]) / Te

    # Raw and jerk signals : stat and area features
    features_xt = stat_area_features(x, Te=Te)
    features_xt_jerk = stat_area_features(jerk, Te=Te)

    # Raw and jerk signals : frequency domain features.
    # frequency_domain_features forwards its Te to fftfreq as the sample spacing
    # `d`, so it must receive Te itself. Passing 1/Te (the sampling frequency)
    # yields frequency values that are off by a factor of Te**2.
    features_xf = frequency_domain_features(x, Te=Te)
    features_xf_jerk = frequency_domain_features(jerk, Te=Te)

    return np.concatenate((features_xt, features_xt_jerk,
                           features_xf, features_xf_jerk), axis=1)

In [None]:
# Turn the raw train/test signals into feature matrices.
# The sample spacing handed to the extractor is 1/50
# (presumably a 50 Hz recording -- TODO confirm).
sampling_period = 1/50
train_features = make_feature_vector(x_train_raw, Te=sampling_period)
test_features = make_feature_vector(x_test_raw, Te=sampling_period)

print("X_train shape : {}".format(train_features.shape))
print("X_test shape: {}".format(test_features.shape))

# Standardise every feature using statistics learned from the training set only,
# then apply the same transform to the test set.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(train_features)
X_test = scaler.transform(test_features)

X_train shape : (255, 72)
X_test shape: (114, 72)


In [None]:
# Decision tree trained on the standardised engineered features.
# random_state is pinned so the fitted tree and the report below are reproducible
# from run to run; the random-forest cell pins random_state=0 in the same way.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X_train, y_train_raw)
y_pred = clf.predict(X_test)
# Per-class precision / recall / F1 on the held-out test set.
print(classification_report(y_test_raw, y_pred, target_names=label_names))


              precision    recall  f1-score   support

      Normal       0.64      0.74      0.68        19
        Slow       0.95      0.73      0.83        26
        Fast       0.86      0.82      0.84        44
        Deep       0.83      1.00      0.91        25

    accuracy                           0.82       114
   macro avg       0.82      0.82      0.81       114
weighted avg       0.84      0.82      0.82       114



In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest (max depth 20, fixed seed) fitted directly on the raw signals.
# NOTE(review): unlike the decision-tree cell, which uses the engineered
# X_train / X_test features, this model is trained and evaluated on
# x_train_raw / x_test_raw -- confirm that this comparison is intended.
clf_rf = RandomForestClassifier(max_depth=20, random_state=0).fit(x_train_raw, y_train_raw)
y_pred = clf_rf.predict(x_test_raw)

# Per-class precision / recall / F1 on the held-out test set.
rf_report = classification_report(y_test_raw, y_pred, target_names=label_names)
print(rf_report)


              precision    recall  f1-score   support

      Normal       0.57      0.42      0.48        19
        Slow       0.68      0.81      0.74        26
        Fast       0.80      0.75      0.78        44
        Deep       0.64      0.72      0.68        25

    accuracy                           0.70       114
   macro avg       0.67      0.67      0.67       114
weighted avg       0.70      0.70      0.70       114

