# In [7]:
# importing the required packages
import pandas as pd
import numpy as np
from scipy.stats import skew
from scipy.fftpack import fft
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans 
from sklearn.cluster import DBSCAN
from sklearn import metrics
import statistics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_validate,train_test_split,StratifiedKFold,KFold
import pickle


def DataPreProcessing(CGM_Data):
    """Drop sparsely populated rows/columns and interpolate the remaining gaps.

    The frame is modified in place and the same object is returned.

    Steps, in order:
      1. Drop every row with fewer non-missing values than a quarter of the
         column count.
      2. Drop every column with fewer non-missing values than a quarter of
         the row count. The row count used here is the one measured *before*
         step 1 removed any rows.
      3. Linearly interpolate the remaining NaNs down each column (axis=0),
         in the forward direction only.

    Parameters
    ----------
    CGM_Data : pandas.DataFrame
        Frame to clean; mutated in place.

    Returns
    -------
    pandas.DataFrame
        The same ``CGM_Data`` object after cleaning.
    """
    no_of_rows, no_of_columns = CGM_Data.shape

    # `how` and `thresh` are mutually exclusive in current pandas (passing
    # both raises TypeError); older versions ignored `how` when `thresh` was
    # given, so only `thresh` is kept. A count is an integer, so
    # "count >= n / 4" is the same test as "count >= ceil(n / 4)".
    min_values_per_row = int(np.ceil(no_of_columns / 4))
    min_values_per_column = int(np.ceil(no_of_rows / 4))

    CGM_Data.dropna(axis=0, thresh=min_values_per_row, inplace=True)
    CGM_Data.dropna(axis=1, thresh=min_values_per_column, inplace=True)
    CGM_Data.interpolate(axis=0, method='linear', limit_direction='forward', inplace=True)
    return CGM_Data


def ExtractFeatures(CGM_Data):
    
    Feature_Matrix = pd.DataFrame() 

    # Feature 1 - Fast Fourier Transform
    FFT = pd.DataFrame()
    def calculate_fft_vals(series):
        FFT_abs = abs(fft(series))
        FFT_abs.sort()
        return np.flip(FFT_abs)[0:8]

    FFT['FFT_vals'] = CGM_Data.apply(lambda series: calculate_fft_vals(series), axis=1)
    FFT_Vals= pd.DataFrame(FFT.FFT_vals.tolist(), columns=['FFT1', 'FFT2', 'FFT3', 'FFT4', 'FFT5', 'FFT6', 'FFT7','FFT8'],index=FFT.FFT_vals.index)
    Feature_Matrix = pd.concat([Feature_Matrix,FFT_Vals],axis=1)
    
    print(Feature_Matrix.shape)
    
    
    # Feature 2 - Max of CGM Velocity 
    
    Velocity_Data = pd.DataFrame()
    win_size=6
    total_vals=CGM_Data.shape[1]-win_size

    for index in range(0, total_vals):
        dv = CGM_Data.iloc[:, index + win_size] - CGM_Data.iloc[:, index]
        Velocity_Data['vel'+str(index)] = dv

    Feature_Matrix['Max CGM Vel']=Velocity_Data.max(axis = 1,skipna=True)
    
    print(Feature_Matrix.shape)
    
        
    # Feature 3 - polyfit   
    def calculate_polyfit(series,degree=3):
        row_arr = np.array(series.index)
        return np.polyfit(row_arr, series, degree)
    
    Polyfit_vals = CGM_Data.apply(calculate_polyfit,axis=1,result_type='expand')
    Feature_Matrix = pd.concat([Feature_Matrix,Polyfit_vals],axis=1)
    
    print(Feature_Matrix.shape)
     
 
    return Feature_Matrix


def predit_cluster_labels(features=None):
    """Load the pickled models and write their predictions to CSV files.

    For each (model file, output file) pair below, the model is unpickled,
    its ``predict`` method is called on ``features``, and the predictions are
    written as a single headerless, index-less CSV column:

      * ``KMeans.model`` -> ``KMeans_output.csv``
      * ``DBSCAN.model`` -> ``DBScan_output.csv``

    Parameters
    ----------
    features : optional
        Data passed to each model's ``predict``. When omitted, the
        module-level ``X_principal`` set by the script entry point is used,
        which is what the original zero-argument call relied on.

    Raises
    ------
    NameError
        If ``features`` is omitted and ``X_principal`` has not been defined.
    """
    if features is None:
        # Backward-compatible fallback to the global set in the __main__ block.
        features = X_principal

    model_to_output = (
        ("KMeans.model", "KMeans_output.csv"),
        ("DBSCAN.model", "DBScan_output.csv"),
    )
    for model_path, output_path in model_to_output:
        # SECURITY: pickle.load can execute arbitrary code; only load model
        # files that come from a trusted source.
        # The context manager closes the file even if unpickling fails.
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)

        # NOTE(review): sklearn's DBSCAN estimator has no predict(), so
        # DBSCAN.model presumably holds a classifier trained on DBSCAN
        # labels - confirm against the training script.
        predictions = model.predict(features)
        pd.DataFrame(predictions).to_csv(output_path, index=False, header=False)

    
if __name__=="__main__":
    # Script entry point: read a headerless CSV of CGM readings, clean it,
    # extract features, project them to two principal components, and write
    # cluster predictions from the pickled models.

    # The input file has no header row; columns are labelled 0..30.
    column_names = list(range(31))
    input_file_path = input("Enter the test file name/path")
    print("Entered file path",input_file_path)
    Test_Data = pd.read_csv(input_file_path,names=column_names)

    # Data Preprocessing
    Test_Data_Pre = DataPreProcessing(Test_Data)

    # Extract Features
    Test_Data_Features = ExtractFeatures(Test_Data_Pre)

    # Standardize feature matrix.
    # The feature frame mixes string and integer column labels, which
    # scikit-learn >= 1.2 rejects with a TypeError when given a DataFrame,
    # so the plain array is passed instead (the result is an ndarray either way).
    # NOTE(review): the scaler and PCA below are fit on the test data itself,
    # not loaded from training - confirm this matches how the models were fit.
    Feature_Matrix_std  = StandardScaler().fit_transform(Test_Data_Features.to_numpy())

    # Scale each sample (row) to unit L2 norm
    Feature_Matrix_norm = normalize(Feature_Matrix_std)
    Feature_Matrix_norm = pd.DataFrame(Feature_Matrix_norm)

    # Reduce to two principal components
    pca=PCA(n_components=2)
    X_principal = pd.DataFrame(
        pca.fit_transform(Feature_Matrix_norm),
        columns=['PCA1','PCA2'],
    )

    # Predict cluster labels (reads the module-level X_principal)
    predit_cluster_labels()
    


# Enter the test file name/pathproj3_test.csv
# Entered file path proj3_test.csv
# (51, 8)
# (51, 9)
# (51, 13)
