In [1]:
import numpy as np
import glob
import re
import shutil
import random
import itertools
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!unzip /content/FINAL_DATASET.zip

Archive:  /content/FINAL_DATASET.zip
   creating: Dataset/SETA/
  inflating: Dataset/SETA/healthy_open1.csv  
  inflating: Dataset/SETA/healthy_open10.csv  
  inflating: Dataset/SETA/healthy_open11.csv  
  inflating: Dataset/SETA/healthy_open12.csv  
  inflating: Dataset/SETA/healthy_open2.csv  
  inflating: Dataset/SETA/healthy_open3.csv  
  inflating: Dataset/SETA/healthy_open4.csv  
  inflating: Dataset/SETA/healthy_open5.csv  
  inflating: Dataset/SETA/healthy_open6.csv  
  inflating: Dataset/SETA/healthy_open7.csv  
  inflating: Dataset/SETA/healthy_open8.csv  
  inflating: Dataset/SETA/healthy_open9.csv  
   creating: Dataset/SETB/
  inflating: Dataset/SETB/healthy_closed1.csv  
  inflating: Dataset/SETB/healthy_closed10.csv  
  inflating: Dataset/SETB/healthy_closed11.csv  
  inflating: Dataset/SETB/healthy_closed12.csv  
  inflating: Dataset/SETB/healthy_closed2.csv  
  inflating: Dataset/SETB/healthy_closed3.csv  
  inflating: Dataset/SETB/healthy_closed4.csv  
  inflating: Da

In [7]:
files = glob.glob("/content/Dataset/*/*")
files

['/content/Dataset/SETA/healthy_open1.csv',
 '/content/Dataset/SETA/healthy_open6.csv',
 '/content/Dataset/SETA/healthy_open2.csv',
 '/content/Dataset/SETA/healthy_open3.csv',
 '/content/Dataset/SETA/healthy_open9.csv',
 '/content/Dataset/SETA/healthy_open7.csv',
 '/content/Dataset/SETA/healthy_open12.csv',
 '/content/Dataset/SETA/healthy_open10.csv',
 '/content/Dataset/SETA/healthy_open5.csv',
 '/content/Dataset/SETA/healthy_open4.csv',
 '/content/Dataset/SETA/healthy_open8.csv',
 '/content/Dataset/SETA/healthy_open11.csv',
 '/content/Dataset/SETC/alzeimer_open1.csv',
 '/content/Dataset/SETC/alzeimer_open3.csv',
 '/content/Dataset/SETC/alzeimer_open5.csv',
 '/content/Dataset/SETC/alzeimer_open8.csv',
 '/content/Dataset/SETC/alzeimer_open2.csv',
 '/content/Dataset/SETC/alzeimer_open6.csv',
 '/content/Dataset/SETC/alzeimer_open12.csv',
 '/content/Dataset/SETC/alzeimer_open11.csv',
 '/content/Dataset/SETC/alzeimer_open9.csv',
 '/content/Dataset/SETC/alzeimer_open10.csv',
 '/content/Datas

**Cleaning The Dataset**


*   Replacing Non Numeric Values 
*   Replacing inf values



In [6]:
def clean(path):
    df = pd.read_csv(path)
    
    for column in df.columns:
        if df[column].dtype == 'object':
            print("Sample : ",path," feature : ",column," is uncleaned")
            df[column] = pd.to_numeric(df[column], errors='coerce')
            df[column] = df[column].fillna(method='ffill')
            df[column] = df[column].fillna(method='bfill')
    df = df.iloc[:1024,:]

    while df.isnull().sum().values.sum() != 0:
        print("Sample : ",path,"getting cleaned")
        df = df.fillna(method = 'ffill')
        df = df.fillna(method = 'bfill')
    df.to_csv(path, index=False)
     
for i in files:
    clean(i)

Sample :  /content/Dataset/SETA/healthy_open2.csv  feature :  16  is uncleaned
Sample :  /content/Dataset/SETA/healthy_open11.csv  feature :  14  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open5.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open5.csv getting cleaned
Sample :  /content/Dataset/SETC/alzeimer_open8.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open8.csv getting cleaned
Sample :  /content/Dataset/SETC/alzeimer_open6.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open6.csv getting cleaned
Sample :  /content/Dataset/SETC/alzeimer_open10.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open10.csv getting cleaned
Sample :  /content/Dataset/SETD/alzeimer_closed12.csv  feature :  18  is uncleaned
Sample :  /content/Dataset/SETB/healthy_closed2.csv  feature :  16  is uncleaned
Sample :  /content/Dataset/SETB/healthy_closed6.csv  feature :  14  is uncleaned


In [8]:
len(files)

48

**Creating pairs for similarity based learning**

In [9]:
train = []
test = []
val = []

In [10]:
for i in range(0,48,12):
    train += files[i:i+6]
    test += files[i+6:i+9]
    val += files[i+9:i+12]

**No Common samples in train and test/val**

In [12]:
set(train).intersection(set(test)),set(train).intersection(set(test))

(set(), set())

In [13]:
train_pairs = list(itertools.combinations(train,2))
test_pairs = list(itertools.combinations(test,2))
val_pairs = list(itertools.combinations(val,2))

In [14]:
len(train_pairs),len(test_pairs),len(val_pairs)

(276, 66, 66)

**Visualising pairs**

In [15]:
train_df = pd.DataFrame(columns = ['Query',"Reference"],data = train_pairs)

In [16]:
train_df

Unnamed: 0,Query,Reference
0,/content/Dataset/SETA/healthy_open1.csv,/content/Dataset/SETA/healthy_open6.csv
1,/content/Dataset/SETA/healthy_open1.csv,/content/Dataset/SETA/healthy_open2.csv
2,/content/Dataset/SETA/healthy_open1.csv,/content/Dataset/SETA/healthy_open3.csv
3,/content/Dataset/SETA/healthy_open1.csv,/content/Dataset/SETA/healthy_open9.csv
4,/content/Dataset/SETA/healthy_open1.csv,/content/Dataset/SETA/healthy_open7.csv
...,...,...
271,/content/Dataset/SETB/healthy_closed4.csv,/content/Dataset/SETB/healthy_closed9.csv
272,/content/Dataset/SETB/healthy_closed4.csv,/content/Dataset/SETB/healthy_closed11.csv
273,/content/Dataset/SETB/healthy_closed2.csv,/content/Dataset/SETB/healthy_closed9.csv
274,/content/Dataset/SETB/healthy_closed2.csv,/content/Dataset/SETB/healthy_closed11.csv


In [17]:
def to_numeric(csv):
    data = pd.read_csv(csv).values
    return data

In [18]:
def assemble_pairs(pair):
    for i in range(len(pair)):
        sample = list(pair[i])
        x1 = sample[0]
        x2 = sample[1]
        if "closed" in x1:
            sample.append(0)
        elif "open" in x1:
            sample.append(1) 
        if "closed" in x2:
            sample.append(0)
        elif "open" in x2:
            sample.append(1)    
        
        if ("healthy" in x1 and "healthy" in x2) or ("alzeimer" in x1 and "alzeimer" in x2):
            sample.append(0)
        else:
            sample.append(1)    

        sample[0] = to_numeric(x1)
        sample[1] = to_numeric(x2)

        pair[i] = sample   
    return pair      
     

In [19]:
train_pairs = assemble_pairs(train_pairs)
test_pairs = assemble_pairs(test_pairs)
val_pairs = assemble_pairs(val_pairs)

**Storing data as tf records for faster and parallel processing**

In [20]:
def serialize(wave1,wave2,state,diff):
    feature = {
        'wave1':tf.train.Feature(bytes_list = tf.train.BytesList(value = [wave1.numpy()])),
        'wave2':tf.train.Feature(bytes_list = tf.train.BytesList(value = [wave2.numpy()])),
        'state':tf.train.Feature(bytes_list = tf.train.BytesList(value = [state.numpy()])),
        'diff':tf.train.Feature(bytes_list = tf.train.BytesList(value = [diff.numpy()]))
    }
    example = tf.train.Example(features = tf.train.Features(feature = feature))
    return example.SerializeToString()

In [21]:
def store_records(pairs,name):
    with tf.io.TFRecordWriter("/content/"+name,options=tf.io.TFRecordOptions(compression_type="GZIP")) as writer:
        for sample in pairs:
            x1 = tf.io.serialize_tensor(sample[0].astype(np.float32))
            x2 = tf.io.serialize_tensor(sample[1].astype(np.float32))
            state = tf.io.serialize_tensor(np.array([sample[2],sample[3]]).astype(np.float32))
            output = tf.io.serialize_tensor(np.array([sample[4]]).astype(np.float32))
            serialized = serialize(x1,x2,state,output)
            writer.write(serialized)

store_records(train_pairs,"train_rec")
store_records(test_pairs,"test_rec")
store_records(val_pairs,"val_rec")

In [23]:
shutil.move("train_rec","/content/drive/MyDrive/FewShot/")

'/content/drive/MyDrive/FewShot/train_rec'

In [24]:
shutil.move("val_rec","/content/drive/MyDrive/FewShot/")
shutil.move("test_rec","/content/drive/MyDrive/FewShot/")

'/content/drive/MyDrive/FewShot/test_rec'