In [None]:
import numpy as np
import glob
import re
import shutil
import random
import itertools
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
!pip install tensorflow_addons
import tensorflow_addons as tfa

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (591 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m591.0/591.0 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow_addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow_addons
Successfully installed tensorflow_addons-0.20.0 typeguard-2.13.3



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [None]:
# Mount Google Drive at /content/drive; the zipped model weights are read
# from MyDrive/FewShot on that mount in the next cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip "/content/drive/MyDrive/FewShot/final_weights_best.zip" -d "/content/weights/"

Archive:  /content/drive/MyDrive/FewShot/final_weights_best.zip
  inflating: /content/weights/checkpoint  
  inflating: /content/weights/best_one.index  
  inflating: /content/weights/best_one.data-00000-of-00001  


LOAD THE MODEL

In [None]:
# Batch size passed to every tf.keras.Input created in wave_downsample() and get_model().
batch_size = 32

class SQD(tf.keras.layers.Layer):
    """Element-wise squared difference of two tensors.

    The layer holds no weights. It is called with a two-element sequence
    ``[x, y]`` and returns ``(x - y) ** 2`` computed element-wise.
    """

    def __init__(self, **kwargs):
        # Forward the standard Layer keyword arguments (``name``, ``dtype``,
        # ``trainable``, ...). Without this, ``SQD.from_config(layer.get_config())``
        # raises a TypeError because the base config always carries those keys.
        # ``SQD()`` with no arguments behaves exactly as before.
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return ``tf.square(x - y)`` for ``inputs == (x, y)``.

        Args:
            inputs: Sequence of exactly two tensors; unpacking fails otherwise.
        """
        x, y = inputs
        diff = tf.subtract(x, y)
        return tf.square(diff)

def wave_downsample():
    """Build the encoder that turns one (1024, 19) wave into a 20-value vector.

    Layer order: LSTM(10, return_sequences=True) -> Dense(2) -> Flatten ->
    ReLU -> Dropout(0.3) -> Dense(20) -> Dropout(0.1).

    Returns:
        A ``tf.keras.Model`` with a single input and a single output.
    """
    layers = tf.keras.layers
    wave_in = tf.keras.Input(batch_size=batch_size, shape=(1024, 19))

    # Layers are listed in the order they are applied.
    stack = [
        layers.LSTM(10, return_sequences=True),
        layers.Dense(2),
        layers.Flatten(),
        layers.Activation('relu'),
        layers.Dropout(0.3),
        layers.Dense(20),
        layers.Dropout(0.1),
    ]

    encoded = wave_in
    for layer in stack:
        encoded = layer(encoded)

    return tf.keras.Model(inputs=[wave_in], outputs=[encoded])
     
def get_model():
    """Assemble the 'FewShot' pairwise comparison network.

    Inputs (all created with the module-level ``batch_size``):
        wave1, wave2: (1024, 19) tensors.
        state: (2,) tensor.

    Each wave is batch-normalised, passed through the same
    ``wave_downsample()`` model instance, then tanh + Dropout(0.3). The two
    results are combined by ``SQD``, concatenated with ``state`` and reduced
    by Dense(10) -> Dropout(0.2) -> Dense(1, sigmoid) named 'difference'.

    Returns:
        A ``tf.keras.Model`` named 'FewShot'.
    """
    layers = tf.keras.layers

    wave1 = tf.keras.Input(batch_size=batch_size, shape=(1024, 19), name='wave1')
    wave2 = tf.keras.Input(batch_size=batch_size, shape=(1024, 19), name='wave2')
    state = tf.keras.Input(batch_size=batch_size, shape=(2,), name='state')

    # Each branch gets its own BatchNormalization layer.
    branch1 = layers.BatchNormalization()(wave1)
    branch2 = layers.BatchNormalization()(wave2)

    # A single encoder instance is applied to both branches.
    encoder = wave_downsample()
    branch1 = encoder(branch1)
    branch2 = encoder(branch2)

    branch1 = layers.Activation('tanh')(branch1)
    branch1 = layers.Dropout(0.3)(branch1)

    branch2 = layers.Activation('tanh')(branch2)
    branch2 = layers.Dropout(0.3)(branch2)

    # Squared difference of the two branches, then append the state flags.
    merged = SQD()([branch1, branch2])
    merged = layers.concatenate([merged, state])
    merged = layers.Dense(10)(merged)
    merged = layers.Dropout(0.2)(merged)
    score = layers.Dense(1, activation='sigmoid', name='difference')(merged)

    return tf.keras.Model(inputs=[wave1, wave2, state], outputs=[score], name='FewShot')

# Build the network and attach optimizer, loss and metrics.
model = get_model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-6),
    loss='binary_crossentropy',
    metrics=[
        tf.keras.metrics.BinaryAccuracy(threshold=0.8),
        tfa.metrics.F1Score(num_classes=1, threshold=0.8),
    ],
)

In [None]:
# Restore the weights from the checkpoint with prefix "best_one" under /content/weights.
model.load_weights("/content/weights/best_one")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f785cf46da0>

Cleaning data for testing

In [None]:
!unzip "/content/FINAL_DATASET.zip"

Archive:  /content/FINAL_DATASET.zip
   creating: Dataset/SETA/
  inflating: Dataset/SETA/healthy_open1.csv  
  inflating: Dataset/SETA/healthy_open10.csv  
  inflating: Dataset/SETA/healthy_open11.csv  
  inflating: Dataset/SETA/healthy_open12.csv  
  inflating: Dataset/SETA/healthy_open2.csv  
  inflating: Dataset/SETA/healthy_open3.csv  
  inflating: Dataset/SETA/healthy_open4.csv  
  inflating: Dataset/SETA/healthy_open5.csv  
  inflating: Dataset/SETA/healthy_open6.csv  
  inflating: Dataset/SETA/healthy_open7.csv  
  inflating: Dataset/SETA/healthy_open8.csv  
  inflating: Dataset/SETA/healthy_open9.csv  
   creating: Dataset/SETB/
  inflating: Dataset/SETB/healthy_closed1.csv  
  inflating: Dataset/SETB/healthy_closed10.csv  
  inflating: Dataset/SETB/healthy_closed11.csv  
  inflating: Dataset/SETB/healthy_closed12.csv  
  inflating: Dataset/SETB/healthy_closed2.csv  
  inflating: Dataset/SETB/healthy_closed3.csv  
  inflating: Dataset/SETB/healthy_closed4.csv  
  inflating: Da

In [None]:
# Collect every path exactly one directory level below /content/Dataset.
# glob does not sort its results, so the listing order is arbitrary.
files = glob.glob("/content/Dataset/*/*")
files

['/content/Dataset/SETD/alzeimer_closed2.csv',
 '/content/Dataset/SETD/alzeimer_closed6.csv',
 '/content/Dataset/SETD/alzeimer_closed7.csv',
 '/content/Dataset/SETD/alzeimer_closed10.csv',
 '/content/Dataset/SETD/alzeimer_closed11.csv',
 '/content/Dataset/SETD/alzeimer_closed4.csv',
 '/content/Dataset/SETD/alzeimer_closed1.csv',
 '/content/Dataset/SETD/alzeimer_closed8.csv',
 '/content/Dataset/SETD/alzeimer_closed5.csv',
 '/content/Dataset/SETD/alzeimer_closed9.csv',
 '/content/Dataset/SETD/alzeimer_closed12.csv',
 '/content/Dataset/SETD/alzeimer_closed3.csv',
 '/content/Dataset/SETA/healthy_open1.csv',
 '/content/Dataset/SETA/healthy_open10.csv',
 '/content/Dataset/SETA/healthy_open7.csv',
 '/content/Dataset/SETA/healthy_open8.csv',
 '/content/Dataset/SETA/healthy_open3.csv',
 '/content/Dataset/SETA/healthy_open4.csv',
 '/content/Dataset/SETA/healthy_open5.csv',
 '/content/Dataset/SETA/healthy_open2.csv',
 '/content/Dataset/SETA/healthy_open12.csv',
 '/content/Dataset/SETA/healthy_ope

In [None]:
def clean(path):
    """Clean one sample CSV in place so it contains only numeric values.

    Steps:
      1. Every non-numeric column is coerced with ``pd.to_numeric``
         (unparseable entries become NaN), then forward- and back-filled.
      2. The frame is truncated to its first 1024 rows.
      3. Any NaN still present is forward- and back-filled.
      4. The result overwrites ``path`` (index not written).

    Args:
        path: Path of the CSV file to clean; the file is overwritten.

    Raises:
        ValueError: If a column has no numeric value at all in the kept rows,
            so filling cannot repair it. The file is left untouched in that
            case. (The previous ``while`` loop never terminated here.)
    """
    df = pd.read_csv(path)

    for column in df.columns:
        # is_numeric_dtype also catches string-dtype columns, which newer
        # pandas versions may report instead of 'object'.
        if not pd.api.types.is_numeric_dtype(df[column]):
            print("Sample : ",path," feature : ",column," is uncleaned")
            numeric = pd.to_numeric(df[column], errors='coerce')
            df[column] = numeric.ffill().bfill()
    df = df.iloc[:1024,:]

    if df.isnull().values.any():
        print("Sample : ",path,"getting cleaned")
        # One ffill + bfill pass fills every gap that can be filled; looping
        # would not change the outcome.
        df = df.ffill().bfill()

    unfillable = [str(column) for column in df.columns if df[column].isnull().any()]
    if unfillable:
        raise ValueError(
            "Cannot clean " + str(path) + ": no numeric values in column(s) "
            + ", ".join(unfillable)
        )
    df.to_csv(path, index=False)
     
# Clean every collected CSV in place.
for csv_path in files:
    clean(csv_path)

Sample :  /content/Dataset/SETD/alzeimer_closed12.csv  feature :  18  is uncleaned
Sample :  /content/Dataset/SETA/healthy_open2.csv  feature :  16  is uncleaned
Sample :  /content/Dataset/SETA/healthy_open11.csv  feature :  14  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open10.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open10.csv getting cleaned
Sample :  /content/Dataset/SETC/alzeimer_open5.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open5.csv getting cleaned
Sample :  /content/Dataset/SETC/alzeimer_open8.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open8.csv getting cleaned
Sample :  /content/Dataset/SETC/alzeimer_open6.csv  feature :  0  is uncleaned
Sample :  /content/Dataset/SETC/alzeimer_open6.csv getting cleaned
Sample :  /content/Dataset/SETB/healthy_closed2.csv  feature :  16  is uncleaned
Sample :  /content/Dataset/SETB/healthy_closed6.csv  feature :  14  is uncleaned


***TESTING TECHNIQUE***: We are given a query sample of UNKNOWN CLASS, which we need to classify as either a Healthy sample or an Alzheimer sample. The query sample is compared against stored healthy and Alzheimer reference samples using the trained Similarity Few Shot NN, which outputs a dissimilarity score for each pair. The intended results are:


*   The query sample belongs to the 'Alzheimer' class if the dissimilarity scores between the query sample and the Alzheimer reference samples are closer to zero.
*   The query sample belongs to the 'Healthy' class if the dissimilarity scores between the query sample and the healthy reference samples are closer to zero.



In [None]:
# Held-out files, laid out as consecutive triples (one triple per SET folder).
test_files = [
    '/content/Dataset/SETA/healthy_open12.csv',
    '/content/Dataset/SETA/healthy_open10.csv',
    '/content/Dataset/SETA/healthy_open5.csv',
    '/content/Dataset/SETC/alzeimer_open12.csv',
    '/content/Dataset/SETC/alzeimer_open11.csv',
    '/content/Dataset/SETC/alzeimer_open9.csv',
    '/content/Dataset/SETD/alzeimer_closed3.csv',
    '/content/Dataset/SETD/alzeimer_closed12.csv',
    '/content/Dataset/SETD/alzeimer_closed6.csv',
    '/content/Dataset/SETB/healthy_closed3.csv',
    '/content/Dataset/SETB/healthy_closed5.csv',
    '/content/Dataset/SETB/healthy_closed10.csv',
]

# The first file of each triple is a query sample used for detection.
query = test_files[0::3]

# The remaining two files of each triple are references to compare against.
reference = [
    path
    for pair in zip(test_files[1::3], test_files[2::3])
    for path in pair
]

In [None]:
# Split the reference paths by the class label embedded in each path.
# "healthy" takes precedence, so a path is only counted as Alzheimer when it
# does not also contain "healthy".
healthy = [path for path in reference if "healthy" in path]
alz = [path for path in reference if "healthy" not in path and "alzeimer" in path]

The query samples we will be testing are:

In [None]:
query

['/content/Dataset/SETA/healthy_open12.csv',
 '/content/Dataset/SETC/alzeimer_open12.csv',
 '/content/Dataset/SETD/alzeimer_closed3.csv',
 '/content/Dataset/SETB/healthy_closed3.csv']

Reference Samples :

In [None]:
healthy,alz

(['/content/Dataset/SETA/healthy_open10.csv',
  '/content/Dataset/SETA/healthy_open5.csv',
  '/content/Dataset/SETB/healthy_closed5.csv',
  '/content/Dataset/SETB/healthy_closed10.csv'],
 ['/content/Dataset/SETC/alzeimer_open11.csv',
  '/content/Dataset/SETC/alzeimer_open9.csv',
  '/content/Dataset/SETD/alzeimer_closed12.csv',
  '/content/Dataset/SETD/alzeimer_closed6.csv'])

Reading a sample

In [None]:
def to_numeric(csv):
    """Read a CSV and return its data rows as a NumPy array.

    Args:
        csv: Path (or file-like object) accepted by ``pd.read_csv``.

    Returns:
        The ``.values`` array of the loaded DataFrame.
    """
    return pd.read_csv(csv).values

def pair_up(x1,x2):
    """Build one model sample from two CSV paths.

    Args:
        x1: Path of the first CSV; the path text must contain "closed" or "open".
        x2: Path of the second CSV; same requirement.

    Returns:
        A list ``[data1, data2, state1, state2]`` where ``data*`` are the
        arrays returned by ``to_numeric`` and ``state*`` is 0 for "closed"
        and 1 for "open".

    Raises:
        ValueError: If a path contains neither "closed" nor "open". Previously
            this produced a short list that failed later with an IndexError.
    """
    def _state_of(path):
        # "closed" is tested first, matching the original precedence.
        if "closed" in path:
            return 0
        if "open" in path:
            return 1
        raise ValueError(
            "Cannot infer state from path (expected 'closed' or 'open'): " + str(path)
        )

    # Resolve both states before reading any file so a bad path fails fast.
    state1 = _state_of(x1)
    state2 = _state_of(x2)

    return [to_numeric(x1), to_numeric(x2), state1, state2]

Getting Sample ready for Model Input

In [None]:
def as_input(sample):
    """Convert a ``[data1, data2, state1, state2]`` sample into model inputs.

    Each entry gets a leading axis of size 1 and is cast to float32.

    Args:
        sample: Indexable with at least four items, laid out as above.

    Returns:
        Dict with keys 'wave1', 'wave2' and 'state'.
    """
    def _batched(value):
        return tf.cast(tf.expand_dims(value, 0), dtype=tf.float32)

    return {
        'wave1': _batched(sample[0]),
        'wave2': _batched(sample[1]),
        'state': _batched([sample[2], sample[3]]),
    }

# TESTING 

In [None]:
def get_scores(afile, reference_tests=4):
    """Score one query CSV against the reference samples and print a verdict.

    The query is paired with up to ``reference_tests`` paths from the global
    ``alz`` list and from the global ``healthy`` list. Each pair is run
    through the global ``model`` and the outputs are averaged per class. The
    class with the lower average is predicted; a tie goes to Alzheimer.

    Args:
        afile: Path of the query CSV.
        reference_tests: Maximum number of references to compare against per
            class (default 4, the value previously hard-coded).

    Returns:
        Tuple ``(label, h_avg, a_avg)`` where ``label`` is "Healthy" or
        "Alzheimer" and the averages are the per-class mean scores.

    Raises:
        ValueError: If ``reference_tests`` is below 1 or a class has no
            reference samples.
    """
    if reference_tests < 1:
        raise ValueError("reference_tests must be at least 1")

    print("Query Sample :",afile)

    def _score_against(references):
        """Return the model output for the query paired with each reference."""
        scores = []
        for ref in references[:reference_tests]:
            pair = as_input(pair_up(afile,ref))
            # training=False makes inference mode explicit.
            output = np.squeeze(model(pair, training=False)).tolist()
            scores.append(output)
        return scores

    alz_scores = _score_against(alz)
    healthy_scores = _score_against(healthy)

    if not alz_scores or not healthy_scores:
        raise ValueError("Need at least one healthy and one Alzheimer reference sample")

    # Divide by the number of scores actually collected: the reference lists
    # may hold fewer than reference_tests entries.
    a_avg = sum(alz_scores)/len(alz_scores)
    h_avg = sum(healthy_scores)/len(healthy_scores)
    print("Dissimilarity Scores Query vs healthy: ",healthy_scores," Avg :",h_avg)
    print("Dissimilarity Scores Query vs alzheimer: ",alz_scores," Avg :",a_avg)

    if a_avg > h_avg:
        label = "Healthy"
        print("Query Sample Class Predicted : Healthy Sample")
        print("Result Confidence : ",(1-h_avg)*100," %")
    else:
        label = "Alzheimer"
        print("Query Sample Class Predicted : Alzheimer Sample")  
        print("Result Confidence : ",(1-a_avg)*100," %")  

    return label, h_avg, a_avg

In [None]:
# Report on each query sample, printing a separator line before each report.
for query_path in query:
    print("-----------------------------------------------------------------")
    get_scores(query_path)

-----------------------------------------------------------------
Query Sample : /content/Dataset/SETA/healthy_open12.csv
Dissimilarity Scores Query vs healthy:  [0.993556022644043, 0.06575310230255127, 0.11120369285345078, 0.10094550251960754]  Avg : 0.31786458007991314
Dissimilarity Scores Query vs alzheimer:  [0.9998642802238464, 0.9999452829360962, 0.999563455581665, 0.9999682307243347]  Avg : 0.9998353123664856
Query Sample Class Predicted : Healthy Sample
Result Confidence :  68.21354199200869  %
-----------------------------------------------------------------
Query Sample : /content/Dataset/SETC/alzeimer_open12.csv
Dissimilarity Scores Query vs healthy:  [0.008527429774403572, 0.9741566181182861, 0.9876042008399963, 0.9934801459312439]  Avg : 0.7409420986659825
Dissimilarity Scores Query vs alzheimer:  [0.023576848208904266, 0.023374736309051514, 0.02142222970724106, 0.06725795567035675]  Avg : 0.0339079424738884
Query Sample Class Predicted : Alzheimer Sample
Result Confidence

FROM THE ABOVE RESULTS WE CAN OBSERVE SOME INCORRECT SCORE PREDICTIONS FOR HEALTHY SAMPLES, WHICH MAY BE AN ISSUE, BUT THE PREDICTION SCORES FOR ALZHEIMER SAMPLES ARE CORRECT.