<a href="https://colab.research.google.com/github/Shujaat123/DeepVAE-SRC/blob/main/AFP_SRC_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import sys, os, re, gc
import numpy as np
import pandas as pd
from random import sample

## Models
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from keras.models import Model
from keras.layers import Input, Dense, BatchNormalization, Dropout
from keras import metrics
from keras import optimizers
from keras.utils.np_utils import to_categorical

import numpy.linalg as LA

## Perfmetrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, accuracy_score, matthews_corrcoef, balanced_accuracy_score, precision_recall_fscore_support
from sklearn.metrics import auc, average_precision_score, precision_recall_curve, roc_curve

## utilities
from matplotlib import pyplot as plt
!pip install wget
import wget

Collecting wget
  Downloading wget-3.2.zip (10 kB)
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9672 sha256=0c0e9984c2e56a8e34872b2bb97af006d3effc237d52a4e289216706bb16d61d
  Stored in directory: /root/.cache/pip/wheels/a1/b6/7c/0e63e34eb06634181c63adacca38b79ff8f35c37e3c13e3c02
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2


In [5]:
# Fetch the sparse-recovery solver module whose NonnegativeBP routine is used
# by Test_SRC, then import everything it defines.
# NOTE(review): this runs code taken from the tip of a remote branch; consider
# pinning the URL to a specific commit for reproducibility and safety.
file1_path = 'https://raw.githubusercontent.com/NLPrinceton/sparse_recovery/master/solvers.py'
# wget.download does not overwrite an existing file: it stores the new copy
# under a suffixed name instead. Re-running this cell would therefore pile up
# duplicates while the import keeps using the old file, so only download when
# the module is not already present.
if not os.path.exists('solvers.py'):
    wget.download(file1_path, 'solvers.py')
from solvers import *

'solvers.py'

In [8]:
## Define CKSAAP feature-extraction function
def minSequenceLength(fastas):
    """Return the length of the shortest sequence, never more than 10000.

    Each record in ``fastas`` is indexed as ``record[1]`` to obtain its
    sequence. When ``fastas`` is empty, or every sequence is at least 10000
    long, the result is 10000.
    """
    upper_bound = 10000
    lengths = [len(record[1]) for record in fastas]
    return min([upper_bound] + lengths)

def CKSAAP(fastas, gap=5, **kw):
    """Encode sequences as the Composition of k-Spaced Amino Acid Pairs.

    For every spacing ``g`` in ``0..gap`` and each of the 400 ordered pairs of
    the 20 standard amino acids, the encoding holds the fraction of counted
    residue pairs ``(sequence[i], sequence[i + g + 1])`` that equal that pair.
    Pairs containing a character outside ``ACDEFGHIKLMNPQRSTVWY`` are not
    counted.

    Args:
        fastas: Re-iterable collection of records; ``record[0]`` is the name
            and ``record[1]`` the sequence string.
        gap: Largest spacing to consider; must be zero or greater.
        **kw: Accepted for call compatibility and ignored.

    Returns:
        A list of rows. The first row is the header
        ``['#', 'AA.gap0', ..., 'YY.gap<gap>']``; each following row is
        ``[name, frequency, ...]`` in header order. If ``gap`` is negative or
        the shortest sequence is shorter than ``gap + 2``, an error message is
        printed and ``0`` is returned instead.
    """
    if gap < 0:
        print('Error: the gap should be equal or greater than zero' + '\n\n')
        return 0

    if minSequenceLength(fastas) < gap+2:
        print('Error: all the sequence length should be larger than the (gap value) + 2 = ' + str(gap+2) + '\n\n')
        return 0

    AA = 'ACDEFGHIKLMNPQRSTVWY'
    aaPairs = [aa1 + aa2 for aa1 in AA for aa2 in AA]
    header = ['#'] + [aa + '.gap' + str(g) for g in range(gap + 1) for aa in aaPairs]
    encodings = [header]

    for record in fastas:
        name, sequence = record[0], record[1]
        code = [name]
        for g in range(gap + 1):
            counts = dict.fromkeys(aaPairs, 0)
            pair_total = 0
            # Pair each residue with the one g + 1 positions further along.
            for left, right in zip(sequence, sequence[g + 1:]):
                pair = left + right
                # The dict holds exactly the pairs of standard residues, so
                # membership doubles as the "both residues are valid" check.
                if pair in counts:
                    counts[pair] += 1
                    pair_total += 1
            if pair_total:
                code.extend(counts[pair] / pair_total for pair in aaPairs)
            else:
                # No valid pair at this spacing (e.g. only non-standard
                # residues): report zero frequencies instead of dividing by 0.
                code.extend(0.0 for _ in aaPairs)
        encodings.append(code)
    return encodings

In [69]:
def delta_rule(A,x,b):
    """Choose the class whose coefficients reconstruct ``b`` more closely.

    The coefficient vector ``x`` is split at its midpoint. For each half, a
    copy of ``x`` with every other entry zeroed is multiplied by ``A`` and the
    norm of the difference to ``b`` is measured.

    Returns:
        1 when the first half gives the strictly smaller residual, else 0
        (ties therefore yield 0).
    """
    half = x.shape[0] // 2
    residuals = []
    for keep in (slice(0, half), slice(half, None)):
        # Zero vector shaped like x, with only this half's coefficients kept.
        masked = 0 * x
        masked[keep] = x[keep]
        residuals.append(np.linalg.norm(np.matmul(A, masked) - b))

    first_half_residual, second_half_residual = residuals
    return 1 if first_half_residual < second_half_residual else 0


In [None]:
def yoden_index(y, y_pred):
    """Return sensitivity + specificity - 1 for binary labels 0 and 1.

    A tiny constant is added to each denominator so that a class with no
    samples gives a rate of 0 instead of a division by zero.
    """
    tiny = 1e-30
    # Rows are the true labels [0, 1], columns the predicted labels [0, 1].
    (tn, fp), (fn, tp) = confusion_matrix(y, y_pred, labels=[0,1])
    sensitivity = tp / (tp + fn + tiny)
    specificity = tn / (tn + fp + tiny)
    return sensitivity + specificity - 1

def pmeasure(y, y_pred):
    """Compute sensitivity, specificity and F1-score for binary labels 0/1.

    Returns:
        A dict with the keys 'Sensitivity', 'Specificity' and 'F1-Score'.
        A tiny constant in each denominator turns an empty class into a
        score of 0 rather than a division by zero.
    """
    tiny = 1e-30
    counts = confusion_matrix(y, y_pred, labels=[0,1])
    # Rows are the true labels [0, 1], columns the predicted labels [0, 1].
    tn, fp = counts[0]
    fn, tp = counts[1]
    return {
        'Sensitivity': tp / (tp + fn + tiny),
        'Specificity': tn / (tn + fp + tiny),
        'F1-Score': (2 * tp) / (2 * tp + fp + fn + tiny),
    }

In [None]:
def Calculate_Stats(y_actual,y_pred):
    """Summarise binary classification quality from hard label predictions.

    Args:
        y_actual: True labels (0 or 1).
        y_pred: Predicted labels (0 or 1).

    Returns:
        A 7-tuple ``(accuracy, sensitivity, specificity, F1-score, MCC,
        balanced accuracy, Youden index)``.
    """
    # One pmeasure call yields all three scores; calling it once per score
    # would rebuild the same confusion matrix each time.
    measures = pmeasure(y_actual, y_pred)
    acc = accuracy_score(y_actual, y_pred)
    sen = measures['Sensitivity']
    spe = measures['Specificity']
    f1 = measures['F1-Score']
    mcc = matthews_corrcoef(y_actual, y_pred)
    bacc = balanced_accuracy_score(y_actual, y_pred)
    yi = yoden_index(y_actual, y_pred)
    # NOTE: curve-based metrics (ROC / precision-recall AUC) are not computed
    # here; they would need continuous scores, which this function never
    # receives.

    return acc, sen, spe, f1, mcc, bacc, yi

In [9]:
# Load the training and test tables as DataFrames directly from the AFP-SRC
# GitHub repository (requires network access).
train_set = pd.read_csv("https://raw.githubusercontent.com/Shujaat123/AFP-SRC/master/data/train1.csv")
test_set = pd.read_csv("https://raw.githubusercontent.com/Shujaat123/AFP-SRC/master/data/test1.csv")

In [102]:
# Split each table into a feature matrix (every column after the first) and a
# label vector taken from the CLASS column, recoded as 'AFP' -> 1 and
# 'NON_AFP' -> 0.
#
# The labels are copied with np.array before recoding. np.asarray may return a
# view of the DataFrame column, in which case the in-place assignments below
# would overwrite train_set.CLASS / test_set.CLASS (or fail on a read-only
# array when pandas Copy-on-Write is active).
X_train = train_set.iloc[:, 1:].to_numpy()
y_train = np.array(train_set.CLASS, dtype=object)
y_train[y_train=='AFP']=1
y_train[y_train=='NON_AFP']=0

X_test = test_set.iloc[:, 1:].to_numpy()
y_test = np.array(test_set.CLASS, dtype=object)
y_test[y_test=='AFP']=1
y_test[y_test=='NON_AFP']=0

In [103]:
# Flip the feature matrices so that each column holds one sample, which is how
# Test_SRC reads them, and cast the recoded labels to integers.
# NOTE: running this cell twice flips the matrices back again.
X_train, X_test = X_train.T, X_test.T
y_train, y_test = y_train.T.astype(int), y_test.T.astype(int)

# Sanity check of the resulting shapes: features first, then labels.
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)


(840, 600) (840, 9372)
(600,) (9372,)


In [104]:
def Test_SRC(A,DATA,LABEL,verbose):
    """Classify every column of ``DATA`` by sparse representation over ``A``.

    Each column ``b`` is passed to ``NonnegativeBP`` (imported from the
    downloaded solvers module; presumably a non-negative basis-pursuit
    solver) to obtain a coefficient vector, which ``delta_rule`` turns into a
    0/1 label.

    Args:
        A: Dictionary matrix whose columns are the reference samples.
        DATA: Matrix whose columns are the samples to classify.
        LABEL: True labels, indexed like the columns of ``DATA``; only read
            when ``verbose`` is truthy.
        verbose: When truthy, print one progress line per sample: position,
            running hit count, running accuracy in percent, true label,
            predicted label and whether they match.

    Returns:
        A NumPy array with one predicted label per column of ``DATA``.
    """
    predictions = []
    n_correct = 0
    for position, b in enumerate(DATA.T, start=1):
        coefficients = NonnegativeBP(A, b, x0=None, tol=1E-4, niter=100, biter=32)
        predicted = delta_rule(A, coefficients, b)
        predictions.append(predicted)
        if not verbose:
            continue
        truth = LABEL[position - 1]
        is_hit = predicted == truth
        if is_hit:
            n_correct += 1
        print(position, n_correct, 100 * n_correct / position, truth, predicted, is_hit)

    return np.array(predictions)


In [None]:
# Classify the training samples against their own dictionary (silent), then
# the test samples against the training dictionary (verbose: one progress line
# with the running accuracy is printed per test sample).
y_train_pred = Test_SRC(X_train,X_train,y_train,0)
y_test_pred = Test_SRC(X_train,X_test,y_test,1)

1 1 100.0 1 1 True
2 2 100.0 1 1 True
3 3 100.0 1 1 True
4 4 100.0 1 1 True
5 5 100.0 1 1 True
6 6 100.0 1 1 True
7 7 100.0 1 1 True
8 8 100.0 1 1 True
9 9 100.0 1 1 True
10 10 100.0 1 1 True
11 11 100.0 1 1 True
12 12 100.0 1 1 True
13 13 100.0 1 1 True
14 14 100.0 1 1 True
15 15 100.0 1 1 True
16 16 100.0 1 1 True
17 17 100.0 1 1 True
18 18 100.0 1 1 True
19 19 100.0 1 1 True
20 20 100.0 1 1 True
21 21 100.0 1 1 True
22 22 100.0 1 1 True
23 23 100.0 1 1 True
24 24 100.0 1 1 True
25 25 100.0 1 1 True
26 26 100.0 1 1 True
27 27 100.0 1 1 True
28 28 100.0 1 1 True
29 29 100.0 1 1 True
30 30 100.0 1 1 True
31 31 100.0 1 1 True
32 32 100.0 1 1 True
33 33 100.0 1 1 True
34 34 100.0 1 1 True
35 35 100.0 1 1 True
36 36 100.0 1 1 True
37 37 100.0 1 1 True
38 38 100.0 1 1 True
39 39 100.0 1 1 True
40 40 100.0 1 1 True
41 41 100.0 1 1 True
42 42 100.0 1 1 True
43 43 100.0 1 1 True
44 44 100.0 1 1 True
45 45 100.0 1 1 True
46 46 100.0 1 1 True
47 47 100.0 1 1 True
48 48 100.0 1 1 True
49 49 100.

In [None]:
# Score the predictions. Each call returns, in order: accuracy, sensitivity,
# specificity, F1-score, MCC, balanced accuracy and Youden index
# (tr_* = training set, t_* = test set).
tr_acc, tr_sen, tr_spe, tr_f1, tr_mcc, tr_bacc, tr_yi = Calculate_Stats(y_train, y_train_pred)
t_acc, t_sen, t_spe, t_f1, t_mcc, t_bacc, t_yi = Calculate_Stats(y_test,y_test_pred)

In [None]:
# Print the seven scores on one line per split: training first, then test.
print(tr_acc, tr_sen, tr_spe, tr_f1, tr_mcc, tr_bacc, tr_yi)
print(t_acc, t_sen, t_spe, t_f1, t_mcc, t_bacc, t_yi)


In [124]:
# Stats=[]

# for i in range(3):
#   y_train_pred = train_list[i]
#   y_test_pred = test_list[i]
  
#   ## Training Measures
#   tr_acc, tr_sen, tr_spe, tr_f1, tr_mcc, tr_bacc, tr_yi = Calculate_Stats(y_train, y_train_pred);
  
#   ## Validation Measures
#   #v_acc, v_sen, v_spe, v_f1, v_mcc, v_bacc, v_yi = Calculate_Stats(to_categorical(y_val),y_val_pred);
  
#   ## Test Measures
#   t_acc, t_sen, t_spe, t_f1, t_mcc, t_bacc, t_yi = Calculate_Stats(y_test,y_test_pred);

#   Stats.append([tr_acc, tr_sen, tr_spe, tr_f1, tr_mcc, tr_bacc, tr_yi,
#                 #              v_acc, v_sen, v_spe, v_f1, v_mcc, v_bacc, v_yi,
#                 t_acc, t_sen, t_spe, t_f1, t_mcc, t_bacc, t_yi])

# Statistics = np.asarray(Stats)

NameError: ignored

In [122]:
# def Show_Statistics(msg,mean_Stats, sd_Stats, sigfig):
#   print(msg.upper())
#   print(70*'-')
#   print('Accuracy:{} + {}'          .format(round(mean_Stats[0],sigfig), round(sd_Stats[0],sigfig)))
#   print('Sensitivity:{} + {} '      .format(round(mean_Stats[1],sigfig), round(sd_Stats[1],sigfig)))
#   print('Specificity:{} + {}'       .format(round(mean_Stats[2],sigfig), round(sd_Stats[2],sigfig)))
#   print('F1-Score:{} + {}'          .format(round(mean_Stats[3],sigfig), round(sd_Stats[3],sigfig)))
#   print('MCC:{} + {}'               .format(round(mean_Stats[4],sigfig), round(sd_Stats[4],sigfig)))
#   print('Balance Accuracy:{} + {}'  .format(round(mean_Stats[5],sigfig), round(sd_Stats[5],sigfig)))
#   print('Youden-Index:{} + {}'      .format(round(mean_Stats[6],sigfig), round(sd_Stats[6],sigfig)))
#   print(70*'-')

In [123]:
# Show_Statistics('Norm Training Results (MEAN)',Statistics[0][0:7],Statistics.std(axis=0)[0:7], 3)
# Show_Statistics('Norm Test Results (MEAN)',Statistics[0][7:14],Statistics.std(axis=0)[7:14], 3)
# Show_Statistics('Dict Training Results (MEAN)',Statistics[1][0:7],Statistics.std(axis=0)[0:7], 3)
# Show_Statistics('Dict Test Results (MEAN)',Statistics[1][7:14],Statistics.std(axis=0)[7:14], 3)
# Show_Statistics('Rec Training Results (MEAN)',Statistics[2][0:7],Statistics.std(axis=0)[0:7], 3)
# Show_Statistics('Rec Test Results (MEAN)',Statistics[2][7:14],Statistics.std(axis=0)[7:14], 3)
# #Show_Statistics('Test Results (MEAN)',Statistics.mean(axis=0)[14:21],Statistics.std(axis=0)[14:21], 3)

NameError: ignored