In [None]:
# import libraries....
import pandas as pd
import numpy as np
import warnings
from sklearn.exceptions import DataConversionWarning
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
warnings.filterwarnings(action = 'ignore', category = DataConversionWarning)

In [None]:
# load datasets...

data = pd.read_csv("/content/diabetes.csv")
columns = ['Glucose', 'BloodPressure', 'SkinThickness','Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
# Zeros in these columns are treated as missing values and the affected rows are dropped.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
data[columns] = data[columns].replace(0, np.nan)
data = data.dropna()

#------------------------------------------------------------------------------     # Size of the training part (first 60% of the rows)
ntrain = int(len(data)*0.60)
#------------------------------------------------------------------------------     # train and test data (columns 0-7 = features, column 8 = label)
Xtrain = data.iloc[:ntrain, 0:8].values
Xtest = data.iloc[ntrain:, 0:8].values

Ytrain = data.iloc[:ntrain, 8:9].values
Ytest = data.iloc[ntrain:, 8:9].values
#-------------------------------------------------------------------------------      # min-max scaled data
scaler_xtrain = MinMaxScaler()
xtrain_sc = scaler_xtrain.fit_transform(Xtrain)

# The test set must be mapped with the min/max learned from the training set.
# Fitting a second scaler on the test rows would put train and test features
# on different scales and leak test-set statistics into the evaluation.
scaler_xtest = scaler_xtrain
xtest_sc = scaler_xtest.transform(Xtest)
#------------------------------------------------------------------------------       # Adding ones column in the data
ones_tn = np.ones(shape = (xtrain_sc.shape[0],1))
xtrain_sc = np.hstack([ones_tn, xtrain_sc])

ones_ts = np.ones(shape = (xtest_sc.shape[0],1))
xtest_sc = np.hstack([ones_ts, xtest_sc])

# Label in column 0, followed by the ones column and the scaled features.
train = np.append(Ytrain, xtrain_sc, axis = 1)

In [None]:
def intializer(layers):
  """Return the initial weight matrix for the gradient-descent learner.

  Parameters
  ----------
  layers : sequence of two ints
      (rows, columns) of the weight matrix.

  Returns
  -------
  numpy.ndarray of shape (layers[0], layers[1]) with standard-normal entries.
  The same `layers` always yields the same matrix (fixed seed 3).
  """
  # Draw from a private generator instead of calling np.random.seed(3):
  # reseeding the global generator here would reset every other consumer of
  # np.random (e.g. the shuffling helper) to the same stream after each call.
  rng = np.random.RandomState(3)
  W = rng.randn(layers[0], layers[1])
  return (W)

def sigmoid(z):
  """Logistic function 1 / (1 + exp(-z)), evaluated with NumPy on `z`."""
  denominator = 1.0 + np.exp(-z)
  return 1.0 / denominator

def decision_fn(A):
  """Threshold activations: 1 where A is strictly greater than 0.5, else 0."""
  above_half = A > 0.5
  return np.where(above_half, 1, 0)

def predict(X, W):
  """Return 0/1 labels for `X` under weights `W`.

  The linear score np.dot(X, W) is passed through `sigmoid` and the result
  is thresholded by `decision_fn`.
  """
  activations = sigmoid(np.dot(X, W))
  return decision_fn(activations)

def indicesShuffle(number):                 #shuffling algo
  """Return the integers 0 .. number-1 in random order.

  Parameters
  ----------
  number : int
      How many indices to produce.

  Returns
  -------
  list of int
      A random permutation of range(number); an empty list when
      `number` is zero or negative.
  """
  if number <= 0:
    return ([])
  # A single permutation draw replaces repeatedly sampling one index and
  # rejecting duplicates, which needed a growing list scan per draw.
  return (np.random.permutation(number).tolist())

def shuffler(X):
  """Return a row-shuffled float copy of `X` and the row order that was used.

  Row k of the returned array is row order[k] of `X`, where `order` comes
  from `indicesShuffle(X.shape[0])`.
  """
  order = indicesShuffle(X.shape[0])
  shuffled = np.empty(X.shape)
  # Gather all rows in one indexing step; assigning into the pre-allocated
  # buffer keeps the result in np.empty's default dtype.
  shuffled[...] = X[np.asarray(order, dtype=np.intp)]
  return (shuffled, order)

def minimumcost(vault):
  """Pick the stored weights belonging to the lowest recorded cost.

  `vault[4]` is the list of costs and `vault[0]` the list of weight entries
  at matching positions. The entry at the first minimum-cost position is
  returned transposed.
  """
  weight_history, cost_history = vault[0], vault[4]
  best_position = cost_history.index(min(cost_history))
  return weight_history[best_position].T


def SPLPrediction(X,Y,W):
  """Score the weights `W` on inputs `X` against the true labels `Y`.

  Returns a list [accuracy, f1, misclassification], all in percent, where
  misclassification is 100 minus accuracy.
  """
  labels = predict(X, W)
  accuracy_pct = accuracy_score(Y, labels)*100
  f1_pct = f1_score(Y, labels)*100
  return [accuracy_pct, f1_pct, 100-accuracy_pct]
  
def SPLGD(X,Y,lr,epochs,layers):    #gradient descent for FSNN
  """Batch gradient descent for a single sigmoid unit with cross-entropy cost.

  Parameters
  ----------
  X : array, one row per sample
  Y : array of 0/1 targets  (assumed to be a column of shape (m, 1) - TODO confirm)
  lr : float, learning rate
  epochs : int, number of update steps
  layers : shape of the weight matrix, forwarded to `intializer`

  Returns
  -------
  list
      [weights, zpred, Apred, errpred, costs, grads]; each element is a list
      with one entry per epoch. Array entries are stored transposed. The
      values recorded for an epoch are the ones computed *before* that
      epoch's weight update. With `epochs == 0` all six lists are empty.
  """
  weights = []
  zpred = []
  Apred = []
  errpred = []
  costs = []
  grads = []
  # Smallest margin kept between the activations and {0, 1} inside the logs.
  eps = np.finfo(float).eps
  W = intializer(layers)
  for _ in range(epochs):
    Z = np.dot(X, W)
    A = sigmoid(Z)
    error = A - Y
    m = A.shape[0]
    # A saturated sigmoid returns exactly 0.0 or 1.0; log(0) would then turn
    # the cost into inf/NaN and break the minimum-cost selection downstream.
    # Only the cost uses the clipped values; the gradient uses A as is.
    A_safe = np.clip(A, eps, 1.0 - eps)
    loss = -(np.dot(Y.T, np.log(A_safe)) + np.dot((1-Y.T),np.log(1-A_safe)))
    cost = np.sum(loss)/m
    grad = np.dot(X.T, error)
    weights.append((W.T))
    zpred.append(Z.T)
    Apred.append(A.T)
    errpred.append(error.T)
    costs.append(cost)
    grads.append(grad.T)
    W = W - (lr/m)*(grad)
  # Assembled once after the loop so the structure is the same for any `epochs`.
  vault = [weights, zpred, Apred, errpred, costs, grads]
  return (vault)

In [None]:
def FSLNN(Xn, Yn, Xt, Yt, allData, lr, epochs, layers, shuffleTime, trees):     #FSNN CORE ALGO
  """Stack random-forest predictions and fit a sigmoid unit on top of them.

  For each of `shuffleTime` rounds:
    1. `allData` (label in column 0, features in the remaining columns) is
       row-shuffled with `shuffler`.
    2. Forests with 1, 2, ..., `trees` estimators are fitted on the shuffled
       data; their predictions on that same data form a
       (trees, n_samples) training stack.
    3. Forests of the same sizes are fitted on (Xn, Yn); their predictions
       on Xt form the test stack.
    4. `SPLGD` is run on the transposed training stack, the weights with the
       lowest recorded cost are selected with `minimumcost`, and
       `SPLPrediction` scores them on the test stack against Yt.

  Parameters
  ----------
  Xn, Yn : training features / labels used for the test-stack forests
  Xt, Yt : test features / labels
  allData : array with the label in column 0 followed by the feature columns
  lr, epochs, layers : forwarded to `SPLGD` (`layers[0]` is expected to
      equal `trees`, the number of stacked features - verify against caller)
  shuffleTime : number of shuffle rounds
  trees : largest forest size; also the number of stacked features

  Returns
  -------
  list
      One [accuracy, f1, misclassification] entry (percent) per round.

  NOTE(review): the training stack consists of in-sample forest predictions,
  while the test stack consists of out-of-sample predictions - confirm this
  is the intended design.
  """
  report = []
  # scikit-learn expects 1-D label vectors; a column vector triggers a
  # DataConversionWarning.
  yn_flat = np.ravel(Yn)

  for _ in range(shuffleTime):
    shuffled, _order = shuffler(allData)
    xdata = shuffled[:, 1:]          # every column after the label
    ydata = shuffled[:, 0:1]         # kept 2-D for the gradient-descent step
    y_flat = ydata.ravel()

    # Action from RandomForest
    # 1) Train.....
    # Row i-1 holds the predictions of the forest with i estimators, so all
    # `trees` rows of the np.empty buffer are written before it is used.
    stackdata = np.empty(shape = (trees, xdata.shape[0]))
    for i in range(1, trees + 1):
      clf = RandomForestClassifier(n_estimators=i)
      clf.fit(xdata, y_flat)
      stackdata[i - 1] = clf.predict(xdata)

    # 2) Test.....
    teststackdata = np.empty(shape = (trees, Xt.shape[0]))
    for j in range(1, trees + 1):
      clft = RandomForestClassifier(n_estimators=j)
      clft.fit(Xn, yn_flat)
      teststackdata[j - 1] = clft.predict(Xt)

    # Gradient descent...
    cache = SPLGD(X = stackdata.T,Y = ydata,lr = lr,epochs = epochs,layers = layers)

    # weight at min cost
    W = minimumcost(cache)

    # results..
    report.append(SPLPrediction(teststackdata.T, Yt, W))
  return (report)

Use 60% of the data for training and 40% for testing

TEST-1

In [None]:
# Experiment 1: forests of up to 500 estimators.
trees = 500
layers = [trees, 1]
lr = 0.8
iters = 5000
shuffleTime = 10
report1 = FSLNN(
    Xn=xtrain_sc, Yn=Ytrain,
    Xt=xtest_sc, Yt=Ytest,
    allData=train,
    lr=lr, epochs=iters, layers=layers,
    shuffleTime=shuffleTime, trees=trees,
)

# Columns of the report: accuracy, F1, misclassification - averaged over the rounds.
avg_results1 = np.asarray(report1)
accuracy1, f1_1, misclf1 = (np.mean(avg_results1[:, k:k + 1]) for k in range(3))



In [None]:
# One line per metric; each label is followed by a space and str(value).
print(
    *(
        f"{label} {value!s}"
        for label, value in (
            ("Accuracy of the test model is : ", accuracy1),
            ("F1 score of the test model is : ", f1_1),
            ("Misclassification rate of the test model is : ", misclf1),
        )
    ),
    sep="\n",
)

Accuracy of the test model is :  78.59872611464968
F1 score of the test model is :  68.78205128205127
Misclassification rate of the test model is :  21.40127388535031


TEST-2

In [None]:
# Experiment 2: forests of up to 200 estimators.
trees = 200
layers = [trees, 1]
lr = 0.8
iters = 5000
shuffleTime = 10
report2 = FSLNN(
    Xn=xtrain_sc, Yn=Ytrain,
    Xt=xtest_sc, Yt=Ytest,
    allData=train,
    lr=lr, epochs=iters, layers=layers,
    shuffleTime=shuffleTime, trees=trees,
)

# Columns of the report: accuracy, F1, misclassification - averaged over the rounds.
avg_results2 = np.asarray(report2)
accuracy2, f1_2, misclf2 = (np.mean(avg_results2[:, k:k + 1]) for k in range(3))



In [None]:
# One line per metric; each label is followed by a space and str(value).
print(
    *(
        f"{label} {value!s}"
        for label, value in (
            ("Accuracy of the test model is : ", accuracy2),
            ("F1 score of the test model is : ", f1_2),
            ("Misclassification rate of the test model is : ", misclf2),
        )
    ),
    sep="\n",
)

Accuracy of the test model is :  64.20382165605095
F1 score of the test model is :  57.1785551232705
Misclassification rate of the test model is :  35.79617834394905


TEST-3

In [None]:
# Experiment 3: forests of up to 80 estimators.
trees = 80
layers = [trees, 1]
lr = 0.8
iters = 5000
shuffleTime = 10
report3 = FSLNN(
    Xn=xtrain_sc, Yn=Ytrain,
    Xt=xtest_sc, Yt=Ytest,
    allData=train,
    lr=lr, epochs=iters, layers=layers,
    shuffleTime=shuffleTime, trees=trees,
)

# Columns of the report: accuracy, F1, misclassification - averaged over the rounds.
avg_results3 = np.asarray(report3)
accuracy3, f1_3, misclf3 = (np.mean(avg_results3[:, k:k + 1]) for k in range(3))



In [None]:
# One line per metric; each label is followed by a space and str(value).
print(
    *(
        f"{label} {value!s}"
        for label, value in (
            ("Accuracy of the test model is : ", accuracy3),
            ("F1 score of the test model is : ", f1_3),
            ("Misclassification rate of the test model is : ", misclf3),
        )
    ),
    sep="\n",
)

Accuracy of the test model is :  64.45859872611466
F1 score of the test model is :  56.52350504315284
Misclassification rate of the test model is :  35.541401273885356
