In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Base directory for the input CSVs and the output file. Note the trailing
# slash: later cells build file names by concatenating onto this string.
path = '/content/drive/My Drive/data/'

In [3]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

## Used for training the classifiers
#### X_Train = Feature matrix of the training samples
#### Y_Train = Ground-truth class labels of the training samples


## Used for evaluating performance
#### X_Val = Feature matrix of the validation samples
#### Y_Val = Ground-truth class labels of the validation samples


## What we have to predict and submit predictions for
#### X_Test = Feature matrix of the test samples

In [4]:
## Read Feature CSV into DataFrame and Split into X(Feature Matrix) and Y(Class Labels)
# `path` already ends with '/', so join the components instead of prepending
# another '/' (which produced a doubled separator in the file name).
csvpath = os.path.join(path, 'Phase 1 Features', 'FeaturesCompleteFinal.csv')
X = pd.read_csv(csvpath)
print(X)

# Keep the class labels aside, then drop both non-feature columns
# (image file name and class label) so X holds features only.
Y = X['Image Class']
X = X.drop(columns=['Image Name', 'Image Class'])
print(X)
print(Y)

                                     Image Name   Entropy  ...   GLCM96  Image Class
0      1546cd86-7be1-4c3c-934b-ef271c198c92.png  3.923832  ...  0.27808            0
1      dff20324-e70a-4972-9f88-5587453ffb30.png  4.231428  ...  0.21572            0
2      616797e1-e2d0-485e-9d2d-5c2bb7d7b5a7.png  4.703336  ...  0.15946            0
3      c148999c-9520-453a-9359-bfdc5c0090dd.png  4.171809  ...  0.23450            0
4      80b9b1cc-991a-428d-826c-fac3bf595d3c.png  3.198255  ...  0.35941            0
...                                         ...       ...  ...      ...          ...
16924  eb02cfcc-1607-43e5-ae90-9ce93a80f539.png  4.673976  ...  0.12940            3
16925  1556d32c-0e4a-4830-8360-f99925260c79.png  4.430534  ...  0.17850            3
16926  000013af-e145-469a-a79f-8acb23e194e2.png  4.013350  ...  0.26247            3
16927  34afb367-8f3a-46ca-aa76-8fed5f8cd8a8.png  3.419377  ...  0.33216            3
16928  24e50ce9-ed2c-4334-85b0-44a9cae6333f.png  3.438339  ...  0

In [5]:
# Convert the feature frame and the label series to NumPy arrays, then hold
# out 20% of the samples for validation (shuffled, fixed seed).
Xarr, Yarr = X.to_numpy(), Y.to_numpy()

X_Train, X_Val, Y_Train, Y_Val = train_test_split(
    Xarr,
    Yarr,
    test_size=0.2,
    shuffle=True,
    random_state=4,
)

In [6]:
# Show the dimensions of each split, in the order:
# training features, training labels, validation features, validation labels.
for _split in (X_Train, Y_Train, X_Val, Y_Val):
    print(_split.shape)

(13543, 113)
(13543,)
(3386, 113)
(3386,)


In [7]:
def getMetrics(Y_Pred, Y_Val, average=None):
    """Print accuracy, precision, recall and F1 as percentages and return them.

    Args:
        Y_Pred: Predicted class labels.
        Y_Val: Ground-truth class labels for the same samples.
        average: Optional averaging mode applied to precision, recall and F1
            alike (e.g. 'macro' or 'weighted'). When None (the default) the
            original setup is kept: precision and F1 are support-weighted
            while recall is macro-averaged.

    Returns:
        dict with the keys 'accuracy', 'precision', 'recall' and 'f1score',
        each value being the score multiplied by 100.
    """
    # NOTE(review): the default mixes averaging modes, so the printed recall is
    # not on the same footing as the printed precision/F1 (possibly deliberate,
    # since support-weighted recall is numerically equal to accuracy -- confirm).
    # Pass `average` explicitly to report all three with one mode.
    precision_avg = 'weighted' if average is None else average
    recall_avg = 'macro' if average is None else average
    f1_avg = 'weighted' if average is None else average

    scores = {
        'accuracy': 100 * metrics.accuracy_score(Y_Val, Y_Pred),
        'precision': 100 * metrics.precision_score(Y_Val, Y_Pred, average=precision_avg),
        'recall': 100 * metrics.recall_score(Y_Val, Y_Pred, average=recall_avg),
        'f1score': 100 * metrics.f1_score(Y_Val, Y_Pred, average=f1_avg),
    }

    print("\nAccuracy: " + str(scores['accuracy']))
    print("\nPrecision: " + str(scores['precision']))
    print("\nRecall : " + str(scores['recall']))
    print("\nF1 Score: " + str(scores['f1score']))

    # Returned so callers can tabulate or compare classifiers programmatically;
    # existing callers that ignore the return value are unaffected.
    return scores

In [8]:
def KNeighboursEval(X_Train, Y_Train, X_Val, Y_Val):
    """Fit a k-nearest-neighbours classifier and report validation metrics.

    The model uses 5 neighbours, a KD-tree index (leaf size 50), the
    Minkowski metric with p=1 and distance-weighted voting. It is fitted on
    (X_Train, Y_Train); predictions for X_Val are scored against Y_Val via
    getMetrics, which prints the results.

    Returns:
        The predicted labels for X_Val.
    """
    knn_settings = dict(
        n_neighbors=5,
        algorithm='kd_tree',
        leaf_size=50,
        p=1,
        weights='distance',
    )
    classifier = KNeighborsClassifier(**knn_settings)
    classifier.fit(X_Train, Y_Train)

    val_predictions = classifier.predict(X_Val)
    print("K Nearest Neighbours Classifier \n")
    getMetrics(val_predictions, Y_Val)

    return val_predictions

In [9]:
def RandomForestsEval(X_Train, Y_Train, X_Val, Y_Val):
    """Fit a random forest on the training split and report validation metrics.

    Args:
        X_Train: Feature matrix used for fitting.
        Y_Train: Class labels matching X_Train.
        X_Val: Feature matrix to predict on.
        Y_Val: Ground-truth labels for X_Val, passed to getMetrics for scoring.

    Returns:
        The predicted labels for X_Val.
    """
    # max_features='sqrt' is the explicit spelling of what the 'auto' alias
    # meant for classifiers; 'auto' is deprecated and rejected by newer
    # scikit-learn releases, so the explicit value keeps this cell runnable.
    # warm_start is omitted: the estimator is created and fitted exactly once
    # here, so there are never previously grown trees to reuse.
    rmf = RandomForestClassifier(
        n_estimators=100,
        bootstrap=False,
        random_state=0,
        max_features='sqrt',
        min_samples_split=2,
        min_weight_fraction_leaf=0.0,
        criterion='gini',
        max_depth=None,
        min_impurity_decrease=0.0,
    )
    rmf.fit(X_Train, Y_Train)

    Y_Pred = rmf.predict(X_Val)
    print("Random Forests Classifier \n")
    getMetrics(Y_Pred, Y_Val)

    return Y_Pred

In [10]:
def SVM_LinearEval(X_Train, Y_Train, X_Val, Y_Val):
    """Fit a linear-kernel SVC (C=0.025) and report validation metrics.

    The classifier is fitted on (X_Train, Y_Train); its predictions for X_Val
    are scored against Y_Val via getMetrics, which prints the results.

    Returns:
        The predicted labels for X_Val.
    """
    linear_svc = svm.SVC(C=0.025, kernel="linear")
    linear_svc.fit(X_Train, Y_Train)

    val_predictions = linear_svc.predict(X_Val)
    print("Linear SVM Classifier \n")
    getMetrics(val_predictions, Y_Val)

    return val_predictions

In [11]:
def SVM_RadialBasisEval(X_Train, Y_Train, X_Val, Y_Val):
    """Fit an SVC with gamma=2 and C=1 and report validation metrics.

    No kernel is passed, so SVC's default kernel is used (RBF according to the
    scikit-learn documentation). The classifier is fitted on
    (X_Train, Y_Train); its predictions for X_Val are scored against Y_Val via
    getMetrics, which prints the results.

    Returns:
        The predicted labels for X_Val.
    """
    rbf_svc = svm.SVC(C=1, gamma=2)
    rbf_svc.fit(X_Train, Y_Train)

    val_predictions = rbf_svc.predict(X_Val)
    print("Radial Basis Function SVM Classifier \n")
    getMetrics(val_predictions, Y_Val)

    return val_predictions

In [12]:
def DecisionTreesEval(X_Train, Y_Train, X_Val, Y_Val):
    """Fit a depth-3 entropy decision tree and report validation metrics.

    The tree is fitted on (X_Train, Y_Train); its predictions for X_Val are
    scored against Y_Val via getMetrics, which prints the results.

    Returns:
        The predicted labels for X_Val.
    """
    tree = DecisionTreeClassifier(max_depth=3, criterion="entropy")
    tree.fit(X_Train, Y_Train)

    val_predictions = tree.predict(X_Val)
    print("Decision Tree Classifier \n")
    getMetrics(val_predictions, Y_Val)

    return val_predictions

In [13]:
def NaiveBayesEval(X_Train, Y_Train, X_Val, Y_Val):
    """Fit a Gaussian naive Bayes classifier and report validation metrics.

    The model is built with default settings and fitted on
    (X_Train, Y_Train); its predictions for X_Val are scored against Y_Val
    via getMetrics, which prints the results.

    Returns:
        The predicted labels for X_Val.
    """
    bayes = GaussianNB()
    bayes.fit(X_Train, Y_Train)

    val_predictions = bayes.predict(X_Val)
    print("Naive Bayes Classifier \n")
    getMetrics(val_predictions, Y_Val)

    return val_predictions

In [None]:
# Evaluate k-nearest neighbours on the validation split
YPred = KNeighboursEval(
    X_Train=X_Train,
    Y_Train=Y_Train,
    X_Val=X_Val,
    Y_Val=Y_Val,
)


K Nearest Neighbours Classifier 


Accuracy: 61.66568222090962

Precision: 59.959674319988686

Recall : 52.56326149854394

F1 Score: 59.81317528416918


In [None]:
# Evaluate Gaussian naive Bayes on the validation split
YPred = NaiveBayesEval(
    X_Train=X_Train,
    Y_Train=Y_Train,
    X_Val=X_Val,
    Y_Val=Y_Val,
)

Naive Bayes Classifier 


Accuracy: 50.324867099822804

Precision: 47.59272677839474

Recall : 45.470710762309054

F1 Score: 47.467449549848396


In [None]:
# Evaluate the random forest on the validation split
YPred = RandomForestsEval(
    X_Train=X_Train,
    Y_Train=Y_Train,
    X_Val=X_Val,
    Y_Val=Y_Val,
)

Random Forests Classifier 


Accuracy: 75.45776727702304

Precision: 75.26571361844636

Recall : 69.51391822908516

F1 Score: 74.91059898656503


In [14]:
# Evaluate the linear-kernel SVM on the validation split
YPred = SVM_LinearEval(
    X_Train=X_Train,
    Y_Train=Y_Train,
    X_Val=X_Val,
    Y_Val=Y_Val,
)

Linear SVM Classifier 


Accuracy: 71.38216184288245

Precision: 70.31196617140417

Recall : 63.80892772543142

F1 Score: 69.78069006265535


In [None]:
# Evaluate the radial-basis-function SVM on the validation split
YPred = SVM_RadialBasisEval(
    X_Train=X_Train,
    Y_Train=Y_Train,
    X_Val=X_Val,
    Y_Val=Y_Val,
)

In [None]:
# Evaluate the decision tree on the validation split
YPred = DecisionTreesEval(
    X_Train=X_Train,
    Y_Train=Y_Train,
    X_Val=X_Val,
    Y_Val=Y_Val,
)

Decision Tree Classifier 


Accuracy: 58.41701122268162

Precision: 49.386954610655266

Recall : 45.02749206475154

F1 Score: 50.567805095521365


  _warn_prf(average, modifier, msg_start, len(result))


# **ALL TESTING RELATED CODE HERE**



In [None]:
def FinalPredictor1(X_Train, Y_Train, X_Test):
    """Train a random forest on the given training data and predict X_Test.

    Args:
        X_Train: Feature matrix used for fitting.
        Y_Train: Class labels matching X_Train.
        X_Test: Feature matrix of the samples to classify.

    Returns:
        The predicted labels for X_Test.
    """
    # max_features='sqrt' is the explicit spelling of what the 'auto' alias
    # meant for classifiers; 'auto' is deprecated and rejected by newer
    # scikit-learn releases, so the explicit value keeps this cell runnable.
    # warm_start is omitted: the estimator is created and fitted exactly once
    # here, so there are never previously grown trees to reuse.
    rmf = RandomForestClassifier(
        n_estimators=100,
        bootstrap=False,
        random_state=0,
        max_features='sqrt',
        min_samples_split=2,
        min_weight_fraction_leaf=0.0,
        criterion='gini',
        max_depth=None,
        min_impurity_decrease=0.0,
    )
    rmf.fit(X_Train, Y_Train)

    Y_Pred = rmf.predict(X_Test)
    print("Final Predictions - Random Forests Classifier \n")

    return Y_Pred

In [None]:
def FinalPredictor2(X_Train, Y_Train, X_Test):
    """Train a k-nearest-neighbours classifier and predict labels for X_Test.

    The model uses 5 neighbours, a KD-tree index (leaf size 50), the
    Minkowski metric with p=1 and distance-weighted voting. A banner naming
    the classifier is printed after prediction.

    Returns:
        The predicted labels for X_Test.
    """
    knn_settings = dict(
        n_neighbors=5,
        algorithm='kd_tree',
        leaf_size=50,
        p=1,
        weights='distance',
    )
    classifier = KNeighborsClassifier(**knn_settings)
    classifier.fit(X_Train, Y_Train)

    test_predictions = classifier.predict(X_Test)
    print("Final Predictions - K Nearest Neighbours Classifier \n")

    return test_predictions

In [None]:
def FinalPredictor3(X_Train, Y_Train, X_Test):
    """Train a depth-3 entropy decision tree and predict labels for X_Test.

    A banner naming the classifier is printed after prediction.

    Returns:
        The predicted labels for X_Test.
    """
    tree = DecisionTreeClassifier(max_depth=3, criterion="entropy")
    tree.fit(X_Train, Y_Train)

    test_predictions = tree.predict(X_Test)
    print("Final Predictions - Decision Tree Classifier \n")

    return test_predictions

In [None]:
def FinalPredictor4(X_Train, Y_Train, X_Test):
    """Train a Gaussian naive Bayes classifier and predict labels for X_Test.

    The model is built with default settings. A banner naming the classifier
    is printed after prediction.

    Returns:
        The predicted labels for X_Test.
    """
    bayes = GaussianNB()
    bayes.fit(X_Train, Y_Train)

    test_predictions = bayes.predict(X_Test)
    print("Final Predictions - Naive Bayes Classifier \n")

    return test_predictions

In [None]:
## Load the labelled feature CSV again and split it into X (feature matrix) and Y (class labels)
csvpath = path + 'Phase 1 Features/FeaturesCompleteFinal.csv'
X = pd.read_csv(csvpath)
print(X)

# Discard the image file names, then move the label column out of the frame
del X['Image Name']
Y = X.pop('Image Class')
print(X)
print(Y)

# NumPy copies of the features and labels, used as training data for the final predictor
Xtrainfinal, Ytrainfinal = X.to_numpy(), Y.to_numpy()


                                     Image Name   Entropy  ...   GLCM96  Image Class
0      1546cd86-7be1-4c3c-934b-ef271c198c92.png  3.923832  ...  0.27808            0
1      dff20324-e70a-4972-9f88-5587453ffb30.png  4.231428  ...  0.21572            0
2      616797e1-e2d0-485e-9d2d-5c2bb7d7b5a7.png  4.703336  ...  0.15946            0
3      c148999c-9520-453a-9359-bfdc5c0090dd.png  4.171809  ...  0.23450            0
4      80b9b1cc-991a-428d-826c-fac3bf595d3c.png  3.198255  ...  0.35941            0
...                                         ...       ...  ...      ...          ...
16924  eb02cfcc-1607-43e5-ae90-9ce93a80f539.png  4.673976  ...  0.12940            3
16925  1556d32c-0e4a-4830-8360-f99925260c79.png  4.430534  ...  0.17850            3
16926  000013af-e145-469a-a79f-8acb23e194e2.png  4.013350  ...  0.26247            3
16927  34afb367-8f3a-46ca-aa76-8fed5f8cd8a8.png  3.419377  ...  0.33216            3
16928  24e50ce9-ed2c-4334-85b0-44a9cae6333f.png  3.438339  ...  0

In [None]:
## Read the test feature CSV and compute predictions for it
# Alternative test files that were used previously (kept for reference):
#test_csvpath = path + 'Features4kTest.csv'
#test_csvpath = path + 'Features4kTestNoise.csv'
#test_csvpath = path + 'Features_mtec_test.csv'
testnoise_newcsvpath = path + 'Features4knoisytest.csv'

Xtest = pd.read_csv(testnoise_newcsvpath)
print(Xtest)

# Keep the file names for the output table; the remaining columns are features
imgNames = Xtest.pop('Image Name')

Xtestarr = Xtest.to_numpy()

# Fit on the full labelled set and classify the test rows
YFinalPred = FinalPredictor4(
    X_Train=Xtrainfinal,
    Y_Train=Ytrainfinal,
    X_Test=Xtestarr,
)

                                    Image Name   Entropy  ...   GLCM95   GLCM96
0     45960b49-82e5-4f76-adbe-14d32c67b252.png  4.216229  ...  0.23183  0.21233
1     f86080e7-8ab8-4d7a-8e1e-613b2228049d.png  3.543693  ...  0.32257  0.31212
2     322d36d4-8d2b-4272-9dd3-2b426ce40cd2.png  4.486049  ...  0.18670  0.17286
3     107217d8-f1eb-4ef9-bb0c-1fe8bfab12cf.png  3.925286  ...  0.27145  0.25794
4     b8b31d85-321b-4b10-96fa-4b418d3bff89.png  4.306782  ...  0.23134  0.21791
...                                        ...       ...  ...      ...      ...
4230  97287139-ee40-4970-80ef-de858bcb85de.png  4.940680  ...  0.12265  0.11527
4231  132855e6-4b5b-42a0-9e01-704925001983.png  4.915170  ...  0.14328  0.13683
4232  01cda1bd-2e34-46da-9eb0-7b66ed7ccb1b.png  4.127192  ...  0.23719  0.23183
4233  e43af91f-3fb4-48ae-8696-b167a6e8488f.png  4.272987  ...  0.21910  0.20136
4234  b6c02f3d-115b-474f-9105-da2ab50424f9.png  3.967976  ...  0.23956  0.22510

[4235 rows x 114 columns]
Final Predict

In [None]:
# Show the shape of the prediction array (expected: one entry per test row)
print(YFinalPred.shape)

(4235,)


In [None]:
## Put Image Name and Predicted Class in Dataframe
# Numeric class id -> class name written to the output table
classNames = {0: 'Normal', 1: 'COVID', 2: 'pneumonia', 3: 'Lung_Opacity'}

imageClass = []
for element in YFinalPred:
    # Fail loudly on an unknown id: silently skipping it would shorten
    # imageClass, and zip() below would then pair every later image name
    # with the wrong class.
    if element not in classNames:
        raise ValueError("Unexpected predicted class id: " + repr(element))
    imageClass.append(classNames[element])

df = pd.DataFrame(list(zip(imgNames, imageClass)), columns=['Image Name', 'Image Class'])
print(df)


                                    Image Name   Image Class
0     45960b49-82e5-4f76-adbe-14d32c67b252.png        Normal
1     f86080e7-8ab8-4d7a-8e1e-613b2228049d.png  Lung_Opacity
2     322d36d4-8d2b-4272-9dd3-2b426ce40cd2.png        Normal
3     107217d8-f1eb-4ef9-bb0c-1fe8bfab12cf.png  Lung_Opacity
4     b8b31d85-321b-4b10-96fa-4b418d3bff89.png        Normal
...                                        ...           ...
4230  97287139-ee40-4970-80ef-de858bcb85de.png        Normal
4231  132855e6-4b5b-42a0-9e01-704925001983.png        Normal
4232  01cda1bd-2e34-46da-9eb0-7b66ed7ccb1b.png  Lung_Opacity
4233  e43af91f-3fb4-48ae-8696-b167a6e8488f.png        Normal
4234  b6c02f3d-115b-474f-9105-da2ab50424f9.png        Normal

[4235 rows x 2 columns]


In [None]:
## Write Dataframe row-wise into text file

# Write one "<image name> <class name>" line per prediction.
# The context manager guarantees the file is flushed and closed even if a
# write fails part-way through.
with open(path + 'noisy_test_new.txt', 'w') as f:
    # Iterating the two columns directly avoids building a Series per row,
    # which is what iterrows() does.
    for imageName, className in zip(df['Image Name'], df['Image Class']):
        f.write(f"{imageName} {className}\n")