# Feature Extracting model
This CNN will return a feature vector extracted from the images which will then be used as a vector to represent that image

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf


In [2]:
from tensorflow.keras.applications import ResNet101
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.models import Model

In [3]:
from tensorflow.keras.models import Sequential
resnet_model = Sequential()

pretrained_model= tf.keras.applications.ResNet101(include_top=False,
                  input_shape=(650 , 650 , 3),
                  pooling='avg',classes=1,
                  weights='imagenet')
for layer in pretrained_model.layers:
        layer.trainable=False

resnet_model.add(pretrained_model)
resnet_model.add(Flatten())

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m171446536/171446536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [4]:
resnet_model.summary()

# Loading the Dataset

In [5]:
dataset = pd.read_csv('/content/drive/MyDrive/siddhi2.0/dataset_3.csv')


In [6]:
dataset.head()

Unnamed: 0,Img_path,Intraventricular,Intraparenchymal,Subarachnoid,Epidural,Subdural,No_Hemorrhage
0,/content/drive/MyDrive/siddhi2.0/Patients_CT/0...,0,0,0,0,0,1
1,/content/drive/MyDrive/siddhi2.0/Patients_CT/0...,0,0,0,0,0,1
2,/content/drive/MyDrive/siddhi2.0/Patients_CT/0...,0,0,0,0,0,1
3,/content/drive/MyDrive/siddhi2.0/Patients_CT/0...,0,0,0,0,0,1
4,/content/drive/MyDrive/siddhi2.0/Patients_CT/0...,0,0,0,0,0,1


In [7]:
data = dataset.sample(frac=1)

In [8]:
img_path = data['Img_path']
Intraventricular_labels = data['Intraventricular']
Intraparenchymal_labels = data['Intraparenchymal']
Subarachnoid_labels = data['Subarachnoid']
Epidural_labels = data['Epidural']
Subdural_labels = data['Subdural']
No_Hemorrhage_labels = data['No_Hemorrhage']

In [9]:
print(len(img_path))

2501


In [10]:
print(np.sum(Intraventricular_labels))
print(np.sum(Intraparenchymal_labels))
print(np.sum(Subarachnoid_labels))
print(np.sum(Epidural_labels))
print(np.sum(Subdural_labels))
print(np.sum(No_Hemorrhage_labels))


24
73
18
173
56
2183


# Extracting the Features from the images

In [11]:
import cv2


X_features = []
for path in img_path:
  img = cv2.imread(path)
  t = np.array([img])
  y = resnet_model.predict(t)
  # print(y)
  X_features.extend(y)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━

In [12]:
resnet_model.save("/content/drive/MyDrive/siddhi2.0/XGB_models/feature_gen.keras")

# Handling the Data imbalance

In [13]:
import imblearn
from imblearn.over_sampling import SMOTE

In [14]:
smote1 = SMOTE(random_state=42)
smote2 = SMOTE(random_state=42)
smote3 = SMOTE(random_state=42)
smote4 = SMOTE(random_state=42)
smote5 = SMOTE(random_state=42)
smote6 = SMOTE(random_state=42)

X_res_iv , y_res_iv = smote1.fit_resample(X_features , Intraventricular_labels)
X_res_ip , y_res_ip = smote2.fit_resample(X_features , Intraparenchymal_labels)
X_res_sa , y_res_sa = smote3.fit_resample(X_features , Subarachnoid_labels)
X_res_ep , y_res_ep = smote4.fit_resample(X_features , Epidural_labels)
X_res_sd , y_res_sd = smote5.fit_resample(X_features , Subdural_labels)
X_res_no , y_res_no = smote6.fit_resample(X_features , No_Hemorrhage_labels)



In [15]:
print(f"total: {len(y_res_iv)} positive: {np.sum(y_res_iv)}")
print(f"total: {len(y_res_ip)} positive: {np.sum(y_res_ip)}")
print(f"total: {len(y_res_sa)} positive: {np.sum(y_res_sa)}")
print(f"total: {len(y_res_ep)} positive: {np.sum(y_res_ep)}")
print(f"total: {len(y_res_sd)} positive: {np.sum(y_res_sd)}")
print(f"total: {len(y_res_no)} positive: {np.sum(y_res_no)}")


total: 4954 positive: 2477
total: 4856 positive: 2428
total: 4966 positive: 2483
total: 4656 positive: 2328
total: 4890 positive: 2445
total: 4366 positive: 2183


# Training an ensemble of decision trees to classify the images represented as feature vectors

In [16]:
from xgboost import XGBClassifier

xg_model_iv = XGBClassifier(eval_metric='logloss' , random_state=42)
xg_model_iv.fit(X_res_iv , y_res_iv)

y_pred_iv = xg_model_iv.predict(X_res_iv)

In [18]:
from sklearn.metrics import classification_report , confusion_matrix

print(f"classification report:\n {classification_report(y_res_iv , y_pred_iv)} \nconfusion matrix:\n {confusion_matrix(y_res_iv , y_pred_iv)}")

classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2477
           1       1.00      1.00      1.00      2477

    accuracy                           1.00      4954
   macro avg       1.00      1.00      1.00      4954
weighted avg       1.00      1.00      1.00      4954
 
confusion matrix:
 [[2477    0]
 [   0 2477]]


In [19]:
xg_model_iv.save_model("/content/drive/MyDrive/siddhi2.0/XGB_models/xg_model_iv.bin")



In [20]:
xg_model_ip = XGBClassifier(eval_metric='logloss' , random_state=42)
xg_model_ip.fit(X_res_ip , y_res_ip)

y_pred_ip = xg_model_ip.predict(X_res_ip)

In [21]:
print(f"classification report:\n {classification_report(y_res_ip , y_pred_ip)} \nconfusion matrix:\n {confusion_matrix(y_res_ip , y_pred_ip)}")
print("\n \n \n")

classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2428
           1       1.00      1.00      1.00      2428

    accuracy                           1.00      4856
   macro avg       1.00      1.00      1.00      4856
weighted avg       1.00      1.00      1.00      4856
 
confusion matrix:
 [[2428    0]
 [   0 2428]]

 
 



In [22]:
xg_model_ip.save_model("/content/drive/MyDrive/siddhi2.0/XGB_models/xg_model_ip.bin")




In [23]:

xg_model_sa = XGBClassifier(eval_metric='logloss' , random_state=42)
xg_model_sa.fit(X_res_sa , y_res_sa)

y_pred_sa = xg_model_sa.predict(X_res_sa)

In [24]:

print(f"classification report:\n {classification_report(y_res_sa , y_pred_sa)} \nconfusion matrix:\n {confusion_matrix(y_res_sa , y_pred_sa)}")
print("\n \n \n")

classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2483
           1       1.00      1.00      1.00      2483

    accuracy                           1.00      4966
   macro avg       1.00      1.00      1.00      4966
weighted avg       1.00      1.00      1.00      4966
 
confusion matrix:
 [[2483    0]
 [   0 2483]]

 
 



In [25]:
xg_model_sa.save_model("/content/drive/MyDrive/siddhi2.0/XGB_models/xg_model_sa.bin")




In [26]:

xg_model_ep = XGBClassifier(eval_metric='logloss' , random_state=42)
xg_model_ep.fit(X_res_ep , y_res_ep)

y_pred_ep = xg_model_ep.predict(X_res_ep)

In [27]:

print(f"classification report:\n {classification_report(y_res_ep , y_pred_ep)} \nconfusion matrix:\n {confusion_matrix(y_res_ep , y_pred_ep)}")
print("\n \n \n")

classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2328
           1       1.00      1.00      1.00      2328

    accuracy                           1.00      4656
   macro avg       1.00      1.00      1.00      4656
weighted avg       1.00      1.00      1.00      4656
 
confusion matrix:
 [[2328    0]
 [   0 2328]]

 
 



In [28]:
xg_model_ep.save_model("/content/drive/MyDrive/siddhi2.0/XGB_models/xg_model_ep.bin")




In [29]:
xg_model_sd = XGBClassifier(eval_metric='logloss' , random_state=42)
xg_model_sd.fit(X_res_sd , y_res_sd)

y_pred_sd = xg_model_sd.predict(X_res_sd)

In [30]:

print(f"classification report:\n {classification_report(y_res_sd , y_pred_sd)} \nconfusion matrix:\n {confusion_matrix(y_res_sd , y_pred_sd)}")
print("\n \n \n")


classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2445
           1       1.00      1.00      1.00      2445

    accuracy                           1.00      4890
   macro avg       1.00      1.00      1.00      4890
weighted avg       1.00      1.00      1.00      4890
 
confusion matrix:
 [[2445    0]
 [   0 2445]]

 
 



In [31]:
xg_model_sd.save_model("/content/drive/MyDrive/siddhi2.0/XGB_models/xg_model_sd.bin")




In [32]:

xg_model_no = XGBClassifier(eval_metric='logloss' , random_state=42)
xg_model_no.fit(X_res_no , y_res_no)

y_pred_no = xg_model_no.predict(X_res_no)

In [33]:

print(f"classification report:\n {classification_report(y_res_no , y_pred_no)} \nconfusion matrix:\n {confusion_matrix(y_res_no , y_pred_no)}")


classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      2183
           1       1.00      1.00      1.00      2183

    accuracy                           1.00      4366
   macro avg       1.00      1.00      1.00      4366
weighted avg       1.00      1.00      1.00      4366
 
confusion matrix:
 [[2183    0]
 [   0 2183]]


In [34]:
xg_model_no.save_model("/content/drive/MyDrive/siddhi2.0/XGB_models/xg_model_no.bin")




# Temporary

In [35]:
feature_model = tf.keras.models.load_model("/content/drive/MyDrive/siddhi2.0/XGB_models/feature_gen.keras")

In [36]:
img = cv2.imread("/content/drive/MyDrive/siddhi2.0/Patients_CT/049/brain/14.jpg")
t = np.array([img])
x_f = feature_model.predict(t)

iv = xg_model_iv.predict(x_f)
ip = xg_model_ip.predict(x_f)
sa = xg_model_sa.predict(x_f)
ep = xg_model_ep.predict(x_f)
sd = xg_model_sd.predict(x_f)
no = xg_model_no.predict(x_f)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12s/step


In [37]:
print(f"iv : {iv}")
print(f"ip : {ip}")
print(f"sa : {sa}")
print(f"ep : {ep}")
print(f"sd : {sd}")
print(f"no : {no}")

iv : [0]
ip : [0]
sa : [0]
ep : [1]
sd : [0]
no : [0]


In [38]:
!pip show xgboost


Name: xgboost
Version: 2.1.3
Summary: XGBoost Python Package
Home-page: 
Author: 
Author-email: Hyunsu Cho <chohyu01@cs.washington.edu>, Jiaming Yuan <jm.yuan@outlook.com>
License: Apache-2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: numpy, nvidia-nccl-cu12, scipy
Required-by: 


In [39]:

!pip show scikit-learn


Name: scikit-learn
Version: 1.6.0
Summary: A set of python modules for machine learning and data mining
Home-page: https://scikit-learn.org
Author: 
Author-email: 
License: BSD 3-Clause License
         
         Copyright (c) 2007-2024 The scikit-learn developers.
         All rights reserved.
         
         Redistribution and use in source and binary forms, with or without
         modification, are permitted provided that the following conditions are met:
         
         * Redistributions of source code must retain the above copyright notice, this
           list of conditions and the following disclaimer.
         
         * Redistributions in binary form must reproduce the above copyright notice,
           this list of conditions and the following disclaimer in the documentation
           and/or other materials provided with the distribution.
         
         * Neither the name of the copyright holder nor the names of its
           contributors may be used to endorse 