## imports

In [1]:
import numpy as np
import pandas as pd
import os
os.sys.path
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import time
import matplotlib.pyplot as plt
import cv2
import seaborn as sns
sns.set_style('darkgrid')
import shutil
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Activation,Dropout,Conv2D, MaxPooling2D,BatchNormalization
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras import regularizers
from keras.applications import  ResNet50
from tensorflow.keras.models import Model
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from tensorflow.keras import backend as K
import time

## balance and trim 

In [None]:
def trim(df, max_samples, min_samples, column):
    df=df.copy()
    groups=df.groupby(column)    
    trimmed_df = pd.DataFrame(columns = df.columns)
    groups=df.groupby(column)
    for label in df[column].unique(): 
        group=groups.get_group(label)
        count=len(group)    
        if count > max_samples:
            sampled_group=group.sample(n=max_samples, random_state=123,axis=0)
            trimmed_df=pd.concat([trimmed_df, sampled_group], axis=0)
        else:
            if count>=min_samples:
                sampled_group=group        
                trimmed_df=pd.concat([trimmed_df, sampled_group], axis=0)
    print('after trimming, the maximum samples in any class is now ',max_samples, ' and the minimum samples in any class is ', min_samples)
    return trimmed_df

def balance(df, n, working_dir, img_size):
    df=df.copy()
    print('Initial length of dataframe is ', len(df))
    aug_dir=os.path.join(working_dir, 'aug')# directory to store augmented images
    if os.path.isdir(aug_dir):# start with an empty directory
        shutil.rmtree(aug_dir)
    os.mkdir(aug_dir)        
    for label in df['labels'].unique():    
        dir_path=os.path.join(aug_dir,label)    
        os.mkdir(dir_path) # make class directories within aug directory
    # create and store the augmented images  
    total=0
    gen=ImageDataGenerator(horizontal_flip=True,  rotation_range=20, width_shift_range=.2,
                                  height_shift_range=.2, zoom_range=.2)
    groups=df.groupby('labels') # group by class
    for label in df['labels'].unique():  # for every class               
        group=groups.get_group(label)  # a dataframe holding only rows with the specified label 
        sample_count=len(group)   # determine how many samples there are in this class  
        if sample_count< n: # if the class has less than target number of images
            aug_img_count=0
            delta=n - sample_count  # number of augmented images to create
            target_dir=os.path.join(aug_dir, label)  # define where to write the images
            msg='{0:40s} for class {1:^30s} creating {2:^5s} augmented images'.format(' ', label, str(delta))
            print(msg, '\r', end='') # prints over on the same line
            aug_gen=gen.flow_from_dataframe( group,  x_col='filepaths', y_col=None, target_size=img_size,
                                            class_mode=None, batch_size=1, shuffle=False, 
                                            save_to_dir=target_dir, save_prefix='aug-', color_mode='rgb',
                                            save_format='jpg')
            while aug_img_count<delta:
                images=next(aug_gen)            
                aug_img_count += len(images)
            total +=aug_img_count
    print('Total Augmented images created= ', total)
    # create aug_df and merge with train_df to create composite training set ndf
    aug_fpaths=[]
    aug_labels=[]
    classlist=os.listdir(aug_dir)
    for klass in classlist:
        classpath=os.path.join(aug_dir, klass)     
        flist=os.listdir(classpath)    
        for f in flist:        
            fpath=os.path.join(classpath,f)         
            aug_fpaths.append(fpath)
            aug_labels.append(klass)
    Fseries=pd.Series(aug_fpaths, name='filepaths')
    Lseries=pd.Series(aug_labels, name='labels')
    aug_df=pd.concat([Fseries, Lseries], axis=1)         
    df=pd.concat([df,aug_df], axis=0).reset_index(drop=True)
    print('Length of augmented dataframe is now ', len(df))
    return df 
   


## df


In [2]:
"D:\\Knee_data_Clahe\\cropped\\train"
IMGSZ= (224,224)
# train_path="C:\\Users\\91745\\Documents\\Datasets\\knee\\train" 
# test_path= "C:\\Users\\91745\\Documents\\Datasets\\knee\\test" 
# valid_path="C:\\Users\\91745\\Documents\\Datasets\\knee\\val" 

train_path="D:\\Knee_data_Clahe\\train" 
test_path= "D:\\Knee_data_Clahe\\test" 
valid_path= "D:\\Knee_data_Clahe\\val" 

# train_path="D:\\Knee_data_Clahe\\cropped\\train" 
# test_path= "D:\\Knee_data_Clahe\\cropped\\test" 
# valid_path= "D:\\Knee_data_Clahe\\cropped\\val" 

# train_path="D:\\cropped_knees\\train"
# test_path= "D:\\cropped_knees\\test" 
# valid_path="D:\\cropped_knees\\val" 



list_of_classes=['Healthy', 'Doubtful', 'Minimal', 'Moderate', 'Severe']
for d in [train_path, test_path, valid_path]:
    filepaths = []
    labels=[] 
    classlist=os.listdir(d)   
    for klass in classlist:
        intklass=int(klass)
        label=list_of_classes[intklass]
        classpath=os.path.join(d, klass)
        flist=os.listdir(classpath)        
        for f in flist:
            fpath=os.path.join(classpath,f)
            filepaths.append(fpath)
            labels.append(label)
    Fseries=pd.Series(filepaths, name='filepaths')
    Lseries=pd.Series(labels, name='labels')        
    pdf=pd.concat([Fseries, Lseries], axis=1)
    if d == test_path:
        test_df=pdf
    elif d == valid_path:
        valid_df=pdf
    else:
        train_df=pdf
print('train_df lenght: ', len(train_df), '  test_df length: ', len(test_df), '  valid_df length: ', len(valid_df))
# get the number of classes and the images count for each class in train_df
classes=sorted(list(train_df['labels'].unique()))
class_count = len(classes)
print('The number of classes in the dataset is: ', class_count)
groups=train_df.groupby('labels')
print('{0:^30s} {1:^13s}'.format('CLASS', 'IMAGE COUNT'))
countlist=[]
classlist=[]
for label in sorted(list(train_df['labels'].unique())):
    group=groups.get_group(label)
    countlist.append(len(group))
    classlist.append(label)
    print('{0:^30s} {1:^13s}'.format(label, str(len(group))))

# get the classes with the minimum and maximum number of train images
max_value=np.max(countlist)
max_index=countlist.index(max_value)
max_class=classlist[max_index]
min_value=np.min(countlist)
min_index=countlist.index(min_value)
min_class=classlist[min_index]
print(max_class, ' has the most images= ',max_value, ' ', min_class, ' has the least images= ', min_value)
# lets get the average height and width of a sample of the train images
ht=0
wt=0
# select 100 random samples of train_df
train_df_sample=train_df.sample(n=100, random_state=123,axis=0)
for i in range (len(train_df_sample)):
    fpath=train_df_sample['filepaths'].iloc[i]
    img=plt.imread(fpath)
    shape=img.shape
    ht += shape[0]
    wt += shape[1]
print('average height= ', ht//100, ' average width= ', wt//100, 'aspect ratio= ', ht/wt)

train_df lenght:  5839   valid_df length:  826
The number of classes in the dataset is:  5
            CLASS               IMAGE COUNT 
           Doubtful                1046     
           Healthy                 2286     
           Minimal                 1516     
           Moderate                 802     
            Severe                  189     
Healthy  has the most images=  2286   Severe  has the least images=  189
average height=  224  average width=  224 aspect ratio=  1.0


In [53]:
# Drop specified classes

drop_classes = ["Healthy",'Moderate', 'Severe' ]

train_df = train_df[~train_df['labels'].isin(drop_classes)]
valid_df = valid_df[~valid_df['labels'].isin(drop_classes)]  
test_df = test_df[~test_df['labels'].isin(drop_classes)]

# Update the list of classes 
list_of_classes = [c for c in list_of_classes if c not in drop_classes]

# Re-calculate the total classes
class_count = len(list_of_classes)

# Re-count the images per class
groups = train_df.groupby('labels')  
for label in list_of_classes:
    group = groups.get_group(label) 
    print(label, len(group))

Healthy 2286
Minimal 1516
Severe 173


## now trim

In [None]:
max_samples=500
# since each class has more than 200 images all classes will be trimmed to have 200 images per class
min_samples=500
column='labels'
train_df= trim(train_df, max_samples, min_samples, column)

In [None]:
n=500 # number of samples in each class
working_dir=r'./' # directory to store augmented images
img_size=IMGSZ # size of augmented images
train_df=balance(train_df, n, working_dir, img_size) 

In [None]:
print (train_df['labels'].value_counts())

## pre processing and f ex

In [5]:
def preprocess_and_extract_features(df, img_size):
    img_data = []
    labels = []

    for index, row in df.iterrows():
        img = cv2.imread(row['filepaths'])
        img = cv2.resize(img, img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img / 255.0
        img_data.append(img)
        labels.append(row['labels'])

    img_data = np.array(img_data)
    labels = np.array(labels)
    return img_data, labels

img_size = (224, 224)
train_data, train_labels = preprocess_and_extract_features(train_df, img_size)
valid_data, valid_labels = preprocess_and_extract_features(valid_df, img_size)


# Load ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_size[0], img_size[1], 3))
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
model = Model(inputs=base_model.input, outputs=x)

# Extract features
train_features = model.predict(train_data)
valid_features = model.predict(valid_data)



from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.svm import SVC

# Train an SVM classifier
svm_classifier = SVC(kernel='linear', random_state=123, C=1.0)
svm_classifier.fit(train_features, train_labels)

# Predict using the trained SVM classifier
valid_predictions_svm = svm_classifier.predict(valid_features)

# Evaluate the performance of the SVM classifier
accuracy_svm = accuracy_score(valid_labels, valid_predictions_svm)
print(f'Accuracy on validation data (SVM): {accuracy_svm:.2f}')

print(classification_report(valid_labels, valid_predictions_svm))




Accuracy on validation data (SVM): 0.38
              precision    recall  f1-score   support

    Doubtful       0.22      0.25      0.24       153
     Healthy       0.56      0.49      0.52       328
     Minimal       0.35      0.26      0.30       212
    Moderate       0.28      0.47      0.35       106
      Severe       0.31      0.41      0.35        27

    accuracy                           0.38       826
   macro avg       0.34      0.37      0.35       826
weighted avg       0.40      0.38      0.38       826



In [9]:
def preprocess_and_extract_features1(df, img_size):
    img_data = []
    labels = []

    for index, row in df.iterrows():
        img = cv2.imread(row['filepaths'])
        img = cv2.resize(img, img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img / 255.0
        img_data.append(img)
        labels.append(row['labels'])

    img_data = np.array(img_data)
    labels = np.array(labels)
    return img_data, labels


test_data1, test_labels1 = preprocess_and_extract_features1(test_df, img_size)
test_features1 = model.predict(test_data1)



In [10]:
# Calculate the accuracy on the test data
test_predictions = svm_classifier.predict(test_features1)
test_accuracy = accuracy_score(test_labels1, test_predictions)
print(f'Accuracy on test data: {test_accuracy:.2f}')

Accuracy on test data: 0.42


In [11]:
conf_matrix = confusion_matrix(test_labels1, test_predictions)
precision = precision_score(test_labels1, test_predictions, average='micro')  # Choose 'micro', 'macro', 'weighted' for multiclass
recall = recall_score(test_labels1, test_predictions, average='micro')  # Choose 'micro', 'macro', 'weighted' for multiclass
f1 = f1_score(test_labels1, test_predictions, average='micro')  # Choose 'micro', 'macro', 'weighted' for multiclass
print(classification_report(test_labels1, test_predictions))

# Print the results
print(f'Confusion Matrix:\n{conf_matrix}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')

              precision    recall  f1-score   support

    Doubtful       0.36      0.27      0.31       100
     Healthy       0.36      0.53      0.43       100
     Minimal       0.33      0.24      0.28       100
    Moderate       0.37      0.46      0.41       100
      Severe       0.77      0.61      0.68       100

    accuracy                           0.42       500
   macro avg       0.44      0.42      0.42       500
weighted avg       0.44      0.42      0.42       500

Confusion Matrix:
[[27 41 16 16  0]
 [25 53 12 10  0]
 [14 34 24 22  6]
 [ 8 18 16 46 12]
 [ 1  2  4 32 61]]
Precision: 0.42
Recall: 0.42
F1 Score: 0.42


In [12]:
for i in range(len(test_labels1)):
    print(test_labels1[i]," ", test_predictions[i])

Healthy   Doubtful
Healthy   Moderate
Healthy   Minimal
Healthy   Healthy
Healthy   Doubtful
Healthy   Healthy
Healthy   Moderate
Healthy   Doubtful
Healthy   Doubtful
Healthy   Healthy
Healthy   Healthy
Healthy   Healthy
Healthy   Minimal
Healthy   Doubtful
Healthy   Healthy
Healthy   Healthy
Healthy   Doubtful
Healthy   Healthy
Healthy   Healthy
Healthy   Healthy
Healthy   Moderate
Healthy   Moderate
Healthy   Moderate
Healthy   Doubtful
Healthy   Healthy
Healthy   Healthy
Healthy   Moderate
Healthy   Healthy
Healthy   Doubtful
Healthy   Healthy
Healthy   Minimal
Healthy   Healthy
Healthy   Doubtful
Healthy   Minimal
Healthy   Minimal
Healthy   Healthy
Healthy   Healthy
Healthy   Minimal
Healthy   Doubtful
Healthy   Doubtful
Healthy   Healthy
Healthy   Healthy
Healthy   Minimal
Healthy   Healthy
Healthy   Healthy
Healthy   Healthy
Healthy   Healthy
Healthy   Doubtful
Healthy   Healthy
Healthy   Healthy
Healthy   Healthy
Healthy   Doubtful
Healthy   Healthy
Healthy   Healthy
Healthy  

In [50]:
import os
import random

# Define the directory paths for classes 0, 3, and 4
class_directories = {
    '0': r'C:\Users\deepa\OneDrive\Desktop\major\knee\test\0',
   # '3': '/content/drive/MyDrive/Colab Notebooks/Knee_data_Clahe/test/3',
    '4': r'C:\Users\deepa\OneDrive\Desktop\major\knee\train\4'
}

# Specify the number of random images to select
num_random_images = 5

# Initialize lists to store actual and predicted labels
actual_labels = []
predicted_labels = []

# Select random images from the specified classes
for label, directory in class_directories.items():
    image_files = os.listdir(directory)
    random_images = random.sample(image_files, num_random_images)
    for image_file in random_images:
        actual_labels.append(label)
        image_path = os.path.join(directory, image_file)

        # Preprocess and extract features from the image
        img = cv2.imread(image_path)
        img = cv2.resize(img, img_size)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img / 255.0
        img = np.expand_dims(img, axis=0)  # Reshape to match model input

        # Use the trained Random Forest classifier to predict the label
        predicted_label = rf_classifier.predict(model.predict(img))
        predicted_labels.append(predicted_label[0])

# Display the actual and predicted labels for each image
for i in range(num_random_images):
    actual_label = actual_labels[i]
    predicted_label = predicted_labels[i]
    print(f"Image {i + 1}: Actual Label: {actual_label}, Predicted Label: {predicted_label}")


Image 1: Actual Label: 0, Predicted Label: Healthy
Image 2: Actual Label: 0, Predicted Label: Healthy
Image 3: Actual Label: 0, Predicted Label: Healthy
Image 4: Actual Label: 0, Predicted Label: Healthy
Image 5: Actual Label: 0, Predicted Label: Healthy
