In [21]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.callbacks import ReduceLROnPlateau
import itertools

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np
import pandas as pd
import os

# Path to your data directory
# NOTE(review): absolute, machine-specific path; edit it to wherever the dataset lives.
data_dir = '/Users/advait/Desktop/Knee_Arthritis_AI/data'
# One sub-folder per class; a file's numeric label is the index of its folder in this list.
class_names = ["0Normal", "1Doubtful", "2Mild", "3Moderate", "4Severe"]

# Initialize lists to hold file paths and labels
file_paths = []
labels = []

# Load file paths and labels with verification for valid images
for idx, class_name in enumerate(class_names):
    class_dir = os.path.join(data_dir, class_name)
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the seeded split below reproducible across machines and runs.
    for file_name in sorted(os.listdir(class_dir)):
        # Compare the extension case-insensitively so files such as "scan.PNG" are kept.
        if not file_name.lower().endswith(".png"):
            continue
        file_path = os.path.join(class_dir, file_name)
        try:
            # Check if the file can be opened as an image
            with Image.open(file_path) as img:
                img.verify()  # Verify that it is a valid image
        except Exception as e:
            # Best effort: report the unreadable file and keep scanning.
            print(f"Skipping invalid image: {file_path} - Error: {e}")
            continue
        file_paths.append(file_path)
        labels.append(idx)

# Fail early with a readable message instead of an opaque error from train_test_split.
if not file_paths:
    raise RuntimeError(f"No valid .png images found under {data_dir}")

# Convert to numpy arrays
file_paths = np.array(file_paths)
labels = np.array(labels)

# Perform stratified split (80% train / 20% validation, class proportions preserved)
train_paths, val_paths, train_labels, val_labels = train_test_split(
    file_paths, labels, test_size=0.2, stratify=labels, random_state=42
)

# Convert numeric labels to string class names; the generators below read
# these strings from the 'class' column of the DataFrames.
train_labels = [class_names[label] for label in train_labels]
val_labels = [class_names[label] for label in val_labels]

# InceptionV3's ImageNet weights expect inputs scaled to [-1, 1] by
# inception_v3.preprocess_input. The previous `rescale=1/255` produced [0, 1]
# inputs, which do not match what the frozen backbone was trained on.
# `rescale` must not be combined with this function: ImageDataGenerator would
# apply both.
inception_preprocess = tf.keras.applications.inception_v3.preprocess_input

# Data augmentation for the training set
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    fill_mode='nearest',
    preprocessing_function=inception_preprocess
)
# Validation images get the same input scaling but no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=inception_preprocess)

# Create DataFrames for train and validation data
train_df = pd.DataFrame({'filename': train_paths, 'class': train_labels})
val_df = pd.DataFrame({'filename': val_paths, 'class': val_labels})

train_generator = train_datagen.flow_from_dataframe(
    train_df,
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=8,
    class_mode='categorical',
    shuffle=True
)
validation_generator = val_datagen.flow_from_dataframe(
    val_df,
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=8,
    class_mode='categorical',
    # Keep validation order fixed so every epoch (and any later evaluation)
    # sees the same samples in the same order.
    shuffle=False
)

# Downstream reports label class index i with class_names[i]; make sure the
# generators assigned indices in that same order.
assert list(train_generator.class_indices) == class_names, train_generator.class_indices
assert list(validation_generator.class_indices) == class_names, validation_generator.class_indices

# ImageNet-pretrained InceptionV3 backbone without its classification head
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

# Backbone stays frozen: only the new head defined below is trainable
for layer in base_model.layers:
    layer.trainable = False

# Classification head: global pooling -> 512-unit ReLU -> dropout -> softmax over the classes
x = base_model.output
for head_layer in (
    GlobalAveragePooling2D(),
    Dense(512, activation='relu'),
    Dropout(0.5),
):
    x = head_layer(x)
predictions = Dense(len(class_names), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Adam at 1e-4 with categorical cross-entropy, tracking accuracy
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks
# Multiply the learning rate by 0.3 after 5 epochs without val_loss improvement (floor 1e-6).
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=5, min_lr=1e-6, verbose=1)
# Keep only the weights of the epoch with the lowest val_loss.
model_checkpoint = ModelCheckpoint('best_inceptionv3_model.keras', save_best_only=True, monitor='val_loss', mode='min')

# Use the generators' own batch counts. len(generator) includes the final
# partial batch and reflects the files the generator actually loaded, whereas
# `len(paths) // batch_size` silently dropped the last batch every epoch, so
# val_loss was computed on an incomplete validation set.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)

# Train model for 50 epochs
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=[reduce_lr, model_checkpoint],
    verbose=1
)


In [None]:
# Define the dataset path
# Spelling corrected from "Knee_Arthiritis_AI" so this cell reads the same
# directory as the training cell's `data_dir`.
# NOTE(review): confirm this matches the folder name on disk.
dataset_path = '/Users/advait/Desktop/Knee_Arthritis_AI/data'

# Get the list of categories
categories = ['0Normal', '1Doubtful', '2Mild', '3Moderate', '4Severe']
data = []

# Collect data information
for category in categories:
    category_path = os.path.join(dataset_path, category)
    # Sorted for a deterministic row order; only PNG files are counted so that
    # stray entries (e.g. macOS ".DS_Store") do not inflate the class counts.
    for filename in sorted(os.listdir(category_path)):
        if filename.lower().endswith('.png'):
            data.append((category, filename))


# Create a DataFrame
df = pd.DataFrame(data, columns=['Category', 'Filename'])
print(df.head())
print(df['Category'].value_counts())

# Encode the labels
label_encoder = LabelEncoder()
df['Encoded_Category'] = label_encoder.fit_transform(df['Category'])

# Split the data into train and test sets
# NOTE(review): this rebinds `train_df`, which the training cell also defines
# with different columns ('filename', 'class'); consider a distinct name.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['Encoded_Category'])

print(train_df.shape, test_df.shape)

# Plot the distribution of categories
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='Category')
plt.title('Distribution of Knee Arthritis Categories')
plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def get_predictions(generator, model):
    """Run `model` over one full pass of `generator` and collect class indices.

    Args:
        generator: Batch iterator that supports ``reset()``, ``len()`` (number
            of batches in one pass) and ``next()``, yielding ``(x, y)`` pairs
            where ``y`` holds one row of per-class scores (e.g. one-hot) per sample.
        model: Object exposing ``predict_on_batch(x)`` that returns one row of
            per-class scores per sample.

    Returns:
        Tuple ``(y_true, y_pred)`` of 1-D integer arrays holding the argmax
        class index of every label row and every prediction row, in the order
        the generator produced them. Both are empty if the generator has no batches.
    """
    generator.reset()
    true_batches = []
    pred_batches = []
    for _ in range(len(generator)):
        x, y = next(generator)
        true_batches.append(np.argmax(y, axis=1))
        # predict_on_batch scores exactly this batch; calling model.predict()
        # in a loop prints a progress bar per batch and adds per-call overhead.
        pred_batches.append(np.argmax(model.predict_on_batch(x), axis=1))
    if not true_batches:
        # Keep an integer dtype even when there is nothing to concatenate.
        return np.array([], dtype=int), np.array([], dtype=int)
    return np.concatenate(true_batches), np.concatenate(pred_batches)

# Get predictions for training set
# Score the training images WITHOUT augmentation and in a fixed order.
# `train_generator` shuffles and applies random rotations/shifts/zooms, so
# predicting through it measures performance on distorted inputs. The frame is
# rebuilt from train_paths/train_labels because `train_df` may have been
# rebound by the exploratory cell.
train_eval_generator = val_datagen.flow_from_dataframe(
    pd.DataFrame({'filename': train_paths, 'class': train_labels}),
    x_col='filename',
    y_col='class',
    target_size=(224, 224),
    batch_size=8,
    class_mode='categorical',
    shuffle=False
)
y_true_train, y_pred_train = get_predictions(train_eval_generator, model)

# Get predictions for validation set
y_true_val, y_pred_val = get_predictions(validation_generator, model)

# Pin the label set so the report always has one row per class;
# classification_report raises if a class is absent and `labels` is not given.
report_labels = list(range(len(class_names)))

# Generate classification report for training set
# zero_division=0: a class that is never predicted scores 0 rather than a
# misleading 1.0 that would inflate the averages.
classification_report_train = classification_report(
    y_true_train, y_pred_train, labels=report_labels, target_names=class_names, zero_division=0
)
print("Classification Report - Training Set:")
print(classification_report_train)

# Generate classification report for validation set
classification_report_val = classification_report(
    y_true_val, y_pred_val, labels=report_labels, target_names=class_names, zero_division=0
)
print("Classification Report - Validation Set:")
print(classification_report_val)

# Save the reports to files
with open('classification_report_train.txt', 'w', encoding='utf-8') as f:
    f.write("Classification Report - Training Set:\n")
    f.write(classification_report_train)

with open('classification_report_val.txt', 'w', encoding='utf-8') as f:
    f.write("Classification Report - Validation Set:\n")
    f.write(classification_report_val)
# Generate and plot confusion matrices
def plot_confusion_matrix(y_true, y_pred, title):
    """Save a confusion-matrix heatmap as a PNG named after `title`.

    Args:
        y_true: 1-D sequence of true class indices (positions in `class_names`).
        y_pred: 1-D sequence of predicted class indices, aligned with `y_true`.
        title: Figure title; lower-cased with spaces replaced by underscores,
            it is also the output file name (plus ".png").

    The figure is written to the current working directory and then closed,
    so nothing is displayed inline.
    """
    # Pin the label set: without `labels`, a class missing from both inputs
    # shrinks the matrix and the class_names tick labels no longer line up.
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax)
    ax.set_title(title)
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')
    fig.tight_layout()
    fig.savefig(f'{title.lower().replace(" ", "_")}.png')
    # Close this specific figure so repeated calls do not accumulate open figures.
    plt.close(fig)

# One saved confusion-matrix image per split
for cm_title, cm_true, cm_pred in (
    ("Confusion Matrix - Training Set", y_true_train, y_pred_train),
    ("Confusion Matrix - Validation Set", y_true_val, y_pred_val),
):
    plot_confusion_matrix(cm_true, cm_pred, cm_title)

# Per-class sample counts, printed as (class indices, counts) for each split
for dist_header, dist_labels in (
    ("\nClass Distribution - Training Set:", y_true_train),
    ("\nClass Distribution - Validation Set:", y_true_val),
):
    print(dist_header)
    print(np.unique(dist_labels, return_counts=True))
