In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from src.preprocess import preprocess_image
from src.train import train_model
from src.evaluate import evaluate_model
from src.gradcam import generate_and_save_gradcam

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

In [None]:
# Load the ground-truth table, then derive two helper columns from it:
#   image_path - location of each image file under ../data/images/
#   labels     - the pipe-separated 'Finding Labels' string split into a list
df = pd.read_csv("../data/Ground_Truth.csv")
df['image_path'] = df['Image Index'].map("../data/images/{}".format)
df['labels'] = df['Finding Labels'].map(lambda findings: findings.split('|'))
df.head()


In [None]:
# Sanity check: run the first image through the preprocessing step and display it.
sample_path = df['image_path'].iat[0]
img = preprocess_image(sample_path)

# squeeze() drops length-1 axes so imshow receives a plain 2-D array
# (assumes preprocess_image returns a single-channel image - TODO confirm).
plt.imshow(img.squeeze(), cmap='gray')
plt.axis('off')
plt.title("Preprocessed Image")
plt.show()


In [None]:
def _stratify_target(label_matrix, min_count=2):
    """Build a ``stratify`` argument that ``train_test_split`` will accept.

    When given a 2-D multilabel matrix, scikit-learn treats every distinct row
    (i.e. every label *combination*) as its own class and raises ``ValueError``
    if any class has fewer than two members. Rare combinations are common in
    multilabel data, so passing the raw matrix can abort the split.

    Parameters
    ----------
    label_matrix : 2-D array
        Binary indicator matrix, one row per sample.
    min_count : int, default 2
        Minimum number of samples a label combination needs in order to be
        kept as its own stratum.

    Returns
    -------
    2-D array, 1-D array, or None
        * ``label_matrix`` itself when every combination is frequent enough,
          so the split is exactly what plain ``stratify=label_matrix`` gives.
        * Otherwise a 1-D array of stratum ids in which all rare combinations
          share one pooled stratum.
        * ``None`` (no stratification) when no usable stratum can be formed.
    """
    _, inverse, counts = np.unique(
        label_matrix, axis=0, return_inverse=True, return_counts=True
    )
    # Flatten defensively: the shape of `inverse` with axis=0 has varied
    # between NumPy releases.
    inverse = inverse.ravel()

    rare = counts[inverse] < min_count
    if not rare.any():
        return label_matrix

    strata = inverse.copy()
    if rare.sum() >= min_count:
        # Pool all rare combinations into one dedicated stratum.
        strata[rare] = -1
    else:
        # Too few rare samples to form their own stratum: attach them to the
        # most common combination, or give up on stratification entirely.
        dominant = counts.argmax()
        if counts[dominant] < min_count:
            return None
        strata[rare] = dominant
    return strata


# One-hot encode the per-image label lists; class_names keeps the column order.
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(df['labels'])
class_names = mlb.classes_

# 80 % train, then the remaining 20 % is halved into validation and test.
train_df, temp_df, y_train, y_temp = train_test_split(
    df, Y, test_size=0.2, stratify=_stratify_target(Y), random_state=42
)
val_df, test_df, y_val, y_test = train_test_split(
    temp_df, y_temp, test_size=0.5, stratify=_stratify_target(y_temp), random_state=42
)


In [None]:
# Materialise a small working subset in memory: the first 500 training images
# and the first 100 validation images, with their label rows truncated to match.
train_paths = train_df['image_path'].iloc[:500]
X_train = np.array(list(map(preprocess_image, train_paths)))
y_train = y_train[:500]

val_paths = val_df['image_path'].iloc[:100]
X_val = np.array(list(map(preprocess_image, val_paths)))
y_val = y_val[:100]


In [None]:
# Fit the classifier on the in-memory subset; one output unit per label class.
model, history = train_model(
    X_train,
    y_train,
    X_val,
    y_val,
    num_classes=len(class_names),
)


In [None]:
# Overlay the training and validation loss curves recorded during fitting.
for history_key, legend_label in (('loss', 'train loss'), ('val_loss', 'val loss')):
    plt.plot(history.history[history_key], label=legend_label)
plt.title("Loss over Epochs")
plt.legend()
plt.show()


In [None]:
# Score the trained model on the first 100 held-out test images.
test_paths = test_df['image_path'].iloc[:100]
X_test = np.array(list(map(preprocess_image, test_paths)))
y_test_sample = y_test[:100]

metrics = evaluate_model(model, X_test, y_test_sample, class_names)
print(metrics)


In [None]:
# Produce a Grad-CAM visualisation for the first test image, targeting the
# first class in class_names.
target_class = 0
generate_and_save_gradcam(
    model=model,
    image_path=test_df['image_path'].iat[0],
    class_index=target_class,
    class_name=class_names[target_class],
    preprocess_fn=preprocess_image,
)
