## 0 Server and Directory Checks

In [None]:
import sys
import os

# Sanity check: report which interpreter and working directory this kernel is
# using (useful because later cells refer to the dataset by a relative path).
interpreter_path = sys.executable
print("Python executable:", interpreter_path)
working_dir = os.getcwd()
print("Current working directory:", working_dir)

## 1 Imports

In [None]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50
import random
from tqdm import tqdm
from classifier_models import Resnet50Model, Resnet18Model, Densenet121Model, BaseResnetModel
from active_learning_models import *
import numpy as np
from sklearn.cluster import KMeans
from costume_dataset import ChestXrayDataset

## 2 Run Parameters


In [None]:
# Notebook-wide run settings.
dataset_path: str = "nih_chest_xrays_light"  # dataset root, given as a relative path
batch_size: int = 32  # intended mini-batch size
epochs: int = 3  # intended number of training epochs

In [None]:
# Choose the compute backend in priority order: MPS first, then CUDA, and
# finally CPU when neither accelerator reports itself as available.
device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else ("cuda" if torch.cuda.is_available() else "cpu")
)
print(device)

## Create Datasets and Loaders

In [None]:
# Build the chest X-ray dataset rooted at `dataset_path`, using the
# 'from_files' split mode.
dataset = ChestXrayDataset(dataset_path, split_type="from_files")

# Preview the first rows of the dataset's `df` table.
sample_rows = dataset.df.head()
print("Sample data:\n", sample_rows)

# Explore models

## ResNet-18 Model

In [None]:
# ResNet-18 classifier, constructed with pretrained=True and freeze=False
# (presumably all layers are fine-tuned — confirm in classifier_models).
resnet18_model = Resnet18Model(
    pretrained=True,
    freeze=False,
    loss_function="BCEWithLogitsLoss",
    optimizer="Adam",
)

In [None]:
# Fetch loaders for the 'train' and 'test' splits of the dataset.
train_loader = dataset.get_dataloader(from_split="train")
test_loader = dataset.get_dataloader(from_split="test")

# Train for the notebook-wide `epochs` run parameter (currently 3) instead of a
# separate hard-coded literal, so editing the parameter actually affects this
# run; then evaluate on the test split.
resnet18_model.train_model(device, train_loader, epochs=epochs)
resnet18_model.evaluate(device, test_loader)

## ResNet-50 Model

In [None]:
# ResNet-50 classifier, constructed with pretrained=True and freeze=True
# (presumably the backbone is frozen — confirm in classifier_models).
resnet50_model = Resnet50Model(
    pretrained=True,
    freeze=True,
    loss_function="BCEWithLogitsLoss",
    optimizer="Adam",
)

In [None]:
# Full-data run: train ResNet-50 on the whole 'train' split for 4 epochs,
# then evaluate it on the 'test' split.
train_loader = dataset.get_dataloader(
    from_split="train",
)
test_loader = dataset.get_dataloader(
    from_split="test",
)

resnet50_model.train_model(device, train_loader, epochs=4)
resnet50_model.evaluate(device, test_loader)

In [None]:
# Faster variant: train on a 10,000-sample subset of the 'train' split rather
# than the full split, then evaluate on the 'test' split.
# NOTE(review): this reuses `resnet50_model`; if the full-dataset cell has
# already been run, training presumably continues from those weights rather
# than starting fresh — confirm that is intended.
small_train_loader = dataset.get_dataloader(
    from_split="train",
    sample_size=10000,
)
test_loader = dataset.get_dataloader(from_split="test")

resnet50_model.train_model(device, small_train_loader, epochs=4)
resnet50_model.evaluate(device, test_loader)

# Active Learning (AL) Pipeline


In [None]:
# Random-sampling active-learning run: 10 iterations, each with a budget of
# 100 and 3 training epochs, using a ResNet-18 model and a fixed seed.
# NOTE: the pipeline is pointed at the data via root_dir=dataset_path; passing
# the already-built `dataset` object is currently disabled.
active_learning_pipeline = RandomSamplingActiveLearning(
    device=device,
    root_dir=dataset_path,
    seed=42,
    model_name="resnet18",
    optimizer_name="Adam",
    objective_function_name="BCEWithLogitsLoss",
    iterations=10,
    epochs_per_iter=3,
    budget_per_iter=100,
    test_sample_size=1000,
)



In [None]:
# Execute the active-learning pipeline configured above, then hand the
# finished pipeline object to plot_results to visualise the run.
active_learning_pipeline.run_pipeline()
plot_results(active_learning_pipeline)