<a href="https://colab.research.google.com/github/Huzi9951/superman-or-batman/blob/main/project_notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torchvision
from torch import nn

In [None]:
import zipfile
import os
# Unpack the uploaded dataset archive into the local data folder.
zip_file_path, extraction_path = 'archive.zip', 'data/'
with zipfile.ZipFile(file=zip_file_path, mode='r') as zip_ref:
    # extractall creates the destination folder when it is missing.
    zip_ref.extractall(path=extraction_path)



In [None]:
# Root folder for the dataset and the names of the two class folders.
parent_directory = "/content/data"  # Or any desired path
subfolder1_name, subfolder2_name = "Superman", "Batman"

subfolder1_path = os.path.join(parent_directory, subfolder1_name)
subfolder2_path = os.path.join(parent_directory, subfolder2_name)

# Create the root first, then one folder per class; existing folders are kept.
for _folder in (parent_directory, subfolder1_path, subfolder2_path):
    os.makedirs(_folder, exist_ok=True)
print("Done")

In [None]:
from pathlib import Path
import shutil

# Base dataset directory: a flat folder where each image sits next to a
# same-named .txt label file.
dataset_path = Path("data/Superman or Batman")

# Create subfolders for cleaned data
superman_path = dataset_path.parent / "Superman"
batman_path = dataset_path.parent / "Batman"
superman_path.mkdir(exist_ok=True)
batman_path.mkdir(exist_ok=True)

# Label id -> (destination folder, class name used in the log message).
label_destinations = {
    "0": (superman_path, "Superman"),
    "1": (batman_path, "Batman"),
}

# Index the images by file stem. The same extensions the train/test split
# accepts are recognised here (case-insensitively), so a .png or .jpeg image
# is not left behind when its label file is moved or deleted.
image_suffixes = (".png", ".jpg", ".jpeg")
images_by_stem = {}
for candidate in dataset_path.glob("*"):
    if candidate.suffix.lower() in image_suffixes:
        images_by_stem.setdefault(candidate.stem, []).append(candidate)

# Snapshot (and sort) the label files up front: the loop deletes and moves
# files out of the very directory being listed.
for txt_file in sorted(dataset_path.glob("*.txt")):
    # The first whitespace-separated token of every non-blank line is the label.
    with open(txt_file, "r") as f:
        labels = {line.strip().split()[0] for line in f if line.strip()}

    # Image file(s) sharing this label file's name.
    image_files = images_by_stem.get(txt_file.stem, [])

    if "0" in labels and "1" in labels:
        # Mixed labels -> delete the label file and its image(s)
        print(f"Deleting mixed-label pair: {txt_file.name}")
        txt_file.unlink(missing_ok=True)
        for image_file in image_files:
            image_file.unlink(missing_ok=True)
    elif labels in ({"0"}, {"1"}):
        # Exactly one class present -> move the pair into that class folder
        (label,) = labels
        destination, class_label = label_destinations[label]
        print(f"Moving {txt_file.name} to {class_label} folder")
        shutil.move(txt_file, destination / txt_file.name)
        for image_file in image_files:
            shutil.move(image_file, destination / image_file.name)
    else:
        # Empty file or a label other than 0/1: leave the files where they are.
        print(f"Unknown label format in {txt_file.name}: {labels}")


In [None]:
from pathlib import Path

# Define paths to the subfolders
base_path = Path("data/")
superman_path = base_path / "Superman"
batman_path = base_path / "Batman"

# Helper that removes every .txt file sitting directly inside a folder
def delete_txt_files(folder_path):
    """Delete each ``*.txt`` file directly inside ``folder_path``.

    Every removed path is printed before it is unlinked. Sub-folders are not
    searched. ``folder_path`` must offer ``glob`` (e.g. a ``pathlib.Path``).
    """
    for label_file in folder_path.glob("*.txt"):
        print("Deleting:", label_file)
        label_file.unlink()

# Delete .txt files from both folders
delete_txt_files(superman_path)
delete_txt_files(batman_path)


In [None]:
def walk_through_dir(dir_path):
    """Print a one-line summary for ``dir_path`` and every directory below it.

    For each directory visited by ``os.walk`` the line reports how many
    immediate sub-directories and how many files it contains. Every file is
    counted as an "image", whatever its extension.

    Args:
        dir_path: Root directory to walk.
    """
    for dirpath, dirnames, filenames in os.walk(dir_path):
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'")



In [None]:
walk_through_dir('/content/data')

In [None]:
import shutil
from pathlib import Path

# Jupyter's autosave folder; replace the root with your dataset root if different
checkpoint_dir = Path("data/.ipynb_checkpoints")

# Drop the folder when present so it is not picked up as an extra class folder.
if not checkpoint_dir.exists():
    print("No .ipynb_checkpoints folder found.")
else:
    shutil.rmtree(checkpoint_dir)
    print("Deleted .ipynb_checkpoints folder.")


In [None]:
#train and test split
import os
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

# Source folder holding one sub-folder per class; the train/test folders are
# created inside this same folder.
original_data_dir = Path("/content/data")
new_base_dir = original_data_dir  # reuse the base

# Class folder names
classes = ["Superman", "Batman"]

# Make <base>/<split>/<class> for every split/class pair (no-op if present)
for split in ("train", "test"):
    for cls in classes:
        (new_base_dir / split / cls).mkdir(parents=True, exist_ok=True)

# Function to split one class folder and copy its images into train/ and test/
def split_data(class_name):
    """Copy the images of ``class_name`` into an 80/20 train/test split.

    Images are read from ``original_data_dir / class_name`` and copied (the
    originals stay in place) to ``new_base_dir / "train" / class_name`` and
    ``new_base_dir / "test" / class_name``. A summary line is printed.

    Args:
        class_name: Name of the class sub-folder to split.
    """
    src_dir = original_data_dir / class_name
    # os.listdir returns names in arbitrary order; sorting makes the
    # random_state-seeded split identical on every machine and run.
    all_images = sorted(
        name for name in os.listdir(src_dir)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    train_imgs, test_imgs = train_test_split(all_images, test_size=0.2, random_state=42)

    for split_name, split_images in (("train", train_imgs), ("test", test_imgs)):
        target_dir = new_base_dir / split_name / class_name
        for img in split_images:
            shutil.copy(src_dir / img, target_dir / img)

    print(f"{class_name}: {len(train_imgs)} train, {len(test_imgs)} test")

# Apply to both classes
for cls in classes:
    split_data(cls)



In [None]:
image_path="data"

In [None]:
from pathlib import Path

image_path = Path(image_path)

train_dir=image_path/ "train"
test_dir=image_path/ "test"
test_dir,train_dir

In [None]:
import random
from PIL import Image
# Collect every .jpg located exactly three levels below image_path
# (i.e. <split>/<class>/<file>.jpg); other extensions are not matched.
image_path_list=list(image_path.glob('*/*/*.jpg'))
# Pick one image at random (random.choice raises IndexError on an empty list).
random_image_path = random.choice(image_path_list)
print(random_image_path)
# The class label is the name of the folder that directly contains the image.
image_class=random_image_path.parent.name
print(image_class)
# Open the image with PIL and show its size as the cell output.
img = Image.open(random_image_path)
img.size

In [None]:
import matplotlib.pyplot as plt
import numpy as np
img_as_array=np.asarray(img)
plt.imshow(img)
plt.axis(False)

In [None]:
img_as_array

In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms


In [None]:
# Preprocessing pipeline: resize to 128x128, apply AugMix augmentation, then
# convert the image to a tensor.
# NOTE(review): AugMix is a randomized augmentation, so two passes over the
# same image give different tensors - suitable for training data; prefer a
# deterministic pipeline for evaluation data.
transformer=transforms.Compose([
    transforms.Resize(size=(128,128)),
    transforms.AugMix(),
    transforms.ToTensor()
])

In [None]:
data_transform=transformer(img)

In [None]:
plt.figure(figsize=(10,7))
plt.subplot(1,2,1)
plt.imshow(img)
plt.axis(False)
plt.title(image_class)
plt.subplot(1,2,2)
plt.imshow(data_transform.permute(1,2,0))
plt.axis(False)
plt.title(image_class)

In [None]:
def plot_transformed_images(image_paths: list,transform,n=3,seed=None):
    """Plot randomly chosen images next to their transformed versions.

    For each sampled path a two-panel figure is created: the original image on
    the left and ``transform(image)`` on the right, titled with the name of
    the image's parent folder.

    Args:
        image_paths: Paths of candidate images (``pathlib.Path`` objects).
        transform: Callable applied to the opened PIL image; its result must
            support ``.permute(1, 2, 0)`` (presumably a CHW tensor - confirm).
        n: Number of images to plot; capped at ``len(image_paths)``.
        seed: Optional seed for ``random``. ``None`` leaves the RNG untouched;
            any other value, including ``0``, is applied.
    """
    # `if seed:` would silently ignore a seed of 0.
    if seed is not None:
        random.seed(seed)
    # random.sample raises ValueError when asked for more items than exist.
    sample_size = min(n, len(image_paths))
    random_image_paths = random.sample(image_paths, k=sample_size)
    for image_path in random_image_paths:
        with Image.open(image_path) as f:
            fig, ax = plt.subplots(nrows=1, ncols=2)
            ax[0].imshow(f)
            ax[0].set_title(f"original\nSize: {f.size}")
            ax[0].axis(False)
            # Transform, then move channels last because imshow expects HWC.
            transformed_image = transform(f).permute(1, 2, 0)
            ax[1].imshow(transformed_image)
            ax[1].set_title(f"transformed\nshape: {transformed_image.shape}")
            ax[1].axis(False)

            # .name (not .stem) keeps the full folder name even if it contains a dot.
            fig.suptitle(f"class: {image_path.parent.name}", fontsize=16)

plot_transformed_images(image_paths=image_path_list,transform=transformer,
                        n=3)

In [None]:
# Training images go through the augmenting pipeline.
train_dataset = datasets.ImageFolder(root=train_dir,
                                     transform=transformer,
                                     target_transform=None)

# Evaluation images only get the deterministic steps (resize + tensor
# conversion). Running the randomized AugMix step on the test set would make
# test loss/accuracy noisy and different on every pass.
test_transform = transforms.Compose([
    transforms.Resize(size=(128,128)),
    transforms.ToTensor()
])
test_dataset = datasets.ImageFolder(root=test_dir,
                                    transform=test_transform,
                                    target_transform=None)

In [None]:
len(train_dataset)

In [None]:
class_name=train_dataset.classes
class_name

In [None]:
class_to_idx=train_dataset.class_to_idx
class_to_idx

In [None]:
img,label=train_dataset[7]
plt.imshow(img.permute(1,2,0))
plt.title(class_name[label])

In [None]:
# Number of images per batch.
# NOTE(review): with a batch size of 1 every optimizer step uses a single
# image; confirm this is intentional rather than a leftover debug value.
BATCH_SIZE=1
# Training loader: sample order is reshuffled on every pass.
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
# Test loader: keeps the dataset order.
test_dataloader = DataLoader(dataset=test_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False)
# Number of batches in each loader (shown as the cell output).
len(train_dataloader),len(test_dataloader)

In [None]:
img,label=next(iter(train_dataloader))
img.shape,label

In [None]:
class VGG(nn.Module):
    """Small VGG-style CNN: three convolutional blocks and a linear classifier.

    Every block is Conv3x3 -> ReLU -> Conv3x3 -> ReLU -> MaxPool(2). The
    convolutions use stride 1 and padding 1, so they keep the spatial size;
    each max-pool halves it (rounding down). After three blocks the feature
    map is ``image_size // 8`` pixels per side.

    Args:
        input_shape: Number of channels of the input images.
        hidden_units: Number of channels produced by every convolution.
        output_layer: Number of output logits (one per class).
        image_size: Input height/width, either an int (square images) or a
            ``(height, width)`` pair. The default of 128 gives the original
            classifier width of ``hidden_units * 256``.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_layer: int,
                 image_size=128):
        super().__init__()
        # Blocks are built in the original order and layout, so parameter
        # initialisation order and state_dict keys are unchanged.
        self.block_1 = self._conv_block(input_shape, hidden_units)
        self.block_2 = self._conv_block(hidden_units, hidden_units)
        self.block_3 = self._conv_block(hidden_units, hidden_units)

        if isinstance(image_size, int):
            height, width = image_size, image_size
        else:
            height, width = image_size
        # Three 2x2 max-pools shrink each side by a factor of 8.
        flat_features = hidden_units * (height // 8) * (width // 8)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=flat_features, out_features=output_layer)
        )

    @staticmethod
    def _conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
        """Build one Conv-ReLU-Conv-ReLU-MaxPool block."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        return self.classifier(x)

In [None]:
device="cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
model_0=VGG(input_shape=3,hidden_units=30,output_layer=len(train_dataset.classes)).to(device)

In [None]:
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(params=model_0.parameters())

In [None]:
!pip install torchinfo

In [None]:
from torchinfo import summary
summary(model=model_0,input_size=(1,3,128,128))

In [None]:
def train_step(model:torch.nn.Module,DataLoader:torch.utils.data.DataLoader,
               loss_fn:torch.nn.Module,optimizer: torch.optim.Optimizer,
               device=None):
    """Train ``model`` for one pass over ``DataLoader``.

    Args:
        model: Network to train; switched to training mode.
        DataLoader: Iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable mapping ``(logits, targets)`` to a scalar loss.
        optimizer: Optimizer stepping the model's parameters.
        device: Device the batches are moved to. ``None`` (default) uses the
            device of the model's first parameter, or CPU if it has none.

    Returns:
        ``(loss, accuracy)``: the mean of the per-batch losses, and the
        fraction of samples classified correctly. Both are ``0.0`` when the
        loader yields no samples.
    """
    # Resolve the device at call time rather than binding a global when the
    # function is defined.
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    total_loss, num_batches = 0.0, 0
    correct, seen = 0, 0
    for X, y in DataLoader:
        X, y = X.to(device), y.to(device)
        y_logits = model(X)
        loss = loss_fn(y_logits, y)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Softmax is monotonic, so argmax over the logits gives the same class.
        correct += (y_logits.argmax(dim=1) == y).sum().item()
        seen += len(y)
        num_batches += 1

    if seen == 0:
        return 0.0, 0.0
    # Accuracy is counted per sample, so a short final batch is not
    # over-weighted the way a mean of per-batch accuracies would be.
    return total_loss / num_batches, correct / seen
def test_step(model:torch.nn.Module,
              dataloader:torch.utils.data.DataLoader,
              loss_fn:torch.nn.Module,
              device=device):
  model.eval()
  test_loss,test_acc=0,0
  with torch.inference_mode(): # Call torch.inference_mode as a function
    for batch,(X,y) in enumerate(dataloader):
      X,y=X.to(device),y.to(device)
      y_logits=model(X)
      loss=loss_fn(y_logits,y)
      test_loss+=loss.item()
      y_pred_class = torch.argmax(torch.softmax(y_logits, dim=1), dim=1)
      test_acc+=(y_pred_class == y).sum().item()/len(y_logits) # Corrected variable name
    test_loss/=len(dataloader)
    test_acc/=len(dataloader) # Corrected variable name
    return test_loss,test_acc

In [None]:
from tqdm.auto import tqdm


In [None]:
def train_test_loop(epoch:int,model:nn.Module,
                    train_dataloader:torch.utils.data.DataLoader,
                    test_dataloader:torch.utils.data.DataLoader,
                    loss_fn:torch.nn.Module,optimizer:torch.optim.Optimizer=None,
                    device=None):
    """Train and evaluate ``model`` for ``epoch`` epochs.

    Each epoch runs ``train_step`` on ``train_dataloader`` followed by
    ``test_step`` on ``test_dataloader`` and prints the resulting metrics.

    Args:
        epoch: Number of epochs to run.
        model: Network to train.
        train_dataloader: Batches used for training.
        test_dataloader: Batches used for evaluation.
        loss_fn: Loss passed to both steps.
        optimizer: Optimizer instance passed to ``train_step``. It keeps a
            default only for signature compatibility; omitting it is an error.
        device: Device passed to both steps. ``None`` (default) uses the
            device of the model's first parameter, or CPU if it has none.

    Returns:
        Dict with keys ``train_loss``, ``train_acc``, ``test_loss`` and
        ``test_acc``, each a list holding one value per epoch.

    Raises:
        TypeError: If no optimizer instance is supplied.
    """
    # The old default was the Optimizer *class* (an `=` typed where a `:`
    # annotation was meant), which only failed later inside the training step.
    if optimizer is None or isinstance(optimizer, type):
        raise TypeError("train_test_loop() requires an optimizer instance")

    # Resolve the device at call time rather than binding a global when the
    # function is defined.
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    result = {'train_loss': [],
              'train_acc': [],
              'test_loss': [],
              'test_acc': []}
    # A separate loop variable avoids shadowing the `epoch` parameter.
    for epoch_idx in tqdm(range(epoch)):
        print(f"epoch: {epoch_idx}")
        train_loss, train_acc = train_step(model=model, DataLoader=train_dataloader,
                                           loss_fn=loss_fn, optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model, dataloader=test_dataloader,
                                        loss_fn=loss_fn, device=device)

        print(f"epoch:{epoch_idx}|train_acc:{train_acc*100:.4f}|train_loss:{train_loss:.4f}|test_acc:{test_acc*100:.4f}|test_loss:{test_loss:.4f}")
        result["train_loss"].append(train_loss)
        result["train_acc"].append(train_acc)
        result["test_loss"].append(test_loss)
        result["test_acc"].append(test_acc)
    return result

In [None]:
# Seed torch's global RNG before training.
# NOTE(review): model_0 is constructed in an earlier cell, so this seed
# presumably does not cover its initial weights - confirm that is intended.
torch.manual_seed(42)
# Number of full train + test passes.
NUM_EPOCHS=70
from timeit import default_timer as timer
# Wall-clock timing of the whole training run.
start_time=timer()
# Per-epoch metrics are collected in the returned dict.
model_0_results=train_test_loop(epoch=NUM_EPOCHS,model=model_0,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                loss_fn=loss_fn,optimizer=optimizer,
                device=device)
stop_timer=timer()
# Elapsed time in seconds.
print(f"time taken: {stop_timer-start_time}")

In [None]:
torch.save(model_0.state_dict(), 'model.pth')

In [None]:
from google.colab import files
files.download("model.pth")
