<a href="https://colab.research.google.com/github/MuhammadIrzam447/NewEncodings/blob/main/Train-06.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Training Dataset Preparation

In [1]:
import os
image_file_paths = []
genre_labels = []

image_folder_add = "/content/MMLearning/data/imdb/mulitmodal/train"
labels_file = '/content/MMLearning/data/imdb/mulitmodal/train_label.txt'

with open(labels_file, 'r') as file:
    for line in file:
        parts = line.strip().split('|')
        filename = parts[0].strip()
        labels = parts[1].strip().split(', ')  # Split labels by comma and remove leading/trailing spaces
        image_path = os.path.join(image_folder_add, filename)
        image_file_paths.append(image_path)
        genre_labels.append(labels)

In [2]:
len(image_file_paths)

31104

In [None]:
# genre_labels

In [None]:
# unique_labels_set = set()

# for labels in genre_labels:
#     unique_labels_set.update(labels)

# unique_labels = sorted(list(unique_labels_set))

In [None]:
# unique_labels

In [None]:
# len(unique_labels)

In [3]:
from collections import defaultdict

label_counts = defaultdict(int)

for labels in genre_labels:
    for label in labels:
        label_counts[label] += 1

In [4]:
# Convert the defaultdict into a list of tuples
label_count_list = [(label, count) for label, count in label_counts.items()]

# Sort the list of tuples based on counts in descending order
sorted_label_count_list = sorted(label_count_list, key=lambda x: x[1], reverse=True)

# Display the sorted list
for label, count in sorted_label_count_list:
    print(f"{label}: {count}")

print("Total Labels: ", len(label_count_list))

Drama: 16848
Comedy: 10216
Romance: 6452
Thriller: 6226
Crime: 4586
Action: 4310
Adventure: 3222
Horror: 3206
Documentary: 2468
Mystery: 2462
Sci-Fi: 2424
Fantasy: 2324
Family: 1956
War: 1612
Biography: 1576
History: 1360
Music: 1268
Animation: 1172
Musical: 1006
Western: 846
Sport: 758
Short: 562
Film-Noir: 404
News: 78
Talk-Show: 4
Reality-TV: 2
Total Labels:  26


In [5]:
min_label_count = 400
valid_labels = [label for label, count in label_counts.items() if count >= min_label_count]
valid_labels = sorted(list(valid_labels))

In [6]:
valid_labels, len(valid_labels)

(['Action',
  'Adventure',
  'Animation',
  'Biography',
  'Comedy',
  'Crime',
  'Documentary',
  'Drama',
  'Family',
  'Fantasy',
  'Film-Noir',
  'History',
  'Horror',
  'Music',
  'Musical',
  'Mystery',
  'Romance',
  'Sci-Fi',
  'Short',
  'Sport',
  'Thriller',
  'War',
  'Western'],
 23)

In [None]:
# filtered_image_paths = []
# filtered_genre_labels = []

# for image_path, labels in zip(image_file_paths, genre_labels):
#     valid_labels_for_sample = [label for label in labels if label in valid_labels]

#     if valid_labels_for_sample:
#         filtered_image_paths.append(image_path)
#         filtered_genre_labels.append(valid_labels_for_sample)

In [None]:
# len(filtered_image_paths)

In [None]:
# len(filtered_genre_labels)

In [7]:
multi_hot_labels = []

for labels in genre_labels:
    multi_hot = [1 if label in labels else 0 for label in valid_labels]
    multi_hot_labels.append(multi_hot)

In [8]:
multi_hot_labels[1]

[0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]

In [9]:
genre_labels[1]

['Crime', 'Drama', 'Thriller']

In [10]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class CustomMultiLabelDataset(Dataset):
    def __init__(self, image_file_paths, multi_encoded_labels, transform=None):
        self.image_file_paths = image_file_paths
        self.multi_encoded_labels = multi_encoded_labels
        self.transform = transform

    def __len__(self):
        return len(self.image_file_paths)

    def __getitem__(self, idx):
        image_path = self.image_file_paths[idx]
        image = Image.open(image_path)

        labels = self.multi_encoded_labels[idx]
        labels = torch.tensor(labels, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, labels

# Define data transformations (resize, normalize, etc.)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [11]:
# Create a custom dataset
train_dataset = CustomMultiLabelDataset(image_file_paths, multi_hot_labels, transform=transform)

In [12]:
len(train_dataset)

31104

In [13]:
train_dataset[10]

(tensor([[[2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          ...,
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489],
          [2.2489, 2.2489, 2.2489,  ..., 2.2489, 2.2489, 2.2489]],
 
         [[2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          ...,
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286],
          [2.4286, 2.4286, 2.4286,  ..., 2.4286, 2.4286, 2.4286]],
 
         [[2.6400, 2.6400, 2.6400,  ..., 2.6400, 2.6400, 2.6400],
          [2.6400, 2.6400, 2.6400,  ..., 2.6400, 2.6400, 2.6400],
          [2.6400, 2.6400, 2.6400,  ...,

# Test Dataset Preparation

In [14]:
import os

test_image_file_paths = []
test_genre_labels = []

image_folder_add = "/content/MMLearning/data/imdb/mulitmodal/test"
labels_file = '/content/MMLearning/data/imdb/mulitmodal/test_label.txt'

with open(labels_file, 'r') as file:
    for line in file:
        parts = line.strip().split('|')
        filename = parts[0].strip()
        labels = parts[1].strip().split(', ')  # Split labels by comma and remove leading/trailing spaces

        if not (filename.endswith("_1.png") or filename.endswith("_2.png")):
            image_path = os.path.join(image_folder_add, filename)
            test_image_file_paths.append(image_path)
            test_genre_labels.append(labels)


In [15]:
test_image_file_paths[0:10]

['/content/MMLearning/data/imdb/mulitmodal/test/0078718_3.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0078718_4.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0089003_3.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0089003_4.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0098136_3.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0098136_4.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0057693_3.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0057693_4.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0385330_3.png',
 '/content/MMLearning/data/imdb/mulitmodal/test/0385330_4.png']

In [16]:
test_multi_hot_labels = []

for labels in test_genre_labels:
    test_multi_hot = [1 if label in labels else 0 for label in valid_labels]
    test_multi_hot_labels.append(test_multi_hot)

In [17]:
test_dataset = CustomMultiLabelDataset(test_image_file_paths, test_multi_hot_labels, transform=transform)

In [18]:
len(test_dataset)

15598

# Training

In [19]:
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet101(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(valid_labels))
model.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [21]:
# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 30

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

    # if epoch > 10:
    save_dir = "/content/MMLearning/data/Models/Model-06"
    os.makedirs(save_dir, exist_ok=True)  # Create the directory if it doesn't exist
    model_name = str(epoch+1) + "_model.pth"
    save_path = os.path.join(save_dir, model_name)  # Specify the complete path to the model file
    torch.save(model.state_dict(), save_path)

    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            # loss = criterion(outputs, labels.float())

            # Apply a threshold (e.g., 0.5) to convert logits to binary predictions
            predictions.extend((outputs > 0.5).int().cpu().numpy())
            true_labels.extend(labels.int().cpu().numpy())

    # Calculate evaluation metrics
    # accuracy = accuracy_score(true_labels, predictions)
    # precision = precision_score(true_labels, predictions, average='macro')
    # recall = recall_score(true_labels, predictions, average='macro')
    f1 = f1_score(true_labels, predictions, average='macro')

    # print(f"Accuracy: {accuracy}")
    # print(f"Precision: {precision}")
    # print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    # print(classification_report(true_labels, predictions))

Epoch 1/30, Loss: 0.2880440473771144
F1-Score: 0.0017106070466810321
Epoch 2/30, Loss: 0.28217484898589273
F1-Score: 0.03482498987781217
Epoch 3/30, Loss: 0.26944813868145884
F1-Score: 0.050659761285194546
Epoch 4/30, Loss: 0.2634299943077957
F1-Score: 0.033965068047724124
Epoch 5/30, Loss: 0.2591226782971694
F1-Score: 0.09250627246464097
Epoch 6/30, Loss: 0.25414273070761695
F1-Score: 0.0966272852042647
Epoch 7/30, Loss: 0.2502098709130238
F1-Score: 0.14410359623556906
Epoch 8/30, Loss: 0.2467557183417028
F1-Score: 0.13912563821246285
Epoch 9/30, Loss: 0.24312099731630749
F1-Score: 0.14427143391130365
Epoch 10/30, Loss: 0.24011414484293372
F1-Score: 0.11510150226273462
Epoch 11/30, Loss: 0.23738420467992377
F1-Score: 0.16800606408509433
Epoch 12/30, Loss: 0.23424174075509296
F1-Score: 0.18008195940384947
Epoch 13/30, Loss: 0.23186652474258662
F1-Score: 0.1764825656597426
Epoch 14/30, Loss: 0.22844628572709275
F1-Score: 0.22191399390512725
Epoch 15/30, Loss: 0.2257974140591337
F1-Score

# New Section

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

save_dir = "/content/Model/Models-Train-1"
load_path = os.path.join(save_dir, '8_model.pth')


model = models.resnet101(pretrained=False)
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Linear(512, len(unique_labels)),
    nn.Sigmoid()
)
model.load_state_dict(torch.load(load_path))
model.eval()
model.to(device)

In [None]:
# Initialize lists to store predicted and ground truth labels
predicted_labels = []
true_labels = []

# Iterate through the test set and make predictions
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted_labels.extend(outputs.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

In [None]:
predicted_labels[0]

In [None]:
threshold = 0.5

binary_predictions = []

for sample_logits in predicted_labels:
    binary_sample_predictions = [1 if value >= threshold else 0 for value in sample_logits]
    binary_predictions.append(binary_sample_predictions)

In [None]:
binary_predictions[1000]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

accuracy = accuracy_score(true_labels, binary_predictions)
precision = precision_score(true_labels, binary_predictions, average='macro')
recall = recall_score(true_labels, binary_predictions, average='macro')
f1 = f1_score(true_labels, binary_predictions, average='macro')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(classification_report(true_labels, binary_predictions))