<a href="https://colab.research.google.com/github/MuhammadIrzam447/visionCodes/blob/master/mm-imdb(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%cd /content/Dataset(s)/mm-imdb/fused

/content/Dataset(s)/mm-imdb/fused


In [2]:
!gdown https://drive.google.com/uc?id=1dxd2pySfCIDJYG7qMtuJre8ph068xc1X

Downloading...
From: https://drive.google.com/uc?id=1dxd2pySfCIDJYG7qMtuJre8ph068xc1X
To: /content/Dataset(s)/mm-imdb/fused/test.zip
100% 2.12G/2.12G [00:26<00:00, 79.8MB/s]


In [3]:
!gdown https://drive.google.com/uc?id=1sYR9EgHkM0oiGRQVlFQCyHO8kMRJ4ibQ

Downloading...
From: https://drive.google.com/uc?id=1sYR9EgHkM0oiGRQVlFQCyHO8kMRJ4ibQ
To: /content/Dataset(s)/mm-imdb/fused/test_label.txt
  0% 0.00/777k [00:00<?, ?B/s]100% 777k/777k [00:00<00:00, 8.16MB/s]


In [4]:
!gdown https://drive.google.com/uc?id=1X4cmMYRjxXFomCJ1adMhPNMYtd4WeDHP

Downloading...
From: https://drive.google.com/uc?id=1X4cmMYRjxXFomCJ1adMhPNMYtd4WeDHP
To: /content/Dataset(s)/mm-imdb/fused/train_label.txt
  0% 0.00/1.55M [00:00<?, ?B/s]100% 1.55M/1.55M [00:00<00:00, 14.6MB/s]100% 1.55M/1.55M [00:00<00:00, 14.6MB/s]


In [5]:
!gdown https://drive.google.com/uc?id=1yAeNEPXD3LNxmtSnC09jEg0vEDc1qiaq

Downloading...
From: https://drive.google.com/uc?id=1yAeNEPXD3LNxmtSnC09jEg0vEDc1qiaq
To: /content/Dataset(s)/mm-imdb/fused/train.zip
100% 4.21G/4.21G [00:50<00:00, 83.8MB/s]


In [None]:
!unzip test.zip

In [None]:
!unzip train.zip

# New Section

In [None]:
import torch
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class CustomDataset(Dataset):
    def __init__(self, data_file, root_dir, transform=None):
        self.data_file = data_file
        self.root_dir = root_dir
        self.transform = transform

        with open(data_file, 'r') as f:
            self.data = f.readlines()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        line = self.data[idx].strip().split('|')
        image_path = line[0].strip()
        image = Image.open(os.path.join(self.root_dir, image_path))

        labels = line[1].strip().split(',')
        labels = [label.strip() for label in labels]

        if self.transform:
            image = self.transform(image)

        return image, labels

# Define data transformations (resize, normalize, etc.)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
train_dataset = CustomDataset(data_file='/content/test_label.txt', root_dir='/content/test', transform=transform)

In [None]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
with open('/content/test_label.txt', 'r') as f:
    lines = f.readlines()

unique_labels = set()
for line in lines:
    labels = line.strip().split('|')[1].strip().split(',')
    unique_labels.update(labels)

num_classes = len(unique_labels)
print(f"Number of classes: {num_classes}")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Define your ResNet-101 model
model = models.resnet101(pretrained=True)  # You can use a pre-trained model

model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Linear(512, num_classes),
    nn.Sigmoid()
)

In [None]:
criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

num_epochs = 10

In [None]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}/{num_epochs}] Loss: {epoch_loss:.4f}')

print('Training finished.')

# Now, you can use the trained model for evaluation and prediction
# Don't forget to create a DataLoader for your test/validation dataset and evaluate the model's performance.


# Training Dataset Preparation

In [14]:
import os
image_file_paths = []
genre_labels = []

image_folder_add = "/content/Dataset(s)/mm-imdb/fused/train"
labels_file = '/content/Dataset(s)/mm-imdb/fused/train_label.txt'

with open(labels_file, 'r') as file:
    for line in file:
        parts = line.strip().split('|')
        filename = parts[0].strip()
        labels = parts[1].strip().split(', ')  # Split labels by comma and remove leading/trailing spaces
        image_path = os.path.join(image_folder_add, filename)
        image_file_paths.append(image_path)
        genre_labels.append(labels)

In [15]:
image_file_paths

['/content/Dataset(s)/mm-imdb/fused/train/0106714.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0106714_1.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0106714_2.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0204504.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0204504_1.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0204504_2.png',
 '/content/Dataset(s)/mm-imdb/fused/train/1865357.png',
 '/content/Dataset(s)/mm-imdb/fused/train/1865357_1.png',
 '/content/Dataset(s)/mm-imdb/fused/train/1865357_2.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0033804.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0033804_1.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0033804_2.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0066423.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0066423_1.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0066423_2.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0045130.png',
 '/content/Dataset(s)/mm-imdb/fused/train/0045130_1.png',
 '/content/Dataset(s)/mm-i

In [16]:
genre_labels

[['Crime', 'Drama', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Crime', 'Drama', 'Thriller'],
 ['Action', 'Crime', 'Drama', 'Thriller'],
 ['Action', 'Crime', 'Drama', 'Thriller'],
 ['Action', 'Crime', 'Drama', 'Thriller'],
 ['Drama'],
 ['Drama'],
 ['Drama'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Romance'],
 ['Comedy', 'Romance'],
 ['Comedy'],
 ['Comedy'],
 ['Comedy'],
 ['Documentary', 'Short'],
 ['Documentary', 'Short'],
 ['Documentary', 'Short'],
 ['Drama'],
 ['Drama'],
 ['Drama'],
 ['Drama', 'Romance', 'Comedy'],
 ['Drama', 'Romance', 'Comedy'],
 ['Drama', 'Romance', 'Comedy'],
 ['Comedy', 'Drama'],
 ['Comedy', 'Drama'],
 ['Comedy', 'Drama'],
 ['Documentary', 'Action', 'Comedy'],
 ['Documentary', 'Action', 'Comedy'],
 ['Documentary', 'Action', 'Comedy'],
 ['Crime', 'Drama', 'Mystery'],
 ['Crime', 'Drama', 'Mystery'],
 ['Crime', 'Drama', 'Mystery'],
 ['Documentary', 'History'],
 ['Documentary', 'History'],
 ['Documentary', 'History'],
 ['Drama'],
 ['Drama'],
 ['Drama'],
 ['Romance

In [17]:
unique_labels_set = set()

for labels in genre_labels:
    unique_labels_set.update(labels)

unique_labels = sorted(list(unique_labels_set))

In [18]:
unique_labels

['Action',
 'Adventure',
 'Animation',
 'Biography',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Family',
 'Fantasy',
 'Film-Noir',
 'History',
 'Horror',
 'Music',
 'Musical',
 'Mystery',
 'News',
 'Reality-TV',
 'Romance',
 'Sci-Fi',
 'Short',
 'Sport',
 'Talk-Show',
 'Thriller',
 'War',
 'Western']

In [19]:
len(unique_labels)

26

In [20]:
from collections import defaultdict

label_counts = defaultdict(int)

for labels in genre_labels:
    for label in labels:
        label_counts[label] += 1

In [21]:
label_counts

defaultdict(int,
            {'Crime': 6879,
             'Drama': 25272,
             'Thriller': 9339,
             'Action': 6465,
             'Comedy': 15324,
             'Romance': 9678,
             'Documentary': 3702,
             'Short': 843,
             'Mystery': 3693,
             'History': 2040,
             'Family': 2934,
             'Adventure': 4833,
             'Fantasy': 3486,
             'Sci-Fi': 3636,
             'Western': 1269,
             'Horror': 4809,
             'Sport': 1137,
             'War': 2418,
             'Music': 1902,
             'Musical': 1509,
             'Animation': 1758,
             'Biography': 2364,
             'Film-Noir': 606,
             'News': 117,
             'Talk-Show': 6,
             'Reality-TV': 3})

In [22]:
multi_hot_labels = []

for labels in genre_labels:
    multi_hot = [1 if label in labels else 0 for label in unique_labels]
    multi_hot_labels.append(multi_hot)

In [23]:
multi_hot_labels[1]

[0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]

In [24]:
genre_labels[1]

['Crime', 'Drama', 'Thriller']

In [25]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class CustomMultiLabelDataset(Dataset):
    def __init__(self, image_file_paths, multi_encoded_labels, transform=None):
        self.image_file_paths = image_file_paths
        self.multi_encoded_labels = multi_encoded_labels
        self.transform = transform

    def __len__(self):
        return len(self.image_file_paths)

    def __getitem__(self, idx):
        image_path = self.image_file_paths[idx]
        image = Image.open(image_path)

        labels = self.multi_encoded_labels[idx]
        labels = torch.tensor(labels, dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, labels

# Define data transformations (resize, normalize, etc.)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [26]:
# Create a custom dataset
train_dataset = CustomMultiLabelDataset(image_file_paths, multi_hot_labels, transform=transform)

In [27]:
len(train_dataset)

46656

In [28]:
train_dataset[0]

(tensor([[[-0.2684, -1.2445, -1.1932,  ..., -1.2617, -1.2617, -0.2513],
          [-0.4739, -2.0665, -2.0494,  ..., -2.1179, -2.1179, -0.3712],
          [-0.2684, -2.0494, -2.1179,  ..., -2.1179, -2.1179, -0.3712],
          ...,
          [-0.3883, -2.0837, -2.0665,  ..., -2.1008, -2.1008, -0.3712],
          [-0.4226, -2.0665, -2.1008,  ..., -2.1008, -2.1008, -0.3883],
          [-0.2342, -1.2445, -1.1760,  ..., -1.2617, -1.2617, -0.2684]],
 
         [[-0.1450, -1.1429, -1.0903,  ..., -1.1604, -1.1604, -0.1275],
          [-0.3550, -1.9832, -1.9657,  ..., -2.0357, -2.0357, -0.2500],
          [-0.1450, -1.9657, -2.0357,  ..., -2.0357, -2.0357, -0.2500],
          ...,
          [-0.2675, -2.0007, -1.9832,  ..., -2.0182, -2.0182, -0.2500],
          [-0.3025, -1.9832, -2.0182,  ..., -2.0182, -2.0182, -0.2675],
          [-0.1099, -1.1429, -1.0728,  ..., -1.1604, -1.1604, -0.1450]],
 
         [[ 0.0779, -0.9156, -0.8633,  ..., -0.9330, -0.9330,  0.0953],
          [-0.1312, -1.7522,

# Test Dataset Preparation

In [29]:
import os
test_image_file_paths = []
test_genre_labels = []

image_folder_add = "/content/Dataset(s)/mm-imdb/fused/test"
labels_file = '/content/Dataset(s)/mm-imdb/fused/test_label.txt'

with open(labels_file, 'r') as file:
    for line in file:
        parts = line.strip().split('|')
        filename = parts[0].strip()
        labels = parts[1].strip().split(', ')  # Split labels by comma and remove leading/trailing spaces
        image_path = os.path.join(image_folder_add, filename)
        test_image_file_paths.append(image_path)
        test_genre_labels.append(labels)

In [30]:
# unique_labels_set = set()

# for labels in test_genre_labels:
#     unique_labels_set.update(labels)

# unique_labels = sorted(list(unique_labels_set))

In [31]:
# unique_labels

In [32]:
len(unique_labels)

26

In [33]:
test_multi_hot_labels = []

for labels in test_genre_labels:
    test_multi_hot = [1 if label in labels else 0 for label in unique_labels]
    test_multi_hot_labels.append(test_multi_hot)

In [34]:
test_dataset = CustomMultiLabelDataset(test_image_file_paths, test_multi_hot_labels, transform=transform)

In [35]:
len(test_dataset)

23397

# Training

In [36]:
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = models.resnet101(pretrained=True)  # You can use a pre-trained model

# model.fc = nn.Sequential(
#     nn.Linear(model.fc.in_features, 512),
#     nn.ReLU(),
#     nn.Linear(512, len(unique_labels)),
#     nn.Sigmoid()
# )

model = models.resnet101(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, len(unique_labels))
model.to(device)

In [38]:
# Define loss function and optimizer
criterion = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 30

In [None]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}")

    if epoch > 15:
      save_dir = "/content/Model/Models-Train-21/"
      os.makedirs(save_dir, exist_ok=True)  # Create the directory if it doesn't exist
      model_name = str(epoch) + "_model.pth"
      save_path = os.path.join(save_dir, model_name)  # Specify the complete path to the model file
      torch.save(model.state_dict(), save_path)

    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            # loss = criterion(outputs, labels.float())

            # Apply a threshold (e.g., 0.5) to convert logits to binary predictions
            predictions.extend((outputs > 0.5).int().cpu().numpy())
            true_labels.extend(labels.int().cpu().numpy())

    # Calculate evaluation metrics
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions, average='macro')
    recall = recall_score(true_labels, predictions, average='macro')
    f1 = f1_score(true_labels, predictions, average='macro')

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print(classification_report(true_labels, predictions))

# New Section

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

save_dir = "/content/Model/Models-Train-1"
load_path = os.path.join(save_dir, '8_model.pth')


model = models.resnet101(pretrained=False)
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Linear(512, len(unique_labels)),
    nn.Sigmoid()
)
model.load_state_dict(torch.load(load_path))
model.eval()
model.to(device)

In [None]:
# Initialize lists to store predicted and ground truth labels
predicted_labels = []
true_labels = []

# Iterate through the test set and make predictions
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted_labels.extend(outputs.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())

In [None]:
predicted_labels[0]

In [None]:
threshold = 0.5

binary_predictions = []

for sample_logits in predicted_labels:
    binary_sample_predictions = [1 if value >= threshold else 0 for value in sample_logits]
    binary_predictions.append(binary_sample_predictions)

In [None]:
binary_predictions[1000]

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

accuracy = accuracy_score(true_labels, binary_predictions)
precision = precision_score(true_labels, binary_predictions, average='macro')
recall = recall_score(true_labels, binary_predictions, average='macro')
f1 = f1_score(true_labels, binary_predictions, average='macro')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(classification_report(true_labels, binary_predictions))