In [2]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
import numpy as np
from PIL import Image

# Load the Excel file and check for NaN values
file_path = 'multi-sent-p.xlsx'
df = pd.read_excel(file_path)
df = df.dropna(subset=['Label_Sentiment'])  # Remove rows with NaN in 'Label_Sentiment'
df['Label_Sentiment'] = df['Label_Sentiment'].astype(int)  # Ensure Label_Sentiment is integer type

class ImageOnlyDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.dataframe.iloc[idx, 0])
        
        try:
            image = Image.open(img_name).convert("RGB")
        except FileNotFoundError:
            return None
        
        if self.transform:
            image = self.transform(image)
        
        # Use existing label as it is already numeric (0 = negative, 1 = positive)
        label = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.long)
        
        sample = {
            'image': image,
            'label': label
        }
        return sample

# Transformations for the image
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Define the dataset
dataset = ImageOnlyDataset(dataframe=df, img_dir='Memes/', transform=transform)

# Custom collate function to filter out None samples
def collate_fn(batch):
    batch = [sample for sample in batch if sample is not None]
    if len(batch) == 0:
        return None
    return torch.utils.data.dataloader.default_collate(batch)

class VisionOnlyModel(nn.Module):
    def __init__(self):
        super(VisionOnlyModel, self).__init__()
        # Load a pre-trained Vision Transformer model
        self.vision_model = models.vit_b_16(weights='IMAGENET1K_V1')
        
        # Access the number of input features for the final layer in the classifier head
        in_features = self.vision_model.heads[0].in_features
        
        # Modify the classifier to fit 2 classes (binary classification)
        self.vision_model.heads = nn.Sequential(
            nn.Linear(in_features, 2)
        )

    def forward(self, images):
        return self.vision_model(images)


# Initialize model, loss, and optimizer
model = VisionOnlyModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Early stopping parameters
patience = 2
best_loss = float('inf')
early_stop_counter = 0

# K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold_results = []

for fold, (train_idx, test_idx) in enumerate(skf.split(dataset, df['Label_Sentiment'])):
    print(f'Fold {fold + 1}')
    
    train_subsampler = Subset(dataset, train_idx)
    test_subsampler = Subset(dataset, test_idx)
    
    train_dataloader = DataLoader(train_subsampler, batch_size=16, shuffle=True, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_subsampler, batch_size=16, shuffle=False, collate_fn=collate_fn)
    
    # Training loop with early stopping
    model.train()
    for epoch in range(10):  
        epoch_loss = 0.0
        for batch in train_dataloader:
            if batch is None:
                continue
            optimizer.zero_grad()
            outputs = model(images=batch['image'])
            loss = criterion(outputs, batch['label'])
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        
        # Validation step for early stopping
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in test_dataloader:
                if batch is None:
                    continue
                outputs = model(images=batch['image'])
                loss = criterion(outputs, batch['label'])
                val_loss += loss.item()
        
        print(f'Epoch {epoch + 1} - Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}')
        
        # Check early stopping condition
        if val_loss < best_loss:
            best_loss = val_loss
            early_stop_counter = 0  
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                print(f'Early stopping triggered at epoch {epoch + 1}')
                break  
    
    # Evaluation
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for batch in test_dataloader:
            if batch is None:
                continue
            outputs = model(images=batch['image'])
            _, preds = torch.max(outputs, 1)
            all_labels.extend(batch['label'].numpy())
            all_preds.extend(preds.numpy())

    # Calculate metrics with zero_division parameter
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, zero_division=1)
    precision = precision_score(all_labels, all_preds, zero_division=1)
    recall = recall_score(all_labels, all_preds, zero_division=1)
    conf_matrix = confusion_matrix(all_labels, all_preds, labels=[0, 1])

    fold_results.append({
        'fold': fold + 1,
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'confusion_matrix': conf_matrix
    })
    print(f'Fold {fold + 1} - Accuracy: {accuracy}, F1: {f1}, Precision: {precision}, Recall: {recall}')
    print(f'Confusion Matrix:\n{conf_matrix}')

# Average results across folds
avg_accuracy = np.mean([result['accuracy'] for result in fold_results])
avg_f1 = np.mean([result['f1'] for result in fold_results])
avg_precision = np.mean([result['precision'] for result in fold_results])
avg_recall = np.mean([result['recall'] for result in fold_results])

print(f'Average Accuracy: {avg_accuracy}')
print(f'Average F1 Score: {avg_f1}')
print(f'Average Precision: {avg_precision}')
print(f'Average Recall: {avg_recall}')


Fold 1
Epoch 1 - Train Loss: 144.6750, Val Loss: 33.1873
Epoch 2 - Train Loss: 122.6863, Val Loss: 33.1945
Epoch 3 - Train Loss: 100.6350, Val Loss: 35.4359
Early stopping triggered at epoch 3
Fold 1 - Accuracy: 0.6965020576131687, F1: 0.6326276463262764, Precision: 0.8063492063492064, Recall: 0.5204918032786885
Confusion Matrix:
[[423  61]
 [234 254]]
Fold 2
Epoch 1 - Train Loss: 93.3714, Val Loss: 18.9007
Epoch 2 - Train Loss: 66.5774, Val Loss: 19.8988
Epoch 3 - Train Loss: 49.8104, Val Loss: 26.0918
Early stopping triggered at epoch 3
Fold 2 - Accuracy: 0.8372811534500515, F1: 0.8326271186440678, Precision: 0.8618421052631579, Recall: 0.805327868852459
Confusion Matrix:
[[420  63]
 [ 95 393]]
Fold 3
Epoch 1 - Train Loss: 55.0062, Val Loss: 6.4344
Epoch 2 - Train Loss: 31.0495, Val Loss: 9.9631
Epoch 3 - Train Loss: 31.6805, Val Loss: 23.5104
Early stopping triggered at epoch 3
Fold 3 - Accuracy: 0.8403707518022657, F1: 0.8597285067873304, Precision: 0.7698541329011345, Recall: 0.97

In [3]:
import pandas as pd
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

# Load the test dataset
test_file_path = 'test_data.xlsx'
test_df = pd.read_excel(test_file_path)
test_df = test_df.dropna(subset=['Label_Sentiment'])  # Remove rows with NaN in 'Label_Sentiment'
test_df['Label_Sentiment'] = test_df['Label_Sentiment'].astype(int)  # Ensure Label_Sentiment is integer type

class TestImageDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.dataframe.iloc[idx, 0])
        
        try:
            image = Image.open(img_name).convert("RGB")
        except FileNotFoundError:
            return None
        
        if self.transform:
            image = self.transform(image)
        
        label = torch.tensor(self.dataframe.iloc[idx, 2], dtype=torch.long)
        
        sample = {
            'image': image,
            'label': label
        }
        return sample

# Define transformations for the images
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create the test dataset and data loader
test_dataset = TestImageDataset(dataframe=test_df, img_dir='Memes-test/', transform=test_transform)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)

# Evaluate the model on the test data
model.eval()
all_labels = []
all_preds = []

with torch.no_grad():
    for batch in test_dataloader:
        if batch is None:
            continue
        outputs = model(images=batch['image'])
        _, preds = torch.max(outputs, 1)
        all_labels.extend(batch['label'].numpy())
        all_preds.extend(preds.numpy())

# Calculate metrics
accuracy = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, zero_division=1)
precision = precision_score(all_labels, all_preds, zero_division=1)
recall = recall_score(all_labels, all_preds, zero_division=1)
conf_matrix = confusion_matrix(all_labels, all_preds, labels=[0, 1])

# Print results
print(f'Test Accuracy: {accuracy}')
print(f'Test F1 Score: {f1}')
print(f'Test Precision: {precision}')
print(f'Test Recall: {recall}')
print(f'Confusion Matrix:\n{conf_matrix}')


Test Accuracy: 0.725925925925926
Test F1 Score: 0.7218045112781954
Test Precision: 0.7032967032967034
Test Recall: 0.7413127413127413
Confusion Matrix:
[[200  81]
 [ 67 192]]
