In [None]:
! pip install -q kaggle

In [None]:
!mkdir ~/.kaggle

!cp kaggle.json ~/.kaggle/

In [None]:
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
! kaggle datasets list

ref                                                             title                                             size  lastUpdated          downloadCount  voteCount  usabilityRating  
--------------------------------------------------------------  -----------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
thedrcat/daigt-v2-train-dataset                                 DAIGT V2 Train Dataset                            29MB  2023-11-16 01:38:36           2351        216  1.0              
manjitbaishya001/la-crime-data-2010-to-2023                     Los Angeles Crime Data - 2010 to 2023            147MB  2023-12-19 13:20:31            778         23  1.0              
manjitbaishya001/house-prices-2023                              Pakistan House Prices - 2023                       1MB  2023-12-15 18:57:24            810         21  1.0              
muhammadbinimran/housing-price-prediction-data                  Housing Pri

In [None]:
!kaggle datasets download -d ashraygupta9/hateful-meme-challenge

Downloading hateful-meme-challenge.zip to /content
100% 3.94G/3.94G [00:44<00:00, 122MB/s] 
100% 3.94G/3.94G [00:44<00:00, 94.0MB/s]


In [None]:
!unzip hateful-meme-challenge.zip


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: hateful_memes/img/58497.png  
  inflating: hateful_memes/img/58601.png  
  inflating: hateful_memes/img/58602.png  
  inflating: hateful_memes/img/58604.png  
  inflating: hateful_memes/img/58607.png  
  inflating: hateful_memes/img/58609.png  
  inflating: hateful_memes/img/58612.png  
  inflating: hateful_memes/img/58613.png  
  inflating: hateful_memes/img/58621.png  
  inflating: hateful_memes/img/58631.png  
  inflating: hateful_memes/img/58637.png  
  inflating: hateful_memes/img/58640.png  
  inflating: hateful_memes/img/58642.png  
  inflating: hateful_memes/img/58649.png  
  inflating: hateful_memes/img/58672.png  
  inflating: hateful_memes/img/58674.png  
  inflating: hateful_memes/img/58679.png  
  inflating: hateful_memes/img/58690.png  
  inflating: hateful_memes/img/58694.png  
  inflating: hateful_memes/img/58703.png  
  inflating: hateful_memes/img/58706.png  
  inflating: hateful_memes/img/5

In [None]:
!kaggle datasets download -d utkarshsaxenadn/landscape-recognition-image-dataset-12k-images

Downloading landscape-recognition-image-dataset-12k-images.zip to /content
 97% 481M/496M [00:03<00:00, 166MB/s]
100% 496M/496M [00:03<00:00, 144MB/s]


In [None]:
!unzip landscape-recognition-image-dataset-12k-images

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (1449).jpeg  
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (145).jpeg  
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (1450).jpeg  
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (1451).jpeg  
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (1452).jpeg  
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (1453).jpeg  
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (1454).jpeg  
  inflating: Landscape Classification/Landscape Classification/Training Data/Glacier/Glacier-Train (1455).jpeg  
  inflating: Landscape Classific

In [87]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from shutil import copyfile
from tqdm import tqdm

original_meme_dataset_path = "/content/hateful_memes/img"
original_non_meme_dataset_path = "/content/Landscape Classification/Landscape Classification/Training Data/Forest"

sampled_meme_path = "memes_final2"
sampled_non_meme_path = "non_memes_final2"

def sample_images(src, dest, sample_percentage):
    files = os.listdir(src)
    num_files_to_sample = int(len(files) * sample_percentage)
    sampled_files = random.sample(files, num_files_to_sample)

    os.makedirs(dest, exist_ok=True)
    for file in sampled_files:
        src_path = os.path.join(src, file)
        dest_path = os.path.join(dest, f"{file.split('.')[0]}_{random.randint(1, 1000000)}.{file.split('.')[1]}")
        copyfile(src_path, dest_path)

sample_images(original_meme_dataset_path, sampled_meme_path, sample_percentage=0.20)
sample_images(original_non_meme_dataset_path, sampled_non_meme_path, sample_percentage=0.20)

train_path = "train2"
val_path = "validation2"
test_path = "test2"

train_percentage = 0.7
val_percentage = 0.15
test_percentage = 0.15

def split_and_label_dataset(src, train_dest, val_dest, test_dest, label):
    files = os.listdir(src)
    num_files = len(files)
    num_train = int(num_files * train_percentage)
    num_val = int(num_files * val_percentage)

    random.shuffle(files)

    os.makedirs(os.path.join(train_dest, label), exist_ok=True)
    os.makedirs(os.path.join(val_dest, label), exist_ok=True)
    os.makedirs(os.path.join(test_dest, label), exist_ok=True)

    train_set = files[:num_train]
    val_set = files[num_train:num_train + num_val]
    test_set = files[num_train + num_val:]

    copy_files(src, train_set, os.path.join(train_dest, label), label)
    copy_files(src, val_set, os.path.join(val_dest, label), label)
    copy_files(src, test_set, os.path.join(test_dest, label), label)

def copy_files(src, file_list, dest, label):
    os.makedirs(dest, exist_ok=True)
    for file in file_list:
        src_path = os.path.join(src, file)
        dest_path = os.path.join(dest, f"{file.split('.')[0]}_{random.randint(1, 1000000)}.{file.split('.')[1]}")
        copyfile(src_path, dest_path)
        new_filename = f"{label}_{dest_path.split(os.sep)[-2]}_{dest_path.split(os.sep)[-1]}"
        os.rename(dest_path, os.path.join(dest, new_filename))

def copy_files(src, file_list, dest, label):
    os.makedirs(dest, exist_ok=True)
    for file in file_list:
        src_path = os.path.join(src, file)
        dest_path = os.path.join(dest, f"{file.split('.')[0]}_{random.randint(1, 1000000)}.{file.split('.')[1]}")
        copyfile(src_path, dest_path)
        new_filename = f"{label}_{dest_path.split('/')[-1]}"
        os.rename(dest_path, os.path.join(dest, new_filename))

split_and_label_dataset(sampled_meme_path, train_path, val_path, test_path, label="meme")
split_and_label_dataset(sampled_non_meme_path, train_path, val_path, test_path, label="non_meme")

img_width, img_height = 150, 150
batch_size = 32
epochs = 10

class CustomDataset(Dataset):
    def __init__(self, root_folder, transform=None):
        self.root_folder = root_folder
        self.transform = transform
        self.images = self._get_all_images(root_folder)
        print(f"Number of images in {root_folder}: {len(self.images)}")

    def _get_all_images(self, folder):
        images = []
        for root, _, files in os.walk(folder):
            for file in files:
                if os.path.isfile(os.path.join(root, file)):
                    images.append(os.path.relpath(os.path.join(root, file), self.root_folder))
        return images

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_folder, self.images[idx])

        while not os.path.isfile(img_name):
            idx = (idx + 1) % len(self.images)
            img_name = os.path.join(self.root_folder, self.images[idx])

        image = Image.open(img_name).convert('RGB')

        if self.transform:
            image = self.transform(image)

        class_name = img_name.split(os.sep)[-2]
        label = 0 if "non" in class_name else 1

        return image, label

transform = transforms.Compose([
    transforms.Resize((img_width, img_height)),
    transforms.ToTensor(),
])

train_dataset = CustomDataset(train_path, transform=transform)
val_dataset = CustomDataset(val_path, transform=transform)
test_dataset = CustomDataset(test_path, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * (img_width // 4) * (img_height // 4), 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * (img_width // 4) * (img_height // 4))
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(epochs):
    model.train()
    total_batches = len(train_loader)

    with tqdm(total=total_batches, desc=f'Epoch {epoch + 1}/{epochs}', unit='batch') as pbar:
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels.float())
            loss.backward()
            optimizer.step()

            pbar.update(1)

model.eval()
correct = 0
total = 0
predicted_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        predicted = torch.round(torch.sigmoid(outputs))

        predicted_labels.extend(predicted.cpu().numpy())
        total += labels.size(0)

        labels_np = labels.cpu().numpy()

        correct += (predicted.cpu().numpy() == labels_np).sum()

predicted_labels = torch.Tensor(predicted_labels)

accuracy = correct / total
print(f'Test Accuracy: {accuracy * 100:.2f}%')

print("Predicted Labels:", predicted_labels[:10].numpy())
print("True Labels:", labels_np[:10])
print("Number of samples in test_dataset:", len(test_dataset))


Number of images in train2: 1979
Number of images in validation2: 424
Number of images in test2: 425


Epoch 1/10: 100%|██████████| 62/62 [00:23<00:00,  2.59batch/s]
Epoch 2/10: 100%|██████████| 62/62 [00:23<00:00,  2.62batch/s]
Epoch 3/10: 100%|██████████| 62/62 [00:23<00:00,  2.67batch/s]
Epoch 4/10: 100%|██████████| 62/62 [00:23<00:00,  2.59batch/s]
Epoch 5/10: 100%|██████████| 62/62 [00:23<00:00,  2.59batch/s]
Epoch 6/10: 100%|██████████| 62/62 [00:24<00:00,  2.51batch/s]
Epoch 7/10: 100%|██████████| 62/62 [00:23<00:00,  2.58batch/s]
Epoch 8/10: 100%|██████████| 62/62 [00:22<00:00,  2.74batch/s]
Epoch 9/10: 100%|██████████| 62/62 [00:23<00:00,  2.66batch/s]
Epoch 10/10: 100%|██████████| 62/62 [00:23<00:00,  2.64batch/s]


Test Accuracy: 2699.53%
Predicted Labels: [[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
True Labels: [1 1 1 1 1 1 1 1 1]
Number of samples in test_dataset: 425


In [86]:
import shutil

folder_path = ['/content/validation2', '/content/train2', '/content/test2', '/content/memes_final2', '/content/non_memes_final2']

# Delete all contents under the folder
for i in folder_path:
  shutil.rmtree(i, ignore_errors=True)