In [135]:
import torch 
import torch.nn as nn
from torch.utils.data import DataLoader , random_split
from torchvision import datasets , transforms

train_dir = "/kaggle/input/images-data/Data/Train"
transform = transforms.Compose([
    transforms.Resize((336)),
    transforms.CenterCrop(248),
    transforms.RandomGrayscale(0.25),
    transforms.RandomHorizontalFlip(0.4),
    transforms.RandomApply([transforms.GaussianBlur(3)] , p=0.4) ,
    transforms.ToTensor()
])

full_data = datasets.ImageFolder(train_dir, transform=transform)
train_size = int(len(full_data) * 0.875)
test_size = len(full_data) - train_size
train_data , test_data = random_split(full_data ,[train_size,test_size])

train_loader = DataLoader(train_data , batch_size=32 ,pin_memory=True,num_workers=4 , shuffle=True)
test_loader = DataLoader(test_data , batch_size=4 ,pin_memory=True ,num_workers=4, shuffle=False)
print(f"Length of train loader : {len(train_loader)}")
print(f"Length of test loader : {len(test_loader)}")
print(full_data.class_to_idx)

Length of train loader : 22
Length of test loader : 26
{'AI': 0, 'Real': 1}


In [136]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN , self).__init__()
        self.conv1 = nn.Conv2d(3 , 32 , 3 , 1 , 0)
        self.conv2 = nn.Conv2d(32 , 64 , 3 , 1 , 0)
        self.pool = nn.MaxPool2d(2 , 2)
        self.fc1 = nn.Linear( 64*60*60, 1024 ) 
        self.fc2 = nn.Linear( 1024 , 256)
        self.fc3 = nn.Linear( 256 , 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)

    def forward(self , x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0) , -1)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [137]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device : {device}")

Device : cuda


In [138]:
model = CNN().to(device)
learning_rate = 0.0003
loss_fn = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters() , lr=learning_rate)
num_epochs = 12
for epoch in range(num_epochs):
    correct_predictions = 0
    total_samples = 0
    loss_per_epoch = 0
    model.train()
    for i , (features , labels) in enumerate(train_loader):
        features , labels = features.to(device) , labels.to(device)
        outputs = model(features)
        loss = loss_fn( outputs , labels)
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        loss_per_epoch +=loss.item()
        _, predictions = torch.max(outputs, 1)  
        correct_predictions += (predictions == labels).sum().item() 
        total_samples += labels.size(0) 

         
    print(f"For the epoch --> {epoch+1}/{num_epochs}")          
    loss_avg = loss_per_epoch/len(train_loader)  
    print(f"The avg training loss is : {loss_avg:.4f}") 
    accuracy = (correct_predictions / total_samples) * 100
    print(f"Train accuracy: {accuracy:.4f}")

    correct_predictions = 0
    total_samples = 0
    loss_per_epoch = 0
    with torch.no_grad():
       model.eval()
       for (features , labels) in test_loader:
            features , labels = features.to(device) , labels.to(device)
            outputs = model(features)
            loss = loss_fn( outputs , labels)
            loss_per_epoch +=loss.item()
            _, predictions = torch.max(outputs, 1)  
            correct_predictions += (predictions == labels).sum().item() 
            total_samples += labels.size(0) 
    loss_avg = loss_per_epoch/len(test_loader)  
    print(f"The avg testing loss is : {loss_avg:.4f}") 
    accuracy = (correct_predictions / total_samples) * 100
    print(f"Test accuracy: {accuracy:.4f}\n")        
     


For the epoch --> 1/12
The avg training loss is : 0.5488
Train accuracy: 71.7143
The avg testing loss is : 0.1635
Test accuracy: 94.0594

For the epoch --> 2/12
The avg training loss is : 0.1950
Train accuracy: 91.7143
The avg testing loss is : 0.0862
Test accuracy: 97.0297

For the epoch --> 3/12
The avg training loss is : 0.0512
Train accuracy: 98.2857
The avg testing loss is : 0.0456
Test accuracy: 99.0099

For the epoch --> 4/12
The avg training loss is : 0.0184
Train accuracy: 99.8571
The avg testing loss is : 0.0139
Test accuracy: 100.0000

For the epoch --> 5/12
The avg training loss is : 0.0062
Train accuracy: 99.8571
The avg testing loss is : 0.0218
Test accuracy: 99.0099

For the epoch --> 6/12
The avg training loss is : 0.0032
Train accuracy: 100.0000
The avg testing loss is : 0.0073
Test accuracy: 100.0000

For the epoch --> 7/12
The avg training loss is : 0.0103
Train accuracy: 99.8571
The avg testing loss is : 0.0015
Test accuracy: 100.0000

For the epoch --> 8/12
The avg

In [139]:
#### trying the model on full data ####
transformInferencing = transforms.Compose([
    transforms.Resize((336)),
    transforms.CenterCrop(248),
    transforms.ToTensor()
])
data = datasets.ImageFolder(train_dir, transform=transformInferencing)
full_dataset = DataLoader(data , batch_size=16 ,num_workers=2 , shuffle=True)
correct_predictions = 0
total_samples = 0
loss_per_epoch = 0
with torch.no_grad():
       model.eval()
       for (features , labels) in full_dataset:
            features , labels = features.to(device) , labels.to(device)
            outputs = model(features)
            loss = loss_fn( outputs , labels)
            loss_per_epoch +=loss.item()
            _, predictions = torch.max(outputs, 1)  
            correct_predictions += (predictions == labels).sum().item() 
            total_samples += labels.size(0) 
loss_avg = loss_per_epoch/len(full_dataset)  
print(f"The avg loss on full data : {loss_avg:.4f}") 
accuracy = (correct_predictions / total_samples) * 100
print(f"Accuracy on full data : {accuracy:.4f}") 

The avg loss on full data : 0.0019
Accuracy on full data : 100.0000


In [140]:
from torch.utils.data import Dataset
from PIL import Image
import os

class CustomDataset(Dataset):
    def __init__(self , transform , test_file_dir = "/kaggle/input/images-data/Data/Test" ):
        self.test_file_dir = test_file_dir
        self.transform = transform
    def __len__(self):
        return len(os.listdir(self.test_file_dir))
    def __getitem__(self , index):
        img_name = os.listdir(self.test_file_dir)[index]
        img_id = img_name[:-4]
        img_path = os.path.join(self.test_file_dir,img_name)
        img = Image.open(img_path).convert("RGB")
        img = self.transform(img)
        return img , img_id
        
testing_data = CustomDataset(transformInferencing)
testing_loader = DataLoader(testing_data , batch_size=1 , shuffle=False)

In [141]:
img_ids = []
labels = []
with torch.no_grad():
    model.eval()
    for (img , img_id) in testing_loader:
        img = img.to(device)
        output = model(img)
        _, prediction = torch.max(output, 1) 
        labels.append(prediction.item())
        img_ids.append(img_id[0])

In [143]:
import pandas as pd

id_num = [int(img_id[6:]) for img_id in img_ids]
df = pd.DataFrame({ 'Id_num' : id_num ,'Id': img_ids , 'Label' : labels})
df.sort_values(by = 'Id_num' , ascending=True , inplace=True)
print(df.shape)
df.head()

(200, 3)


Unnamed: 0,Id_num,Id,Label
117,1,image_1,0
26,2,image_2,1
161,3,image_3,1
13,4,image_4,1
35,5,image_5,0


In [144]:
df['Label'] = df.Label.map(lambda x: 'AI' if x==0 else 'Real') 
df.drop(columns = 'Id_num' , inplace=True)
df.reset_index(drop=True , inplace=True)
df.head()

Unnamed: 0,Id,Label
0,image_1,AI
1,image_2,Real
2,image_3,Real
3,image_4,Real
4,image_5,AI


In [145]:
df.Label.value_counts()

Label
AI      100
Real    100
Name: count, dtype: int64

In [146]:
df.to_csv('/kaggle/working/my_df_100-100.csv', index=False)