# Final Prediction notebook for test dataset

## EfficientNetV2-S data loading and evaluation

In [None]:
# Install the third-party packages this notebook depends on.
# NOTE(review): torchvision, pandas, matplotlib, tqdm and Pillow are imported in the
# next cell but are not installed here — presumably preinstalled in the runtime; confirm.
!pip install timm
!pip install scikit-learn
!pip install scikit-image
!pip install opencv-python
!pip install torch

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms, models
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
from torchvision.transforms import transforms as T
from torchvision.io import read_image
from torchvision.utils import save_image
from PIL import Image
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, accuracy_score
import pandas as pd
import timm


In [None]:

class CustomDataset(Dataset):
    '''
    Dataset of (image, label) pairs that are decoded lazily from a list of file paths.

    Every image is decoded as a 3-channel RGB tensor, converted to float and divided
    by 255, then optionally passed through `transform`. The matching label is
    returned as a float32 tensor.

    Args:
        file_paths (list of str): Paths of the image files.
        labels (list of int or float): One label per entry of `file_paths`, in the same order.
        transform (callable, optional): Applied to the decoded image tensor. Defaults to None.

    Attributes:
        file_paths (list): The image paths passed to the constructor.
        labels (list): The labels passed to the constructor.
        transform (callable): The transform passed to the constructor (may be None).

    Methods:
        __len__():
            Returns the number of entries in `file_paths`.
        __getitem__(index):
            Decodes the image at `index`, applies `transform` (if any) and returns
            `(image, label)`.
    '''
    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        # Function-scope import: the notebook's import cell only brings in `read_image`.
        from torchvision.io import ImageReadMode

        img_path = self.file_paths[idx]
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        # Decode as RGB explicitly. With the default mode a grayscale or RGBA file
        # yields 1 or 4 channels, which cannot be batched together with RGB images
        # or fed to a model that expects 3 input channels.
        img = read_image(img_path, mode=ImageReadMode.RGB).float() / 255.0

        if self.transform:
            img = self.transform(img)

        return img, label

# Dataset Preparation Function
def prepare_dataset(base_dir):
    '''
    Collect image paths and labels from the `FAKE` and `REAL` subfolders of `base_dir`.

    Files are listed in sorted order inside each subfolder so that the returned
    lists are identical from run to run (`os.listdir` gives no ordering guarantee).

    Args:
        base_dir (str): Directory expected to contain `FAKE` and/or `REAL` subfolders.

    Returns:
        tuple: `(file_paths, labels)` — two parallel lists; the label is 0 for
        files under `FAKE` and 1 for files under `REAL`.

    Raises:
        ValueError: If `base_dir` is not an existing directory.
    '''
    if not os.path.isdir(base_dir):
        raise ValueError(f"Directory {base_dir} does not exist.")

    # NOTE(review): .bmp/.tiff are accepted here; confirm that the image reader
    # used by the dataset class can actually decode them.
    image_extensions = (".png", ".jpg", ".jpeg", ".bmp", ".tiff")
    label_map = {"FAKE": 0, "REAL": 1}  # 0: Fake, 1: Real

    file_paths = []
    labels = []

    for subfolder, label in label_map.items():
        folder_path = os.path.join(base_dir, subfolder)
        if not os.path.isdir(folder_path):
            # A missing class folder is tolerated: warn and keep going.
            print(f"Warning: Subfolder {subfolder} not found in {base_dir}. Skipping...")
            continue
        for fname in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, fname)
            if os.path.isfile(file_path) and fname.lower().endswith(image_extensions):
                file_paths.append(file_path)
                labels.append(label)

    return file_paths, labels


# Root folder of the labelled test set; prepare_dataset looks for FAKE/ and REAL/ inside it.
test_dir = "/test"

# Preparing Dataset

test_paths, test_labels = prepare_dataset(test_dir)

# Augmenting transform (resize + random flips). It is defined here but not
# passed to any dataset in this cell.
train_transform = T.Compose([
    T.Resize((224, 224)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomVerticalFlip(p=0.5),
])

# Deterministic transform used for evaluation: resize only.
test_transform = T.Compose([
    T.Resize((224, 224)),
])

# Dataset and DataLoader
batch_size = 64


test_dataset = CustomDataset(test_paths, test_labels, transform=test_transform)


# shuffle=False keeps the batches in the same order as test_paths.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model Definition
class CustomEffNetV2S(nn.Module):
    '''
    EfficientNet V2-S backbone whose final linear layer is resized to `num_classes` outputs.

    The torchvision EfficientNet V2-S is created with its default pretrained weights,
    the second element of its `classifier` is replaced by a fresh `nn.Linear`, and a
    dropout layer is applied to the network output.

    Args:
        num_classes (int, optional): Output size of the replacement linear layer. Defaults to 1.
        dropout_rate (float, optional): Drop probability of the dropout applied to the
                                        network output. Defaults to 0.5.

    Attributes:
        effnet (nn.Module): The EfficientNet V2-S network with the replaced head.
        dropout (nn.Dropout): Dropout applied to the output of `effnet`.

    Methods:
        forward(x):
            Runs `x` through `effnet`, then through `dropout`, and returns the result.
    '''
    def __init__(self, num_classes=1, dropout_rate=0.5):
        super().__init__()
        backbone = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.DEFAULT)
        # Keep the input width of the original head and change only its output size.
        head_in_features = backbone.classifier[1].in_features
        backbone.classifier[1] = nn.Linear(head_in_features, num_classes)
        self.effnet = backbone
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, x):
        return self.dropout(self.effnet(x))

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Build the EfficientNet model with its default arguments and move it to DEVICE.
# NOTE: the next notebook cell repeats this model/criterion/optimizer setup.
model_efficient_net = CustomEffNetV2S().to(DEVICE)

# Loss and Optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_efficient_net.parameters(), lr=3e-4)

# Training Loop
def train_epoch(model, dataloader, criterion, optimizer, device):
    '''
    Run one optimisation pass over `dataloader` and report mean loss and accuracy.

    Args:
        model: Module called on each image batch; its output is passed to `criterion`
               and thresholded after a sigmoid.
        dataloader: Iterable of `(images, labels)` batches exposing a `.dataset` with a length.
        criterion: Loss called as `criterion(outputs, labels)`.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        tuple: `(epoch_loss, accuracy)`, both divided by `len(dataloader.dataset)`.
    '''
    model.train()
    loss_sum = 0.0
    num_correct = 0

    for batch_images, batch_labels in tqdm(dataloader, desc="Training"):
        batch_images = batch_images.to(device)
        # unsqueeze(1) inserts a dimension at position 1 before the targets are
        # compared with the model output.
        targets = batch_labels.to(device).unsqueeze(1)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the final division by the
        # dataset size gives a per-sample average.
        loss_sum += batch_loss.item() * batch_images.size(0)
        hard_predictions = torch.sigmoid(logits).gt(0.5).float()
        num_correct += hard_predictions.eq(targets).sum().item()

    dataset_size = len(dataloader.dataset)
    return loss_sum / dataset_size, num_correct / dataset_size

def validate_epoch(model, dataloader, criterion, device):
    '''
    Evaluate `model` on `dataloader` without gradient tracking.

    Args:
        model: Module called on each image batch; put into eval mode first.
        dataloader: Iterable of `(images, labels)` batches exposing a `.dataset` with a length.
        criterion: Loss called as `criterion(outputs, labels)`.
        device: Device the batches are moved to.

    Returns:
        tuple: `(epoch_loss, accuracy)`, both divided by `len(dataloader.dataset)`.
    '''
    model.eval()
    loss_sum = 0.0
    num_correct = 0

    with torch.no_grad():
        for batch_images, batch_labels in tqdm(dataloader, desc="Validating"):
            batch_images = batch_images.to(device)
            # unsqueeze(1) inserts a dimension at position 1 before the targets are
            # compared with the model output.
            targets = batch_labels.to(device).unsqueeze(1)

            logits = model(batch_images)
            batch_loss = criterion(logits, targets)

            # Weight by batch size; the sum is normalised by the dataset size below.
            loss_sum += batch_loss.item() * batch_images.size(0)
            hard_predictions = torch.sigmoid(logits).gt(0.5).float()
            num_correct += hard_predictions.eq(targets).sum().item()

    dataset_size = len(dataloader.dataset)
    return loss_sum / dataset_size, num_correct / dataset_size

In [None]:
# Rebuild the EfficientNet model on the available device. This is the instance
# that later cells load weights into and use for prediction.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model_efficient_net = CustomEffNetV2S().to(DEVICE)

# Loss and Optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_efficient_net.parameters(), lr=3e-4)
# Lower-case alias used by the later prediction cells.
device = DEVICE

In [None]:
best_weights_path = "ensemble_weights/effnet"
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint cannot run arbitrary code; it also avoids the torch.load
# warning that was captured in this cell's output.
# NOTE(review): assumes the file holds a plain state_dict — confirm.
best_weights = torch.load(best_weights_path, map_location=DEVICE, weights_only=True)
# Load the best weights into the model; strict=True fails on any missing or unexpected key.
model_efficient_net.load_state_dict(best_weights, strict=True)

  best_weights = torch.load(best_weights_path, map_location=DEVICE)


<All keys matched successfully>

## ViT-Tiny data loading and evaluation

In [None]:

# Parameters
batch_size = 32
num_epochs = 10
learning_rate = 1e-4
# NOTE(review): CombinedViTModel builds a single-logit head regardless of its
# num_classes argument, so this value does not size the classifier.
num_classes = 2  # Assuming CIFAKE has two classes: Real and Fake
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Resize to 32x32 (the img_size the ViT backbone is created with) and convert to a tensor.
transform2 = T.Compose([
    T.Resize(size=(32, 32)),
    T.ToTensor()
])

# ImageFolder derives labels from the subfolder names under /test.
# NOTE: test_dataset1 / test_loader1 are reassigned by a later cell.
test_dataset1 = datasets.ImageFolder(root="/test", transform=transform2)
test_loader1 = DataLoader(test_dataset1, batch_size=batch_size, shuffle=False)

In [None]:

class CombinedViTModel(nn.Module):
    '''
    Vision Transformer backbone (created through timm) with a single-logit linear head.

    The backbone is created with `pretrained=True` and `img_size=32`, and its own
    `head` is replaced by `nn.Identity`. The first token of the backbone's
    `forward_features` output is used as the embedding and is passed to a linear
    layer that produces one logit per sample.

    Args:
        base_model_name (str, optional): timm model name. Defaults to 'vit_tiny_patch16_224'.
        embedding_dim (int, optional): Input width of the linear head; it must match the
                                       backbone's token width. Defaults to 192.
        num_classes (int, optional): Accepted but not used — the head always has one
                                     output. Defaults to 2.

    Attributes:
        base_model (nn.Module): The timm backbone with its head replaced by `nn.Identity`.
        classifier (nn.Linear): Linear layer mapping `embedding_dim` features to one logit.

    Methods:
        forward(x):
            Returns `(embeddings, logits)` for the input batch `x`.
    '''

    def __init__(self, base_model_name='vit_tiny_patch16_224', embedding_dim=192, num_classes=2):
        super().__init__()
        # Pretrained backbone; its own classification head is disabled below.
        backbone = timm.create_model(base_model_name, pretrained=True, img_size=32)
        backbone.head = nn.Identity()
        self.base_model = backbone

        # Replacement head: one logit per sample.
        self.classifier = nn.Linear(embedding_dim, 1)

    def forward(self, x):
        tokens = self.base_model.forward_features(x)
        # The first token along dimension 1 is taken as the embedding (the CLS token).
        embeddings = tokens[:, 0]
        return embeddings, self.classifier(embeddings)


# Module-level flag: CombinedLoss.forward prints its two loss terms only while this is 0.
count1 = 0
# Define the Combined Loss Function
class CombinedLoss(nn.Module):
    '''
    Binary cross-entropy on the logits plus a weighted supervised contrastive term
    on the embeddings.

    The contrastive term L2-normalises the embeddings, builds the pairwise
    similarity matrix divided by `temperature`, takes a row-wise log-softmax and
    averages it over every other sample in the batch that has the same label.
    Rows without any such positive contribute 0.

    Args:
        margin (float, optional): Stored on the instance but not used by the loss. Defaults to 1.0.
        temperature (float, optional): Divisor applied to the similarity matrix. Defaults to 0.07.
        lambda_contrastive (float, optional): Weight of the contrastive term. Defaults to 0.5.

    Attributes:
        bce_loss (nn.BCEWithLogitsLoss): The binary cross-entropy loss.
        margin (float): Value passed to the constructor (unused).
        temperature (float): Value passed to the constructor.
        lambda_contrastive (float): Value passed to the constructor.

    Methods:
        forward(embeddings, logits, labels):
            Returns `bce + lambda_contrastive * contrastive`.
        contrastive_loss(features, labels):
            Returns the contrastive term alone.
    '''
    def __init__(self, margin=1.0, temperature=0.07, lambda_contrastive=0.5):
        super(CombinedLoss, self).__init__()
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.margin = margin
        self.temperature = temperature
        self.lambda_contrastive = lambda_contrastive

    def forward(self, embeddings, logits, labels):
        global count1
        # Compute BCE Loss; unsqueeze(1) inserts a dimension at position 1 in the labels.
        bce_loss = self.bce_loss(logits, labels.float().unsqueeze(1))

        # Compute Contrastive Loss
        contrastive_loss = self.contrastive_loss(embeddings, labels)

        # Combine the Losses
        combined_loss = bce_loss + self.lambda_contrastive * contrastive_loss
        # One-off debug print of both terms on the first call only.
        if count1 == 0:
            print(bce_loss)
            print(contrastive_loss)
            count1 += 1
        return combined_loss

    def contrastive_loss(self, features, labels):
        # Normalize features
        features = F.normalize(features, dim=1)

        # Compute similarity matrix
        sim_matrix = torch.matmul(features, features.T) / self.temperature

        # Positive-pair mask: same label, excluding each sample paired with itself.
        labels = labels.view(-1, 1)
        mask_pos = torch.eq(labels, labels.T).float()
        mask_pos = mask_pos - torch.eye(mask_pos.shape[0], device=mask_pos.device)

        # Row-wise log-softmax. logsumexp is used instead of log(sum(exp(.)))
        # because exp(1 / temperature) overflows float32 for small temperatures,
        # which turns the whole loss into NaN.
        log_prob = sim_matrix - torch.logsumexp(sim_matrix, dim=1, keepdim=True)

        # Mean over positive pairs; the clamp avoids dividing by zero for rows
        # that have no positive.
        mean_log_prob_pos = (mask_pos * log_prob).sum(1) / mask_pos.sum(1).clamp(min=1e-8)

        return -mean_log_prob_pos.mean()

In [None]:

# Hyperparameters of the combined loss.
temperature = 0.5
lambda_contrastive = 0.01

model_tinyvit = CombinedViTModel().to(device)
# The criterion and optimizer are rebuilt here; this cell itself only loads weights.
criterion = CombinedLoss(temperature=temperature, lambda_contrastive=lambda_contrastive).to(device)
optimizer = optim.Adam(model_tinyvit.parameters(), lr=learning_rate)

best_weights_path = "/ensemble_weights/ViT_tiny_corrected_dataset_weights.pth"
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint cannot run arbitrary code; it also avoids the torch.load
# warning that was captured in this cell's output.
# NOTE(review): assumes the file holds a plain state_dict — confirm.
best_weights = torch.load(best_weights_path, map_location=device, weights_only=True)
# NOTE(review): strict=False silently tolerates missing/unexpected keys; check the
# returned key report (it is displayed as this cell's output).
model_tinyvit.load_state_dict(best_weights, strict=False)


  best_weights = torch.load(best_weights_path, map_location=device)


<All keys matched successfully>

## ResNet-18 data loading and evaluation

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ResNet-18 with a global average pool and a two-output head.
model_resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model_resnet.avgpool = nn.AdaptiveAvgPool2d(1)
num_classes = 2
model_resnet.fc = nn.Linear(model_resnet.fc.in_features, num_classes)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine;
# weights_only=True limits unpickling to tensors and plain containers.
# NOTE(review): assumes the file holds a plain state_dict — confirm.
checkpoint = torch.load(
    'ensemble_weights/best_model_resnet_4th.pth',
    map_location=device,
    weights_only=True,
)
model_resnet.load_state_dict(checkpoint)
# Move the finished model to the device once; as the last expression of the
# cell this also displays the model summary.
model_resnet.to(device)

  checkpoint = torch.load('/kaggle/input/weightss/corrected_Weights 2/best_model_resnet_4th.pth')


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

## Official Adobe dataset label predictions 

In [None]:
# Path to the test images
test_data_path = "/adobe_data"

# Preprocessing for the ResNet predictions: convert to a tensor, then normalise
# each of the three channels with mean 0.5 and std 0.5. No resize is applied here.
transforms2 = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms



# Define a custom dataset for the test data
class CustomTestDataset(Dataset):
    '''
    Dataset over the unlabelled images stored directly inside one directory.

    Files whose name ends in .png, .jpg or .jpeg (in any letter case) are listed
    in sorted order. Each item is the image converted to RGB, optionally
    transformed, together with its file path.

    Args:
        root_dir (str): Directory containing the test images.
        transform (callable, optional): Applied to the PIL image. Defaults to None.

    Attributes:
        root_dir (str): The directory passed to the constructor.
        image_paths (list): Sorted paths of the matching image files.
        transform (callable): The transform passed to the constructor (may be None).

    Methods:
        __len__():
            Returns the number of images found.
        __getitem__(index):
            Returns `(image, image_path)` for the image at `index`.
    '''
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        # Sort for a reproducible order (os.listdir guarantees none) and match the
        # extension case-insensitively so files such as "IMG.JPG" are not dropped,
        # in line with the extension check done by prepare_dataset.
        self.image_paths = [
            os.path.join(root_dir, fname)
            for fname in sorted(os.listdir(root_dir))
            if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        image_path = self.image_paths[index]
        # The context manager closes the file handle; convert() returns a separate
        # image object, so the result stays usable afterwards.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")  # Ensure 3-channel images
        if self.transform:
            image = self.transform(image)
        return image, image_path  # Returning image and its path for reference

# Create the dataset (unlabelled images under test_data_path, ResNet preprocessing)
test_dataset2 = CustomTestDataset(root_dir=test_data_path, transform=transforms2)

# Create the dataloader
# NOTE(review): transforms2 does not resize, so default batching presumably relies on
# all images sharing one size — confirm.
test_loader2 = DataLoader(test_dataset2, batch_size=64, shuffle=False)  # Adjust batch_size as needed

In [None]:
# Switch the ResNet to evaluation mode before predicting.
model_resnet.eval()
# Lists to store predictions and file paths
test_predictions = []
image_paths = []
# Hyperparameter for threshold
# NOTE: this value is assigned but not used anywhere in this cell.
threshold = 0.79  # Adjust as needed
# Prediction loop
print("Generating predictions on test data...")
with torch.no_grad():
    for images, paths in tqdm(test_loader2):
        images = images.to(device)
        outputs = model_resnet(images)
        # Softmax over the two outputs; column 1 is kept as the score.
        # NOTE(review): which class index 1 stands for depends on the training labels — confirm.
        prob = torch.softmax(outputs, dim=1)[:, 1] # Get probability of class 1 (real class)
        test_predictions.extend(prob.cpu().numpy())
        image_paths.extend(paths)

# Save predictions to a file
# Create a DataFrame for predictions and corresponding file paths
predictions_df = pd.DataFrame({"Image_Path": image_paths, "Prediction": test_predictions})

# Save to CSV
predictions_df.to_csv("/resnet_predictions.csv", index=False)
print("Probability saved to resnet_predictions.csv")


Generating predictions on test data...


100%|██████████| 5/5 [00:00<00:00,  6.57it/s]

Probability saved to resnet_predictions.csv





In [None]:
# Preprocessing for the TinyViT predictions: resize to 32x32 and convert to a tensor.
transform1 = T.Compose([
    T.Resize(size=(32, 32)),
    T.ToTensor()
])

# Rebind test_dataset1 / test_loader1 to the unlabelled images under test_data_path.
test_dataset1 = CustomTestDataset(root_dir=test_data_path, transform=transform1)

# Create the dataloader
test_loader1 = DataLoader(test_dataset1, batch_size=64, shuffle=False)  

In [None]:
# Switch the TinyViT model to evaluation mode before predicting.
model_tinyvit.eval()

test_predictions = []
image_paths = []

# Prediction loop
print("Generating predictions on test data with TinyViT...")
with torch.no_grad():
    for images, paths in tqdm(test_loader1, desc="Testing TinyViT"):
        images = images.to(device)
        # Forward pass; the model returns (embeddings, logits) and only the logits are needed.
        _, logits = model_tinyvit(images)
        # Get probabilities using sigmoid for binary classification.
        # squeeze(1) removes only the logit dimension. A bare squeeze() would also
        # drop the batch dimension when the last batch holds a single image, and
        # extend() cannot iterate the resulting 0-d array.
        prob = torch.sigmoid(logits).squeeze(1)
        test_predictions.extend(prob.cpu().numpy())
        image_paths.extend(paths)

# Create a DataFrame for predictions and corresponding file paths
predictions_df = pd.DataFrame({"Image_Path": image_paths, "Prediction": test_predictions})

# Save to CSV (optional)
output_file = "/tinyvit_predictions.csv"
predictions_df.to_csv(output_file, index=False)

print(f"Predictions saved to {output_file}")


Generating predictions on test data with TinyViT...


Testing TinyViT: 100%|██████████| 5/5 [00:00<00:00, 15.13it/s]

Predictions saved to /kaggle/working/tinyvit_predictions.csv





In [None]:

class CustomDataset(Dataset):
    '''
    Dataset of unlabelled images that are decoded lazily from a list of file paths.

    Every image is decoded as a 3-channel RGB tensor, converted to float and divided
    by 255, then optionally passed through `transform`.

    Args:
        file_paths (list of str): Paths of the image files.
        transform (callable, optional): Applied to the decoded image tensor. Defaults to None.

    Attributes:
        file_paths (list): The image paths passed to the constructor.
        transform (callable): The transform passed to the constructor (may be None).

    Methods:
        __len__():
            Returns the number of entries in `file_paths`.
        __getitem__(index):
            Decodes the image at `index`, applies `transform` (if any) and returns it.
    '''
    def __init__(self, file_paths,transform=None):
        self.file_paths = file_paths
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        # Function-scope import: the notebook's import cell only brings in `read_image`.
        from torchvision.io import ImageReadMode

        img_path = self.file_paths[idx]
        # Decode as RGB explicitly. With the default mode a grayscale or RGBA file
        # yields 1 or 4 channels, which cannot be batched together with RGB images
        # or fed to a model that expects 3 input channels.
        img = read_image(img_path, mode=ImageReadMode.RGB).float() / 255.0

        if self.transform:
            img = self.transform(img)

        return img

# Dataset Preparation Function
def prepare_dataset(base_dir):
    '''
    List the image files stored directly inside `base_dir`.

    The listing is sorted so the returned order is the same on every run
    (`os.listdir` gives no ordering guarantee).

    Args:
        base_dir (str): Directory containing the images.

    Returns:
        list of str: Paths of the regular files whose name ends in one of the
        accepted image extensions (case-insensitive).

    Raises:
        ValueError: If `base_dir` is not an existing directory.
    '''
    if not os.path.isdir(base_dir):
        raise ValueError(f"Directory {base_dir} does not exist.")

    # NOTE(review): .bmp/.tiff are accepted here; confirm that the image reader
    # used by the dataset class can actually decode them.
    image_extensions = (".png", ".jpg", ".jpeg", ".bmp", ".tiff")

    file_paths = []
    for fname in sorted(os.listdir(base_dir)):
        file_path = os.path.join(base_dir, fname)
        if os.path.isfile(file_path) and fname.lower().endswith(image_extensions):
            file_paths.append(file_path)

    return file_paths


# Paths

# Folder holding the unlabelled images to predict on.
test_dir = "/adobe_data"

# Prepare Dataset

test_paths = prepare_dataset(test_dir)

# Transforms
# Augmenting transform (resize + random flips). It is defined here but not
# passed to any dataset in this cell.
train_transform = T.Compose([
    T.Resize((224, 224)),
    T.RandomHorizontalFlip(p=0.5),
    T.RandomVerticalFlip(p=0.5),
])

# Deterministic transform used for prediction: resize only.
test_transform = T.Compose([
    T.Resize((224, 224)),
])

# Dataset and DataLoader
batch_size = 64


test_dataset = CustomDataset(test_paths, transform=test_transform)


# shuffle=False keeps the batches in the same order as test_paths, which the
# prediction cell relies on to pair predictions with paths.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Ensure the model is in evaluation mode
model_efficient_net.eval()

# Lists to store predictions and file paths
test_predictions = []
test_image_paths = []

# Hyperparameter for threshold (assigned for reference; not applied in this cell)
threshold = 0.5  # Adjust as needed

# Prediction loop
print("Generating predictions on test data with EfficientNet...")
with torch.no_grad():
    for images in tqdm(test_loader, desc="Testing EfficientNet"):
        images = images.to(device)  # Move images to the device
        outputs = model_efficient_net(images)
        # squeeze(1) removes only the logit dimension. A bare squeeze() would also
        # drop the batch dimension when the last batch holds a single image, and
        # extend() cannot iterate the resulting 0-d array.
        prob = torch.sigmoid(outputs).squeeze(1)
        test_predictions.extend(prob.cpu().numpy())

# Associate predictions with image paths. The loader is not shuffled, so the
# predictions are in the same order as test_paths.
test_image_paths = test_paths  # Paths are already available

# Save predictions to a file
# Create a DataFrame for predictions and corresponding file paths
predictions_df = pd.DataFrame({"Image_Path": test_image_paths, "Prediction": test_predictions})

# Save to CSV (optional)
output_file = "/efficientnet_predictions.csv"
predictions_df.to_csv(output_file, index=False)

print(f"Predictions saved to {output_file}")

Generating predictions on test data with EfficientNet...


Testing EfficientNet: 100%|██████████| 5/5 [00:00<00:00,  5.14it/s]

Predictions saved to /kaggle/working/efficientnet_predictions.csv





In [None]:
# Load predictions from CSVs
efficientnet_df = pd.read_csv("/efficientnet_predictions.csv")
tinyvit_df = pd.read_csv("/tinyvit_predictions.csv")
resnet_df = pd.read_csv("/resnet_predictions.csv")

# Merge DataFrames on Image_Path
# The default inner join keeps only images present in all three CSVs. The first
# merge renames the two clashing "Prediction" columns with pandas' default
# suffixes (Prediction_x = EfficientNet, Prediction_y = TinyViT); the ResNet
# column then no longer clashes and stays "Prediction".
combined_df = efficientnet_df.merge(tinyvit_df, on="Image_Path").merge(resnet_df, on="Image_Path")

# Assign weights to models (adjust as hyperparameters)
weights = {
    "EfficientNet_Prob": 0.6,
    "TinyViT_Prob": 0.3,
    "ResNet_Prob": 0.1
}

# Calculate weighted average
combined_df["Weighted_Prob"] = (
    weights["EfficientNet_Prob"] * combined_df["Prediction_x"] +
    weights["TinyViT_Prob"] * combined_df["Prediction_y"] +
    weights["ResNet_Prob"] * combined_df["Prediction"]
)

# Apply a final threshold to classify as binary (fake or real)
# 1 is written when the weighted score reaches the threshold, otherwise 0.
final_threshold = 0.52
combined_df["Final_Prediction"] = (combined_df["Weighted_Prob"] >= final_threshold).astype(int)

# Save final predictions to CSV
output_file = "/final_predictions.csv"
combined_df.to_csv(output_file, index=False)
print(f"Final predictions saved to {output_file}")



Final predictions saved to /kaggle/working/final_predictions.csv


## Create a JSON file of the predicted output

In [None]:
import pandas as pd
import json

data = pd.read_csv('/final_predictions.csv')

# File name without its directories: the part after the last '/'.
# Vectorised string operations replace the element-wise chained assignment
# (data[col][i] = ...), which triggers SettingWithCopyWarning and has no effect
# once pandas copy-on-write is enabled.
data['New_Image_Path'] = data['Image_Path'].str.split('/').str[-1]

# Part of the file name before its first '.', kept as text at this point.
data['Image_number'] = data['New_Image_Path'].str.split('.').str[0]

In [None]:
# Drop everything except the image path and the final label.
unused_columns = ['Prediction_x','New_Image_Path','Prediction','Weighted_Prob','Prediction_y','Image_number']
data_for_images = data.drop(columns=unused_columns)

# Rows whose final prediction is 0.
predicted_zero = data_for_images['Final_Prediction'] == 0
data_zero_prediction = data_for_images.loc[predicted_zero, ['Image_Path','Final_Prediction']]

In [None]:
import os
import shutil

# Destination folder for copies of the images listed in data_zero_prediction.
out_folder = "fake_images"
os.makedirs(out_folder, exist_ok=True)

for image_path in data_zero_prediction["Image_Path"]:
    try:
        # Check if the file exists before attempting to copy
        if os.path.exists(image_path):
            shutil.copy(image_path, out_folder)
        else:
            print(f"File not found: {image_path}")
    except OSError as exc:
        # Copying stays best-effort, but a failure is reported instead of being
        # swallowed silently, and unrelated errors are no longer hidden.
        print(f"Could not copy {image_path}: {exc}")


In [None]:
# Drop the path columns, parse the image numbers (non-numeric names become NaN),
# order the rows by that number and reset the index to consecutive integers.
data = (
    data.drop(columns=['Image_Path', 'New_Image_Path'])
    .assign(Image_number=lambda frame: pd.to_numeric(frame['Image_number'], errors='coerce'))
    .sort_values('Image_number')
    .reset_index(drop=True)
)

# Overwrite 'Image_number' with consecutive integers starting from 1, in the sorted order.
data['Image_number'] = range(1, len(data) + 1)

In [None]:
# Keep only the image number and the final label, then spell the label out.
score_columns = ['Prediction_x', 'Prediction_y', 'Weighted_Prob','Prediction']
data2 = data.drop(columns=score_columns)
label_names = {0: 'fake', 1: 'real'}
data2['Final_Prediction'] = data2['Final_Prediction'].replace(label_names)

In [None]:
# Cast the two exported columns to integer and string respectively.
data2['Image_number'] = data2['Image_number'].astype(int)
data2['Final_Prediction'] = data2['Final_Prediction'].astype(str)

# Build one {"index": ..., "prediction": ...} record per row.
image_numbers = data2['Image_number'].tolist()
predicted_labels = data2['Final_Prediction'].tolist()
result = [
    {"index": image_number, "prediction": predicted_label}
    for image_number, predicted_label in zip(image_numbers, predicted_labels)
]

# Serialise the records as indented JSON.
json_output = json.dumps(result, indent=4)

# Write the JSON text to disk.
output_file_path = "/predictions.json"
with open(output_file_path, "w") as json_file:
    json_file.write(json_output)