In [1]:
# Install TensorFlow (includes Keras)
!pip install tensorflow

# Install tqdm for progress visualization
!pip install tqdm

# Install scikit-learn for LabelEncoder
!pip install scikit-learn



In [13]:
!pip install transformers datasets accelerate

from transformers import ViTForImageClassification, ViTFeatureExtractor, TrainingArguments, Trainer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from datasets import Dataset
from PIL import Image
import os
import numpy as np
from sklearn.metrics import accuracy_score
from transformers import EvalPrediction
# Constants
IMAGE_SIZE = 224
BATCH_SIZE = 16
TRAIN_DIR = "/content/drive/MyDrive/New_Data/New_Data"

# Feature extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')

# Function to load dataset and labels
def createdataframe(train_dir):
    """
    Creates a dataframe of image paths and labels from a directory.

    Args:
        train_dir: The path to the directory containing the images.

    Returns:
        A tuple containing two lists:
            - image_paths: A list of paths to the images.
            - labels: A list of labels corresponding to the images.
    """
    image_paths = []
    labels = []
    for label in os.listdir(train_dir):
        label_dir = os.path.join(train_dir, label)
        if os.path.isdir(label_dir):
            for filename in os.listdir(label_dir):
                if filename.lower().endswith(('.png', '.jpg', '.jpeg')):  # Add more image extensions if needed
                    image_path = os.path.join(label_dir, filename)
                    image_paths.append(image_path)
                    labels.append(label)
    return image_paths, labels

# Preprocess function for dataset
def preprocess_dataset(image_paths, labels, feature_extractor, input_size):
    le = LabelEncoder()
    encoded_labels = le.fit_transform(labels)

    # Create Hugging Face Dataset
    dataset = Dataset.from_dict({"image_path": image_paths, "label": encoded_labels})

    def preprocess_function(examples):
        images = [
            np.array(Image.open(path).convert('RGB').resize((input_size, input_size)))
            for path in examples["image_path"]
        ]
        inputs = feature_extractor(images, return_tensors="np")
        examples["pixel_values"] = inputs["pixel_values"]
        return examples

    # Apply preprocessing to the dataset
    dataset = dataset.map(preprocess_function, batched=True, batch_size=BATCH_SIZE)
    return dataset

# Load data
train_image_paths, train_labels = createdataframe(TRAIN_DIR)

# Prepare dataset
dataset = preprocess_dataset(train_image_paths, train_labels, feature_extractor, IMAGE_SIZE)

# Split into training and validation sets
train_size = int(len(dataset) * 0.8)  # 80% training
eval_size = len(dataset) - train_size  # 20% validation

# Select subsets
train_dataset = dataset.select(range(train_size))
eval_dataset = dataset.select(range(train_size, train_size + eval_size))

# Compute metrics function
def compute_metrics(eval_pred: EvalPrediction):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    accuracy = accuracy_score(labels, predictions)
    return {"accuracy": accuracy}

# Define the model
model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=2
)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=BATCH_SIZE,
    num_train_epochs=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_total_limit=2,  # Limit the number of saved checkpoints
)

# Define Trainer with compute_metrics
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=feature_extractor,  # Required for Hugging Face Trainer
    compute_metrics=compute_metrics  # Add compute_metrics function
)

# Train the model
trainer.train()

# Save the trained model
trainer.save_model("/content/model")






Map:   0%|          | 0/590 [00:00<?, ? examples/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.812728,0.228814
2,No log,0.799022,0.389831
3,No log,0.695033,0.610169
4,No log,0.602699,0.694915
5,No log,0.76324,0.635593
6,No log,0.716243,0.677966
7,No log,0.635653,0.728814
8,No log,0.664369,0.737288
9,No log,0.704661,0.728814
10,No log,0.691203,0.728814


In [15]:
!pip install torch torchvision



In [17]:
import pandas as pd
import os
from PIL import Image
import numpy as np
from tqdm.notebook import tqdm
import torch  # Import torch for tensor operations

# Constants for test data
TEST_DIR = "/content/drive/MyDrive/Test_Images"  # Adjust this to the path of your test dataset
BATCH_SIZE = 16

# Function to preprocess and extract features from test images
def preprocess_test_images(test_dir, feature_extractor, input_size):
    """
    Preprocess test images using the feature extractor.

    Args:
        test_dir: Path to the test images directory.
        feature_extractor: The ViT feature extractor.
        input_size: Target size to resize images.

    Returns:
        A tuple containing:
            - List of test image paths
            - Preprocessed pixel values for test images
    """
    test_image_paths = []
    pixel_values = []

    for filename in os.listdir(test_dir):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            test_image_paths.append(os.path.join(test_dir, filename))

    # Preprocess images
    for path in tqdm(test_image_paths, desc="Processing test images"):
        image = Image.open(path).convert('RGB').resize((input_size, input_size))
        pixel_values.append(feature_extractor(images=np.array(image), return_tensors="pt")["pixel_values"].squeeze(0))

    pixel_values = torch.stack(pixel_values)
    return test_image_paths, pixel_values

# Load test data
test_image_paths, test_pixel_values = preprocess_test_images(TEST_DIR, feature_extractor, IMAGE_SIZE)

# Prepare predictions
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    logits = model(test_pixel_values.to(model.device)).logits  # Predict logits
predictions = torch.softmax(logits, dim=1).cpu().numpy()

# Create submission DataFrame
submission = pd.DataFrame({
    "Id": [os.path.splitext(os.path.basename(path))[0] for path in test_image_paths],  # Remove file extensions
    "Label": ["AI" if pred[1] > pred[0] else "Real" for pred in predictions]
})

# Save to CSV
submission.to_csv("submission.csv", index=False)
print("Submission file saved as submission.csv")

Processing test images:   0%|          | 0/200 [00:00<?, ?it/s]

Submission file saved as submission.csv
