<a href="https://colab.research.google.com/github/PrishaAbrol/deepfake/blob/main/deep_fake.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages
!pip install -U -q evaluate transformers datasets accelerate torch torchvision

import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

from google.colab import drive

# Mount Google Drive so the Hack folder is reachable from this runtime.
# A failed mount is only reported here; execution continues afterwards.
try:
    drive.mount('/content/drive')
except Exception as mount_error:
    print(f"Error mounting Google Drive: {mount_error}")

# Root of the Hack dataset and its three split sub-folders.
base_dir = '/content/drive/MyDrive/Hack'
train_dir, test_dir, val_dir = (
    os.path.join(base_dir, split_name)
    for split_name in ('Train', 'Test', 'Validation')
)

# Function to load images and labels from a directory
def load_images_from_folder(folder):
    """Load every image below ``folder`` together with its class label.

    ``folder`` is expected to hold one sub-directory per class; the
    sub-directory name becomes the label of every image inside it. Entries
    of ``folder`` that are not directories are ignored.

    Each image is converted to RGB, resized to 224x224 and scaled to [0, 1].
    Files that cannot be loaded are reported with ``print`` and skipped.

    Args:
        folder: Path of the directory containing the class sub-directories.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays: the stacked image
        arrays and the matching folder names. Both are empty when nothing
        could be loaded.

    Raises:
        FileNotFoundError: If ``folder`` does not exist.
        NotADirectoryError: If ``folder`` exists but is not a directory.
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(f"Error: Directory '{folder}' not found.")

    if not os.path.isdir(folder):
        raise NotADirectoryError(f"Error: '{folder}' is not a directory.")

    images = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # result (and any preview built from it) reproducible between runs.
    for class_folder in sorted(os.listdir(folder)):
        class_path = os.path.join(folder, class_folder)
        if not os.path.isdir(class_path):
            continue

        for img_file in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_file)
            try:
                # The context manager releases the file handle right away
                # instead of leaving it open until garbage collection.
                with Image.open(img_path) as img:
                    # Force three channels so grayscale, RGBA and palette
                    # files all produce (224, 224, 3) arrays that stack
                    # into one regular array.
                    resized = img.convert("RGB").resize((224, 224))
                img_array = np.array(resized) / 255.0  # Normalize to [0, 1]
            except Exception as e:
                # Best effort: one unreadable file must not abort the load.
                print(f"Error loading image {img_path}: {e}")
                continue

            images.append(img_array)
            labels.append(class_folder)  # Use folder name as the label

    return np.array(images), np.array(labels)

# Load images and labels from the Train, Test, and Validation directories
try:
    train_images, train_labels = load_images_from_folder(train_dir)
    test_images, test_labels = load_images_from_folder(test_dir)
    val_images, val_labels = load_images_from_folder(val_dir)

    print("Train set:", train_images.shape, train_labels.shape)
    print("Test set:", test_images.shape, test_labels.shape)
    print("Validation set:", val_images.shape, val_labels.shape)

except FileNotFoundError as fnf_error:
    print(fnf_error)
except NotADirectoryError as nd_error:
    print(nd_error)
except Exception as e:
    print(f"An unexpected error occurred: {e}")
else:
    # Preview only after a successful load: when loading failed the arrays
    # do not exist, and touching them would raise NameError on top of the
    # error already reported above.
    # Show at most 5 images so a smaller training set cannot raise IndexError.
    for i in range(min(5, len(train_images))):
        plt.imshow(train_images[i])
        plt.title(f"Label: {train_labels[i]}")
        plt.axis('off')
        plt.show()

# Importing additional modules for machine learning
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Handle class imbalance using RandomOverSampler
from imblearn.over_sampling import RandomOverSampler
from pathlib import Path

# Collect every training file together with the name of its class folder.
# Path.parent.name is used instead of splitting on '/' so the label does not
# depend on the platform's path separator.
image_paths = sorted(Path(train_dir).glob('*/*.*'))
file_names = [str(image_path) for image_path in image_paths]
labels = [image_path.parent.name for image_path in image_paths]

# Fail early with a clear message instead of an obscure resampler error when
# the training directory is missing or empty.
if not file_names:
    raise FileNotFoundError(f"No image files found under '{train_dir}'.")

df = pd.DataFrame.from_dict({"image": file_names, "label": labels})
print(df['label'].unique())  # Check unique labels

# Resample to address class imbalance.
# NOTE(review): oversampling happens before the train/test split performed
# later in this script, so copies of the same image can land on both sides
# of that split — consider splitting first and oversampling only the
# training part.
y = df['label'].to_numpy()
df = df.drop(['label'], axis=1)
ros = RandomOverSampler(random_state=83)
df_resampled, y_resampled = ros.fit_resample(df, y)
df_resampled['label'] = y_resampled

# Convert to HuggingFace Dataset format.
# The datasets Image feature is imported under an alias so that it does not
# rebind the name `Image`, which this notebook already uses for PIL's Image
# module inside load_images_from_folder.
from datasets import Dataset
from datasets import Image as HFImage

# Set class labels
labels_list = ['Real', 'Fake']
label2id = {label: i for i, label in enumerate(labels_list)}
id2label = {i: label for i, label in enumerate(labels_list)}

# Encode the folder-name labels as integer ids; the batch collator builds a
# tensor from the 'label' column, which is not possible with strings.
encoded_labels = df_resampled['label'].map(label2id)
if encoded_labels.isna().any():
    unknown_labels = sorted(set(df_resampled.loc[encoded_labels.isna(), 'label']))
    raise ValueError(
        f"Unexpected class folder(s) {unknown_labels}; expected one of {labels_list}"
    )

dataset = Dataset.from_pandas(
    df_resampled.assign(label=encoded_labels.astype('int64'))
).cast_column("image", HFImage())

# Data transformations using Torchvision
from torchvision.transforms import (
    Compose,
    Normalize,
    RandomAdjustSharpness,
    RandomRotation,
    Resize,
    ToTensor,
)
from transformers import ViTImageProcessor

# Take the normalisation statistics and input size from the image processor
# published with the pretrained checkpoint.
model_str = "dima806/deepfake_vs_real_image_detection"
processor = ViTImageProcessor.from_pretrained(model_str)
image_mean = processor.image_mean
image_std = processor.image_std
size = processor.size["height"]

normalize = Normalize(mean=image_mean, std=image_std)

# Training pipeline: resize, random rotation and sharpness adjustment,
# tensor conversion, normalisation.
_train_transforms = Compose(
    [
        Resize((size, size)),
        RandomRotation(90),
        RandomAdjustSharpness(2),
        ToTensor(),
        normalize,
    ]
)

# Evaluation pipeline: the same steps without the random ones.
_val_transforms = Compose(
    [
        Resize((size, size)),
        ToTensor(),
        normalize,
    ]
)

# Split the dataset into training and test sets
dataset = dataset.train_test_split(test_size=0.2)
train_data = dataset['train']
test_data = dataset['test']


def train_transforms(examples):
    """Add augmented ``pixel_values`` to a batch while keeping its other columns.

    The batch is returned with the new key added rather than replaced by a
    fresh dict, so the 'label' column stays available to the data collator.
    """
    examples['pixel_values'] = [
        _train_transforms(image.convert("RGB")) for image in examples['image']
    ]
    return examples


def val_transforms(examples):
    """Add deterministic ``pixel_values`` to a batch while keeping its other columns."""
    examples['pixel_values'] = [
        _val_transforms(image.convert("RGB")) for image in examples['image']
    ]
    return examples


# Apply data transformations on the fly whenever examples are accessed
train_data.set_transform(train_transforms)
test_data.set_transform(val_transforms)

# Load the pretrained ViT classifier and attach this notebook's label maps
from transformers import ViTForImageClassification, Trainer, TrainingArguments

model = ViTForImageClassification.from_pretrained(
    model_str,
    num_labels=len(labels_list),
)
# Store both directions of the id <-> name mapping on the model config.
model.config.id2label, model.config.label2id = id2label, label2id

# Set up training arguments
args = TrainingArguments(
    output_dir="deepfake_vs_real_image_detection",
    # `evaluation_strategy` was renamed to `eval_strategy` in transformers;
    # this notebook upgrades transformers at the top, so the current name
    # is used.
    eval_strategy="epoch",
    learning_rate=1e-6,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.02,
    save_strategy='epoch',
    load_best_model_at_end=True,
    # Keep the raw 'image' column: the on-the-fly transforms read it, and
    # Trainer would otherwise drop every column the model's forward() does
    # not accept before the transform runs.
    remove_unused_columns=False,
)

# Define Trainer object
import torch


def collate_fn(examples):
    """Combine a list of examples into one batch dict of stacked tensors."""
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    batch_labels = torch.tensor([example['label'] for example in examples])
    return {'pixel_values': pixel_values, 'labels': batch_labels}


def compute_metrics(eval_pred):
    """Return the accuracy of the argmax (axis 1) of the predictions."""
    predicted_ids = eval_pred.predictions.argmax(1)
    return {"accuracy": accuracy_score(eval_pred.label_ids, predicted_ids)}


trainer = Trainer(
    model,
    args,
    train_dataset=train_data,
    eval_dataset=test_data,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
)

# Train and evaluate the model
trainer.train()
trainer.evaluate()

# Run inference on the test split once and reuse the result for both the
# true labels and the predicted class ids; calling predict() twice would
# repeat the whole inference pass for no benefit.
test_output = trainer.predict(test_data)
y_true = test_output.label_ids
y_pred = test_output.predictions.argmax(1)

def plot_confusion_matrix(cm, classes, title='Confusion Matrix'):
    """Draw a confusion matrix as a blue heat map with pyplot.

    Args:
        cm: Matrix of values to display.
        classes: Sequence of class names used as tick labels on both axes.
        title: Text shown above the plot.
    """
    tick_positions = np.arange(len(classes))

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()

    # Horizontal axis: predicted classes, tilted so long names do not overlap.
    plt.xticks(tick_positions, classes, rotation=45)
    plt.xlabel('Predicted label')

    # Vertical axis: true classes.
    plt.yticks(tick_positions, classes)
    plt.ylabel('True label')

# Pass the label ids explicitly so the matrix always has one row and column
# per entry of labels_list, even if a class is missing from y_true and
# y_pred; otherwise the matrix shrinks and no longer lines up with the ticks.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(labels_list))))
plot_confusion_matrix(cm, classes=labels_list)


In [None]:
# print is a function in Python 3; the statement form is a SyntaxError.
print("Here's a chatbot for extra info:")
import dialogflow_v2 as dialogflow
import os

# Path to your Google Cloud service account key file (replace the placeholder),
# exported through the GOOGLE_APPLICATION_CREDENTIALS environment variable.
SERVICE_ACCOUNT_KEY_FILE = "path-to-your-service-account-file.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SERVICE_ACCOUNT_KEY_FILE

def detect_intent_texts(project_id, session_id, text, language_code):
    """Send one text query to a Dialogflow session and return the reply text.

    Args:
        project_id: Project identifier used to build the session path.
        session_id: Session identifier used to build the session path.
        text: The user utterance to send.
        language_code: Language code passed along with the utterance.

    Returns:
        The ``fulfillment_text`` of the response's query result.
    """
    client = dialogflow.SessionsClient()
    session_path = client.session_path(project_id, session_id)

    query_input = dialogflow.types.QueryInput(
        text=dialogflow.types.TextInput(text=text, language_code=language_code)
    )

    reply = client.detect_intent(session=session_path, query_input=query_input)
    return reply.query_result.fulfillment_text

# Example usage: placeholder project settings and a single greeting query
project_id = 'your-dialogflow-project-id'
session_id = '123456'  # Random unique ID for each session
text = 'Hello, how can you help me?'
language_code = 'en'

response = detect_intent_texts(
    project_id=project_id,
    session_id=session_id,
    text=text,
    language_code=language_code,
)
print(response)