# Model Training

This notebook handles the training of different models for the Multidisciplinary Deepfake Detection product. It includes steps for loading data, preprocessing, defining models, training, and saving the trained models.

In [None]:
# To import necessary libraries
import os
import numpy as np
import pandas as pd
from tensorflow.keras.optimizers import Adam
import torch
import joblib
import logging

# Set up logging.
# logging.basicConfig(filename=...) opens the file immediately and raises
# FileNotFoundError when the parent directory is missing, so create the log
# directory first (a no-op when it already exists).
LOG_FILE = '../logs/model_training.log'
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(filename=LOG_FILE, level=logging.INFO,
                    format='%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')

# Project-local imports: configuration, data helpers, model definitions, and logging utilities
from src.config import Config
from src.dataset.data_loader import load_csv_data
from src.dataset.data_splitter import split_data
from src.models.cnn import CNNModel
from src.models.transformer import TransformerModel
from src.models.svm import SVMModel
from src.models.bayesian import BayesianModel
from src.models.vision_transformer import VisionTransformer
from src.utils.logger import setup_logger

# Write a start marker to the log configured by logging.basicConfig earlier in this cell
logging.info("Model training started.")

## Load Processed Data

Load the processed dataset from `Config.PROCESSED_DATA_DIR` and separate the feature columns from the `label` column.

In [None]:
# Read the processed dataset, then separate the target column from the features
processed_data_path = os.path.join(Config.PROCESSED_DATA_DIR, 'processed_data.csv')
logging.info("Loading processed data from {}.".format(processed_data_path))

data = load_csv_data(processed_data_path)
y = data['label']                   # target column
X = data.drop(columns=['label'])    # every remaining column is a feature
logging.info("Processed data loaded successfully with shape {}.".format(data.shape))

## Split Data

Split the features and labels into training and validation sets (`test_size=0.2`).

In [None]:
# Partition features and labels; the four resulting splits are consumed by the training cells
train_val_splits = split_data(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_val_splits
logging.info(
    "Data split into training and validation sets with training data shape: {} and validation data shape: {}.".format(
        X_train.shape, X_val.shape
    )
)

## Train CNN Model

Define, train, and save the CNN model.

In [None]:
# Build, compile, fit, and save the CNN model
logging.info("Training CNN model...")
# NOTE(review): X_train / X_val are the non-label columns of a CSV, while the network
# is built for (64, 64, 3) inputs — confirm the features are reshaped to that layout
# before fitting.
cnn_model = CNNModel.build(input_shape=(64, 64, 3), num_classes=len(y.unique()))
optimizer = Adam(learning_rate=Config.CNN_PARAMS['learning_rate'])

# `y` is a single label column (one class id per sample, not a one-hot row), so the
# sparse variant of categorical cross-entropy is the loss that matches the targets.
# Assumes the labels are integer ids in [0, num_classes) — TODO confirm.
cnn_model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_history = cnn_model.fit(
    X_train,
    y_train,
    epochs=Config.CNN_PARAMS['epochs'],
    batch_size=Config.CNN_PARAMS['batch_size'],
    validation_data=(X_val, y_val),
)

# Save the trained model
cnn_model_path = os.path.join(Config.MODEL_DIR, 'cnn_model.h5')
cnn_model.save(cnn_model_path)
logging.info("CNN model saved at {}".format(cnn_model_path))

## Train Transformer Model

Define, train, and save the Transformer model.

In [None]:
# Build, train (full-batch), and save the Transformer model
logging.info("Training Transformer model...")
transformer_model = TransformerModel(
    input_dim=Config.TRANSFORMER_PARAMS['input_dim'],
    model_dim=Config.TRANSFORMER_PARAMS['model_dim'],
    num_heads=Config.TRANSFORMER_PARAMS['num_heads'],
    num_layers=Config.TRANSFORMER_PARAMS['num_layers'],
    output_dim=Config.TRANSFORMER_PARAMS['output_dim']
)

optimizer = torch.optim.Adam(transformer_model.parameters(), lr=Config.TRANSFORMER_PARAMS['learning_rate'])
criterion = torch.nn.BCELoss()

# The training data does not change between epochs, so convert it to tensors once
# instead of copying the whole DataFrame on every iteration.
transformer_train_features = torch.tensor(X_train.values, dtype=torch.float32)
transformer_train_targets = torch.tensor(y_train.values, dtype=torch.float32)
# NOTE(review): BCELoss requires outputs in [0, 1] with the same shape as the targets —
# confirm TransformerModel ends in a sigmoid and returns one value per sample.

transformer_epochs = Config.TRANSFORMER_PARAMS['epochs']
# Nothing in the loop switches the model to eval mode, so setting train mode once is enough.
transformer_model.train()
for epoch in range(transformer_epochs):
    optimizer.zero_grad()
    outputs = transformer_model(transformer_train_features)
    loss = criterion(outputs, transformer_train_targets)
    loss.backward()
    optimizer.step()
    logging.info("Epoch [{}/{}], Loss: {:.4f}".format(epoch + 1, transformer_epochs, loss.item()))

# Save only the learned weights (state_dict), not the module object itself
transformer_model_path = os.path.join(Config.MODEL_DIR, 'transformer_model.pth')
torch.save(transformer_model.state_dict(), transformer_model_path)
logging.info("Transformer model saved at {}".format(transformer_model_path))

## Train SVM Model

Define, train, and save the SVM model.

In [None]:
# Fit the SVM model on the training split
logging.info("Training SVM model...")
svm_params = Config.SVM_PARAMS
svm_model = SVMModel.build(kernel=svm_params['kernel'], C=svm_params['C'])
svm_model.fit(X_train, y_train)

# Persist the fitted model with joblib
svm_model_path = os.path.join(Config.MODEL_DIR, 'svm_model.pkl')
joblib.dump(svm_model, svm_model_path)
logging.info("SVM model saved at {}".format(svm_model_path))

## Train Bayesian Model

Define, train, and save the Bayesian model.

In [None]:
# Fit the Bayesian model on the training split (passed as plain arrays via .values)
logging.info("Training Bayesian model...")
bayesian_params = Config.BAYESIAN_PARAMS
bayesian_model = BayesianModel(
    prior_mean=bayesian_params['prior_mean'],
    prior_std=bayesian_params['prior_std'],
)
bayesian_model.fit(X_train.values, y_train.values)

# Persist the fitted model with joblib
bayesian_model_path = os.path.join(Config.MODEL_DIR, 'bayesian_model.pkl')
joblib.dump(bayesian_model, bayesian_model_path)
logging.info("Bayesian model saved at {}".format(bayesian_model_path))

## Train Vision Transformer Model

Define, train, and save the Vision Transformer model.

In [None]:
# Build, train (full-batch), and save the Vision Transformer model
logging.info("Training Vision Transformer model...")
vision_transformer_model = VisionTransformer(
    img_size=Config.VISION_TRANSFORMER_PARAMS['img_size'],
    patch_size=Config.VISION_TRANSFORMER_PARAMS['patch_size'],
    num_classes=Config.VISION_TRANSFORMER_PARAMS['num_classes'],
    dim=Config.VISION_TRANSFORMER_PARAMS['dim'],
    depth=Config.VISION_TRANSFORMER_PARAMS['depth'],
    heads=Config.VISION_TRANSFORMER_PARAMS['heads'],
    mlp_dim=Config.VISION_TRANSFORMER_PARAMS['mlp_dim']
)

optimizer = torch.optim.Adam(vision_transformer_model.parameters(), lr=Config.VISION_TRANSFORMER_PARAMS['learning_rate'])
criterion = torch.nn.BCELoss()

# The training data does not change between epochs, so convert it to tensors once
# instead of copying the whole DataFrame on every iteration.
vit_train_features = torch.tensor(X_train.values, dtype=torch.float32)
vit_train_targets = torch.tensor(y_train.values, dtype=torch.float32)
# NOTE(review): the model is configured with img_size / patch_size but is fed the flat
# feature matrix from the CSV — confirm the expected input layout. BCELoss also requires
# outputs in [0, 1] with the same shape as the targets; verify against VisionTransformer.

vit_epochs = Config.VISION_TRANSFORMER_PARAMS['epochs']
# Nothing in the loop switches the model to eval mode, so setting train mode once is enough.
vision_transformer_model.train()
for epoch in range(vit_epochs):
    optimizer.zero_grad()
    outputs = vision_transformer_model(vit_train_features)
    loss = criterion(outputs, vit_train_targets)
    loss.backward()
    optimizer.step()
    logging.info("Epoch [{}/{}], Loss: {:.4f}".format(epoch + 1, vit_epochs, loss.item()))

# Save only the learned weights (state_dict), not the module object itself
vision_transformer_model_path = os.path.join(Config.MODEL_DIR, 'vision_transformer_model.pth')
torch.save(vision_transformer_model.state_dict(), vision_transformer_model_path)
logging.info("Vision Transformer model saved at {}".format(vision_transformer_model_path))

## Summary

Training of all models is complete. Each trained model has been saved to the directory given by `Config.MODEL_DIR`.

In [None]:
# Write an end marker to the log once all training cells have run
logging.info("Model training completed.")