# 1. Import necessary libraries

In [133]:
# Importing the libraries for video classification
import torch, copy, time, os, cv2
from torchvision.models.video.resnet import BasicBlock, Conv3DSimple
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Subset
from torchvision import transforms
from sklearn.model_selection import train_test_split, StratifiedKFold
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# 2. Initialize the seed and the device

In [134]:
# Setting the seed for reproducibility
seed = 0
def reset_seed():
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Setting the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 3. Dataset preparation

## 3.1. Video Dataset Class

In [135]:
# Defining the video dataset class
class VideoDataset(torch.utils.data.Dataset):
    def __init__(self, video_paths, label_paths, transform=None, frame_count=38):
        self.video_paths = video_paths
        self.label_paths = label_paths
        self.transform = transform
        self.frame_count = frame_count
        
    def __len__(self):
        return len(self.video_paths)
    
    def __getitem__(self, index):
        video_path = self.video_paths[index]
        label_path = self.label_paths[index]
        frames = self.load_frames(video_path=video_path)
        label = self.load_label(label_path=label_path)
        return frames, label
    
    def load_frames(self, video_path):
        frames = []
        video = cv2.VideoCapture(video_path)        
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        for i in range(total_frames):
            video.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = video.read()
            if ret:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                frame_pil = Image.fromarray(frame)
                frames.append(frame_pil)                
        while len(frames) < self.frame_count:
            frames.append(frames[-1])   
        if self.transform is not None:
            frames = [self.transform(frame) for frame in frames] 
        print(np.array(frames).shape)
        frames = torch.stack(frames).permute(1, 0, 2, 3)
        return frames
    
    def load_label(self, label_path):
        label = []
        diacritics = {
            '\u064B',  # Fathatan
            '\u064C',  # Dammatan
            '\u064D',  # Kasratan
            '\u064E',  # Fatha
            '\u064F',  # Damma
            '\u0650',  # Kasra
            '\u0651',  # Shadda
            '\u0652',  # Sukun
            '\u06E2',  # Small High meem
        }
        a = pd.read_csv(label_path)
        for i in a.word:
            for char in i:
                if char not in diacritics:
                    label.append(char)
                else:
                    label[-1] += char
        return label
# Defining the video transform
transforms = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=0.449, std=0.226),
])

## 3.2. Load the dataset

In [136]:
videos_dir = "D:/_hazem/Graduation Project/Arabic-Lib-Reading/Dataset/Video"
labels_dir = "D:/_hazem/Graduation Project/Arabic-Lib-Reading/Dataset/Csv (with Diacritics)"
videos, labels = [], []
file_names = [i[:-4] for i in os.listdir(videos_dir)]
for file_name in file_names:
    videos.append(os.path.join(videos_dir, file_name + ".mp4"))
    labels.append(os.path.join(labels_dir, file_name + ".csv"))


vid_data = VideoDataset(video_paths=videos, label_paths=labels, transform=transforms)

frames, label = vid_data.__getitem__(0)
frames.shape

(38, 1, 112, 112)


torch.Size([1, 38, 112, 112])

## 3.3. Split the dataset

In [None]:
# Split the dataset into training, validation, test sets
X_temp, X_test, y_temp, y_test = train_test_split(videos, labels, test_size=0.2, random_state=seed, stratify=labels)
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)
folds = list(kfold.split(np.zeros(len(X_temp)), y_temp))

## 3.4. DataLoaders

In [None]:
# Defining the video dataloaders (train, validation, test)
def get_dataloader(fold, frame_count=16):
    train_indices, val_indices = folds[fold]
    temp_dataset = VideoDataset(X_temp, y_temp, transform=transforms, frame_count=frame_count)
    test_dataset = VideoDataset(X_test, y_test, transform=transforms, frame_count=frame_count)
    train_loader = DataLoader(Subset(temp_dataset, train_indices), batch_size=8, shuffle=True, pin_memory=True)
    val_loader = DataLoader(Subset(temp_dataset, val_indices), batch_size=8, shuffle=False, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, pin_memory=True)
    return train_loader, val_loader, test_loader

# 4. Model

# 5. Training and Evaluation

In [None]:
# Training the model
def train_one_epoch(model, train_loader, optimizer):
    model.train()
    criterion = nn.CrossEntropyLoss()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()


def evaluate_model(model, val_loader, return_preds=False):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            if return_preds:
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
    return running_loss / len(val_loader), all_preds, all_labels

# 6. Parallel Learning

## 6.1. Optimizing the learning rate and training and evaluating the model

In [None]:
def objective(lr, train_loader, val_loader, model_main):
    losses = []
    for lr_transfer, lr_tune in lr:
        model = copy.deepcopy(model_main)
        # Initialize model, optimizer, and loss function
        optimizer = optim.Adam(model.parameters(), lr=lr_transfer)
        best_val_loss = float('inf')

        # Train and validate
        for _ in range(15):
            train_one_epoch(model, train_loader, optimizer)
            val_loss, _, _ = evaluate_model(model, val_loader)
            # Early stopping check
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(), "best_model.pth")
            
        model.load_state_dict(torch.load("best_model.pth", weights_only=True))

        for param in model.parameters():
            param.requires_grad = True

        optimizer = optim.Adam(model.parameters(), lr=lr_tune)
        best_val_loss = float('inf')

        for _ in range(5):
            train_one_epoch(model, train_loader, optimizer)
            val_loss, _, _ = evaluate_model(model, val_loader)
            # Early stopping check
            if val_loss < best_val_loss:
                best_val_loss = val_loss

        losses.append(best_val_loss)
        print(f"lr_transfer = {lr_transfer:.10f}, lr_tune: {lr_tune:.10f}, val_loss = {best_val_loss:.4f}")
    time.sleep(0.5)
    return np.array(losses)

def train_model(model, lr, train_loader, val_loader):
    optimizer = optim.Adam(model.parameters(), lr=lr[0])
    best_val_loss = float('inf')

    # Train and validate
    for _ in range(60):
        train_one_epoch(model, train_loader, optimizer)
        val_loss, _, _ = evaluate_model(model, val_loader)
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "best_model.pth")
            
    model.load_state_dict(torch.load("best_model.pth", weights_only=True))

    for param in model.parameters():
        param.requires_grad = True

    optimizer = optim.Adam(model.parameters(), lr=lr[1])
    best_val_loss = float('inf')

    # Train and validate
    for _ in range(60):
        train_one_epoch(model, train_loader, optimizer)
        val_loss, _, _ = evaluate_model(model, val_loader)
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "best_model.pth")
            
    model.load_state_dict(torch.load("best_model.pth", weights_only=True))

