# Code

Check GPU and set device

In [1]:
import torch

if torch.cuda.is_available():
    print(f"torch.version.cuda: {torch.version.cuda}")
    print(f"torch.backends.cudnn.enabled: {torch.backends.cudnn.enabled}")
    !nvidia-smi
    !nvcc --version
else:
    print("GPU is not available")


device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on {device}")

torch.version.cuda: 12.9
torch.backends.cudnn.enabled: True
Sat Sep 13 05:48:55 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.88                 Driver Version: 580.88         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   47C    P3            590W /   68W |       0MiB /   8188MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+------------

## Load MAGI model

In [None]:
from transformers import AutoModel
import torch

# Load the MAGI v2 checkpoint by its Hugging Face repository id.
# trust_remote_code=True allows the model code shipped with that repository to run.
# The model is moved to `device` (chosen in the GPU-check cell) instead of calling
# .cuda() unconditionally, so the cell no longer crashes on CPU-only machines.
model = (
    AutoModel.from_pretrained("ragavsachdeva/magiv2", trust_remote_code=True)
    .to(device)
    .eval()  # inference mode: disables dropout / batch-norm updates
)

KeyboardInterrupt: 

## Save/Load model to local

In [None]:
# Save model to directory:
import os

# Directory that receives the local copy of the model (relative to the
# notebook's working directory).
magi_model_dir = "../models/magi_model"
os.makedirs(magi_model_dir, exist_ok=True)  # no error if it already exists
# Write the currently loaded model into that directory; the "Load model from
# directory" cell below reads it back from the same path.
model.save_pretrained(magi_model_dir)

In [3]:
# Load model from directory:
from transformers import AutoModel

# Load the model from the local copy written by the "Save model to directory" cell.
# trust_remote_code=True allows the model code stored with the checkpoint to run.
# The model is moved to `device` (chosen in the GPU-check cell) instead of calling
# .cuda() unconditionally, so the cell no longer crashes on CPU-only machines.
model = (
    AutoModel.from_pretrained("../models/magi_model", trust_remote_code=True)
    .to(device)
    .eval()  # inference mode: disables dropout / batch-norm updates
)

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [None]:
####################### OR ###########################
# Cache model to directory:
# model_1 = AutoModel.from_pretrained("ragavsachdeva/magiv2", cache_dir="./magi_saved_model_directory/", trust_remote_code=True).cuda().eval()

# Code

# Define data/result directory

In [None]:
import os

# Root locations, relative to the notebook's working directory.
data_folder = "../data"
result_folder = "../results"
manga_list = "vi/Ruri Dragon (Oneshot)/Ch. None"  # Will be changed to list later

# Inputs: the chapter's page images and the character reference images.
manga_folder = os.path.join(data_folder, manga_list)
character_folder = os.path.join(data_folder, "mock_blank_character")

# Outputs live under a per-manga folder that mirrors the input layout.
# Note: transcript_output_dir is the path of the transcript *file*, not a directory.
individual_result_folder = os.path.join(result_folder, manga_list)
json_output_dir, result_image_output_dir, transcript_output_dir = (
    os.path.join(individual_result_folder, leaf)
    for leaf in ("json_results", "image_results", "transcript.txt")
)

# Create the two output directories if they don't exist yet.
for _output_dir in (json_output_dir, result_image_output_dir):
    os.makedirs(_output_dir, exist_ok=True)

## Create chapter page list and character bank (images/names)

In [12]:
import os
import re


# File extensions (compared case-insensitively) that count as images.
_IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")


def _page_sort_key(image_file):
    """Return a sort key that orders page files numerically where possible.

    Files carrying a ``p<digits>`` marker, or whose stem is purely numeric,
    sort first by that number. All other files sort afterwards by stem. The
    leading group tag keeps ints and strings from ever being compared with
    each other, and the file name is the final tie-breaker.
    """
    match = re.search(r"p(\d+)", image_file)
    if match:
        return (0, int(match.group(1)), image_file)
    stem = image_file.rsplit(".", 1)[0]
    if stem.isdecimal():
        # "2.jpg" must come before "10.jpg", so compare as numbers.
        return (0, int(stem), image_file)
    return (1, stem, image_file)


def create_chapter_pages_and_character_bank(manga_folder, character_folder):
    """Collect ordered page paths and a character reference bank.

    Args:
        manga_folder: Directory containing the chapter's page images.
        character_folder: Directory containing character reference images
            whose file names start with the character name, optionally
            followed by ``_<anything>``.

    Returns:
        A tuple ``(chapter_pages, character_bank)`` where ``chapter_pages``
        is a list of page image paths in page order and ``character_bank``
        is ``{"images": [paths], "names": [names]}`` with matching indices.

    Raises:
        OSError: If either directory cannot be listed.
    """
    # Keep only image files; the extension check ignores case (".JPG" counts).
    page_files = [
        name
        for name in os.listdir(manga_folder)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    ]
    chapter_pages = [
        os.path.join(manga_folder, name)
        for name in sorted(page_files, key=_page_sort_key)
    ]

    character_bank = {"images": [], "names": []}
    # Sort the listing so the bank order does not depend on the filesystem.
    for char_image_file in sorted(os.listdir(character_folder)):
        if not char_image_file.lower().endswith(_IMAGE_EXTENSIONS):
            continue
        # Strip the extension first so "Ruri.png" yields "Ruri", then keep
        # the part before the first underscore ("Ruri_1.png" -> "Ruri").
        char_name = os.path.splitext(char_image_file)[0].split("_")[0]
        character_bank["images"].append(
            os.path.join(character_folder, char_image_file)
        )
        character_bank["names"].append(char_name)
    return chapter_pages, character_bank


# Build the ordered page list and the character bank for the configured folders.
chapter_pages_original, character_bank_original = create_chapter_pages_and_character_bank(
    manga_folder=manga_folder,
    character_folder=character_folder,
)

# Working inputs for the cells below: the page list is a shallow copy, while
# the character bank is the very same dict object (not a copy).
chapter_pages_test = list(chapter_pages_original)
character_bank_test = character_bank_original

# Print the results (for debugging)
print("Chapter Pages:", chapter_pages_test, sep="\n")
print("\nCharacter Bank:", character_bank_test, sep="\n")

Chapter Pages:
['../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\00.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\01.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\02.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\03.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\04.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\05.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\06.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\07.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\08.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\09.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\10.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\11.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\12.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\13.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\14.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\15.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\16.jpg', '../data\\vi/Ruri Dragon (Oneshot)/Ch. None\\17.jpg', '../data\\vi

## Process (OCR → Transcript)

In [14]:
import os
import json
from PIL import Image
import numpy as np


def read_image(path_to_image):
    """Load an image file as a NumPy array of its grayscale content in RGB form.

    The image is converted to mode "L" and then back to mode "RGB" before
    being turned into an array.

    Args:
        path_to_image: Path of the image file to read.

    Returns:
        The ``np.array`` built from the converted PIL image.
    """
    with open(path_to_image, "rb") as handle:
        grey_as_rgb = Image.open(handle).convert("L").convert("RGB")
        # Materialise the pixels while the file handle is still open.
        pixels = np.array(grey_as_rgb)
    return pixels


# Decode every chapter page and every character reference image into arrays.
chapter_pages = list(map(read_image, chapter_pages_test))
# Same keys as character_bank_test, but with "images" holding the decoded
# arrays; character_bank_test itself keeps its file paths.
character_bank = {
    **character_bank_test,
    "images": list(map(read_image, character_bank_test["images"])),
}

# Run the chapter-wide prediction (with OCR) without tracking gradients.
with torch.no_grad():
    per_page_results = model.do_chapter_wide_prediction(
        chapter_pages,
        character_bank,
        use_tqdm=True,
        do_ocr=True,
    )

print("Continue with next cell")

100%|██████████| 6/6 [00:11<00:00,  1.95s/it]
100%|██████████| 14/14 [02:28<00:00, 10.63s/it]

Continue with next cell





## Save transcript

In [15]:
# For every page: save the annotated image, dump the raw prediction as JSON,
# and append the page's essential text lines to the chapter transcript.
transcript = []
for i, (image, page_result) in enumerate(zip(chapter_pages, per_page_results)):
    # Name the outputs after the source image file (without its extension).
    image_name, _ = os.path.splitext(os.path.basename(chapter_pages_test[i]))

    model.visualise_single_image_prediction(
        image, page_result, os.path.join(result_image_output_dir, f"{image_name}.png")
    )

    # Save page_result to JSON (pretty-printed).
    json_file_path = os.path.join(json_output_dir, f"{image_name}.json")
    with open(json_file_path, "w", encoding="utf-8") as json_file:
        json.dump(page_result, json_file, indent=4)

    # Map each text index to the name of the character it is associated with.
    speaker_name = {
        text_idx: page_result["character_names"][char_idx]
        for text_idx, char_idx in page_result["text_character_associations"]
    }

    transcript.append(f"<page>{i:03}<endpage>")
    for j, text in enumerate(page_result["ocr"]):
        if not page_result["is_essential_text"][j]:
            continue
        # Text without an associated character is attributed to "unsure".
        name = speaker_name.get(j, "unsure")
        transcript.append(f"<name>{name}<endname>: {text}")

# transcript_output_dir is the transcript *file* path. Write it as UTF-8
# explicitly: the platform default encoding (e.g. cp1252 on Windows) raises
# UnicodeEncodeError on OCR text it cannot represent.
with open(transcript_output_dir, "w", encoding="utf-8") as fh:
    fh.writelines(f"{line}\n" for line in transcript)

print("\n\nDone you WEEEEB!")



Done you WEEEEB!


# Zip files (for Kaggle only)

## Legacy

In [None]:
# json_output_dir = "/kaggle/working/json_results"
# result_image_output_dir = "/kaggle/working/image_results"

# # zip
# json_output_dir = "/kaggle/working/json_results"
# !zip -rj /kaggle/working/json.zip {json_output_dir}/*.json
# # download
# !cd /kaggle/working/
# display(FileLink('json.zip'))

## Download separate files

### Download json

In [34]:
import os
import subprocess
from IPython.display import FileLink, display


def download_file(download_file_name, source_path):
    """Zip ``source_path`` into ``/kaggle/working`` and display a download link.

    Args:
        download_file_name: Base name (without ``.zip``) of the archive to create.
        source_path: File, directory, or glob pattern to archive. A relative
            path is resolved after changing into ``/kaggle/working/``.

    Returns:
        None. On failure a message and the error output are printed and no
        link is displayed.

    Side effects:
        Changes the process working directory to ``/kaggle/working/``.
    """
    import glob  # local import: only this helper needs it

    os.chdir("/kaggle/working/")
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    # Expand wildcards in Python and pass the arguments as a list, so paths
    # containing spaces or parentheses are not re-parsed by a shell. Like a
    # shell, fall back to the literal pattern when nothing matches.
    sources = sorted(glob.glob(source_path)) or [source_path]
    try:
        # -r: recurse into directories, -j: store files without their paths.
        result = subprocess.run(
            ["zip", "-rj", zip_name, *sources], capture_output=True, text=True
        )
    except FileNotFoundError as error:
        # The zip executable itself is missing.
        print("Unable to run zip command!")
        print(error)
        return
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f"{download_file_name}.zip"))


# Zip every .json file in json_output_dir into json_results.zip and show its link.
download_file("json_results", f"{json_output_dir}/*.json")

### Download image results

In [33]:
import os
import subprocess
from IPython.display import FileLink, display


def download_file(download_file_name, source_path):
    """Zip ``source_path`` into ``/kaggle/working`` and display a download link.

    Args:
        download_file_name: Base name (without ``.zip``) of the archive to create.
        source_path: File, directory, or glob pattern to archive. A relative
            path is resolved after changing into ``/kaggle/working/``.

    Returns:
        None. On failure a message and the error output are printed and no
        link is displayed.

    Side effects:
        Changes the process working directory to ``/kaggle/working/``.
    """
    import glob  # local import: only this helper needs it

    os.chdir("/kaggle/working/")
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    # Expand wildcards in Python and pass the arguments as a list, so paths
    # containing spaces or parentheses are not re-parsed by a shell. Like a
    # shell, fall back to the literal pattern when nothing matches.
    sources = sorted(glob.glob(source_path)) or [source_path]
    try:
        # -r: recurse into directories, -j: store files without their paths.
        result = subprocess.run(
            ["zip", "-rj", zip_name, *sources], capture_output=True, text=True
        )
    except FileNotFoundError as error:
        # The zip executable itself is missing.
        print("Unable to run zip command!")
        print(error)
        return
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f"{download_file_name}.zip"))


# Zip the contents of result_image_output_dir into image_results.zip and show its link.
download_file("image_results", result_image_output_dir)

### Download transcript

In [35]:
import os
import subprocess
from IPython.display import FileLink, display


def download_file(download_file_name, source_path):
    """Zip ``source_path`` into ``/kaggle/working`` and display a download link.

    Args:
        download_file_name: Base name (without ``.zip``) of the archive to create.
        source_path: File, directory, or glob pattern to archive. A relative
            path is resolved after changing into ``/kaggle/working/``.

    Returns:
        None. On failure a message and the error output are printed and no
        link is displayed.

    Side effects:
        Changes the process working directory to ``/kaggle/working/``.
    """
    import glob  # local import: only this helper needs it

    os.chdir("/kaggle/working/")
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    # Expand wildcards in Python and pass the arguments as a list, so paths
    # containing spaces or parentheses are not re-parsed by a shell. Like a
    # shell, fall back to the literal pattern when nothing matches.
    sources = sorted(glob.glob(source_path)) or [source_path]
    try:
        # -r: recurse into directories, -j: store files without their paths.
        result = subprocess.run(
            ["zip", "-rj", zip_name, *sources], capture_output=True, text=True
        )
    except FileNotFoundError as error:
        # The zip executable itself is missing.
        print("Unable to run zip command!")
        print(error)
        return
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f"{download_file_name}.zip"))


# Archive the transcript written by the "Save transcript" cell.
# transcript_output_dir (a file path, despite its name) is the location that
# cell actually writes to, so it replaces the old hard-coded Kaggle path.
download_file("transcript", transcript_output_dir)

## Download all

In [39]:
import os
import subprocess
from IPython.display import FileLink, display


def download_file(download_file_name, source_path):
    """Zip ``source_path`` into ``/kaggle/working`` and display a download link.

    Unlike the per-artifact variants defined in earlier cells, this version
    runs ``zip -r`` without ``-j``, so the archive keeps the directory
    structure of ``source_path``.

    Args:
        download_file_name: Base name (without ``.zip``) of the archive to create.
        source_path: File, directory, or glob pattern to archive. A relative
            path is resolved after changing into ``/kaggle/working/``.

    Returns:
        None. On failure a message and the error output are printed and no
        link is displayed.

    Side effects:
        Changes the process working directory to ``/kaggle/working/``.
    """
    import glob  # local import: only this helper needs it

    os.chdir("/kaggle/working/")
    zip_name = f"/kaggle/working/{download_file_name}.zip"
    # Expand wildcards in Python and pass the arguments as a list, so paths
    # containing spaces or parentheses are not re-parsed by a shell. Like a
    # shell, fall back to the literal pattern when nothing matches.
    sources = sorted(glob.glob(source_path)) or [source_path]
    try:
        # -r: recurse into directories, keeping their relative paths.
        result = subprocess.run(
            ["zip", "-r", zip_name, *sources], capture_output=True, text=True
        )
    except FileNotFoundError as error:
        # The zip executable itself is missing.
        print("Unable to run zip command!")
        print(error)
        return
    if result.returncode != 0:
        print("Unable to run zip command!")
        print(result.stderr)
        return
    display(FileLink(f"{download_file_name}.zip"))


!cp -r result_dir /
download_file("result", result_dir)

cp: cannot stat 'result_dir': No such file or directory
