<a href="https://colab.research.google.com/github/Gauravrk215/anime/blob/main/Untitled1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🔧 Install dependencies
!pip install moviepy gradio av torch torchvision --quiet

# ✅ Imports
import torch, gc, math, os
import numpy as np
from PIL import Image
from torchvision.transforms.functional import to_tensor
from torchvision.io import write_video
import av
import moviepy.editor as mp
import gradio as gr

# ✅ Dummy model (replace with AnimeGAN2 model if available)
class DummyAnimeModel(torch.nn.Module):
    """Stand-in network with no parameters that echoes its input.

    Lets the rest of the pipeline be exercised end to end before a real
    stylisation model is plugged in.
    """

    def forward(self, x):
        """Return ``x`` itself, untouched (identity mapping)."""
        passthrough = x
        return passthrough

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Instantiate the placeholder network and move it to the chosen device.
model = DummyAnimeModel().to(device)
print(f"🧠 Using device: {device}")

# ✅ Frame sampling function (instead of pytorchvideo)
def uniform_temporal_subsample(x: torch.Tensor, num_samples: int, temporal_dim: int = 1) -> torch.Tensor:
    """Select ``num_samples`` evenly spaced slices of ``x`` along ``temporal_dim``.

    The first and last positions of the temporal dimension are always part of
    the selection when ``num_samples >= 2``; positions are repeated when more
    samples are requested than the dimension holds.

    Args:
        x: Input tensor; only the size of ``temporal_dim`` is inspected.
        num_samples: Number of slices to keep. Whole-number floats are
            accepted and truncated to ``int``.
        temporal_dim: Dimension of ``x`` to subsample (default ``1``).

    Returns:
        A tensor shaped like ``x`` except that ``temporal_dim`` has length
        ``num_samples``.
    """
    # torch.linspace only accepts an integer step count, while UI widgets may
    # hand over values such as 36.0.
    num_samples = int(num_samples)
    t = x.shape[temporal_dim]
    # Build the indices on the same device as ``x``: index_select refuses an
    # index tensor that lives on a different device than its input.
    indices = torch.linspace(0, t - 1, num_samples, device=x.device)
    indices = indices.clamp(0, t - 1).long()
    return x.index_select(temporal_dim, indices)

# ✅ Resize keeping aspect ratio
def short_side_scale(x: torch.Tensor, size: int) -> torch.Tensor:
    """Bilinearly resize ``x`` so its shorter spatial side equals ``size``.

    The longer side is scaled by the same ratio and truncated to an integer,
    so the aspect ratio is kept up to that truncation.

    Args:
        x: 4D tensor whose last two dimensions are height and width. A 5D
            tensor is first passed through ``squeeze(0)``.
        size: Target length of the shorter spatial side.

    Returns:
        The resized 4D tensor; the first two dimensions are unchanged.

    Raises:
        ValueError: If the tensor is not 4D after the optional squeeze.
    """
    video = x.squeeze(0) if x.dim() == 5 else x
    if video.dim() != 4:
        raise ValueError(f"Expected 4D tensor but got shape {video.shape}")

    height, width = video.shape[-2:]
    if width < height:
        # Portrait: width is the short side.
        target_hw = (int((height / width) * size), size)
    else:
        # Landscape or square: height is the short side.
        target_hw = (size, int((width / height) * size))

    return torch.nn.functional.interpolate(
        video, size=target_hw, mode="bilinear", align_corners=False
    )

# ✅ Main processor
def predict_fn(filepath, start_sec, duration, out_fps):
    """Run a window of a video through ``model`` and write it back out as MP4.

    Frames whose timestamp lies in ``[start_sec, start_sec + duration]`` are
    decoded, subsampled to ``duration * out_fps`` frames, resized so the short
    side is 512, centre-cropped to 512x512, passed through the module-level
    ``model`` and written together with the matching audio segment (when the
    source has an audio track).

    Args:
        filepath: Path of the source video file.
        start_sec: Start of the window, in seconds.
        duration: Length of the window, in seconds.
        out_fps: Frame rate of the written video.

    Returns:
        The path of the written file, ``"anime_output_with_audio.mp4"``.

    Raises:
        ValueError: If no frame falls inside the requested window.
    """
    # torch.nn.functional has no ``center_crop``; torchvision provides one that
    # crops the last two dimensions of a tensor.
    from torchvision.transforms.functional import center_crop

    print("🚀 Started processing...")
    end_sec = start_sec + duration

    frames = []
    # The context manager closes the container even if decoding fails.
    with av.open(filepath) as container:
        # With no stream argument, seek() takes an offset in av.time_base units.
        container.seek(int(start_sec * av.time_base))
        for frame in container.decode(video=0):
            # The seek may land before start_sec, so skip until the window begins.
            if frame.time < start_sec:
                continue
            if frame.time > end_sec:
                break
            # to_tensor yields a float (C, H, W) tensor already scaled to [0, 1].
            frames.append(to_tensor(frame.to_image()))

    if not frames:
        raise ValueError("❌ No frames found.")

    # (T, C, H, W) -> (C, T, H, W), the layout the helpers below index into.
    video_tensor = torch.stack(frames, dim=0).permute(1, 0, 2, 3)

    # Slider values may arrive as floats; the sample count must be a positive int.
    num_samples = max(1, int(round(duration * out_fps)))
    x = uniform_temporal_subsample(video_tensor, num_samples)
    x = short_side_scale(x, 512)
    x = center_crop(x, [512, 512])
    # Pixels are already in [0, 1]; map them to [-1, 1] so that the
    # ``* 0.5 + 0.5`` de-normalisation after the model is its exact inverse.
    # NOTE(review): assumes the real model works on [-1, 1] inputs - confirm.
    x = x * 2.0 - 1.0
    x = x.permute(1, 0, 2, 3)  # (C, T, H, W) -> (T, C, H, W): frames as batch

    with torch.no_grad():
        out = model(x.to(device)).cpu()
    out = (out * 0.5 + 0.5).clamp(0, 1) * 255.
    # (T, C, H, W) -> (T, H, W, C) uint8; round so float error cannot turn 255
    # into 254, and make the result contiguous for the encoder.
    output_frames = out.permute(0, 2, 3, 1).round().byte().contiguous()

    # Extract the audio for the same window, if the source has any.
    audio_kwargs = {}
    clip = mp.VideoFileClip(filepath)
    try:
        if clip.audio is not None:
            # Do not request audio past the end of the clip.
            audio_end = end_sec if clip.duration is None else min(end_sec, clip.duration)
            if audio_end > start_sec:
                samples = clip.audio.subclip(start_sec, audio_end).to_soundarray(fps=44100)
                # to_soundarray returns (N, channels); write_video wants (channels, N).
                audio_kwargs = {
                    "audio_array": torch.from_numpy(np.ascontiguousarray(samples.T)),
                    "audio_fps": 44100,
                    "audio_codec": 'aac',
                }
    finally:
        clip.close()

    out_file = "anime_output_with_audio.mp4"
    write_video(out_file, output_frames, fps=out_fps, **audio_kwargs)

    print("✅ Done. Returning final video.")
    return out_file

# ✅ Gradio UI
# Components are created in display order inside the Blocks context.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🎬 Anime Video Converter (with Audio)")

    # Input widgets
    video_input = gr.Video(label="Upload your .mp4 video")
    start = gr.Slider(minimum=0, maximum=300, value=0, step=1, label="Start Time (sec)")
    duration = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Duration (sec)")
    fps = gr.Slider(minimum=6, maximum=30, value=12, step=6, label="Output FPS")

    # Trigger and result
    convert = gr.Button(value="🎨 Convert to Anime")
    output = gr.Video(label="Anime Output")

    # Clicking the button feeds the four inputs to predict_fn and shows the
    # file it returns in the output player.
    convert.click(predict_fn, [video_input, start, duration, fps], output)

# share=True asks Gradio for a public share link in addition to the local server.
demo.launch(share=True)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.6/322.6 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.2/35.2 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m72.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m63.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m664.8/664.8 MB[0m [31m65.9 MB/s[0m eta [36m0:00:01[0m