# 🎥 Real-Time Face Swapping for Google Colab

**Real-time face swapping from webcam** with optimized performance (15-20 FPS target)

📦 **Repository**: [SmashCodeJJ/CIS5810_FinalProject](https://github.com/SmashCodeJJ/CIS5810_FinalProject)  
🔧 **Branch**: `Youxin` (real-time implementation)

---

## ⚠️ IMPORTANT: Setup Process

1. ✅ **Enable GPU**: Runtime → Change runtime type → GPU (T4)
2. ✅ **Run Installation Cell** → Installs dependencies
3. 🔄 **Restart Runtime** → Click "Runtime → Restart runtime"
4. ▶️ **Run Real-Time Cells** → Start face swapping

**Do NOT skip the runtime restart!**


## Step 1: Installation (Run once, then restart runtime)

After this cell completes, go to: **Runtime → Restart runtime**


In [None]:
# Clone repository with real-time implementation
!git clone -b Youxin https://github.com/SmashCodeJJ/CIS5810_FinalProject.git sber-swap
%cd sber-swap

# Install dependencies
%pip install -q -r requirements.txt

# Download models (if needed)
import os
if not os.path.exists('weights/G_unet_2blocks.pth'):
    print("Downloading models...")
    !bash download_models.sh 2>/dev/null || echo "Models should be downloaded separately"

print("\n" + "="*50)
print("‚úÖ Installation complete!")
print("="*50)
print("‚ö†Ô∏è  IMPORTANT: Go to Runtime ‚Üí Restart runtime")
print("    Then skip this cell and run the cells below.")
print("="*50)


## Step 2: Verify Installation (Run after restart)


In [None]:
# Change to project directory
%cd /content/sber-swap

# Verify imports
import torch
import numpy as np
import cv2
import onnxruntime as ort
import insightface

print("="*50)
print("‚úÖ Environment Verified")
print("="*50)
print(f"PyTorch version: {torch.__version__}")
print(f"NumPy version: {np.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"ONNX Runtime version: {ort.__version__}")
print(f"InsightFace version: {insightface.__version__}")

if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print("üöÄ GPU acceleration enabled!")
else:
    print("‚ö†Ô∏è  Running on CPU (slower)")
print("="*50)


## Step 3: Real-Time Face Swapping with Webcam

### Upload Source Face Image

First, upload the face image you want to swap TO (the face that will appear on the webcam feed)


In [None]:
import os
import shutil

from google.colab import files
# Imported under an alias on purpose: the webcam cell binds the global name
# `Image` to PIL's module, and take_photo() looks that global up at call time.
# Re-running this cell must therefore not rebind `Image`.
from IPython.display import Image as IPImage, display

UPLOAD_DIR = "/content/sber-swap/examples/my_images"
DEFAULT_SOURCE_PATH = "/content/sber-swap/examples/images/mark.jpg"

# Create directory for uploads
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Upload source face image
print("Upload SOURCE face image (the face to swap onto the webcam):")
uploaded = files.upload()

# Only the first uploaded file is used as the source face; any further files
# stay where the upload widget saved them.
first_name = next(iter(uploaded), None)

if first_name is not None:
    source_path = os.path.join(UPLOAD_DIR, first_name)
    # shutil.move instead of `!mv "{filename}"`: no shell is involved, so file
    # names containing quotes, `$` or backticks are handled literally.
    shutil.move(first_name, source_path)
    print(f"✅ Source image saved to: {source_path}")
    display(IPImage(source_path))
else:
    print("⚠️  Using default source image")
    source_path = DEFAULT_SOURCE_PATH


### JavaScript Webcam Capture Function

Colab doesn't support direct webcam access, so we use JavaScript to capture frames.


In [None]:
import base64
import io
from PIL import Image
import numpy as np
import cv2
from IPython.display import display, HTML, Javascript
from google.colab.output import eval_js

def take_photo():
    """Capture one webcam frame through the browser and return it as a BGR image.

    The frame is grabbed by JavaScript running in the notebook front end,
    transferred as a JPEG data URL, and decoded on the Python side.

    Returns:
        numpy.ndarray: the captured frame with three channels in BGR order
        (the layout OpenCV functions expect).

    Raises:
        RuntimeError: if the browser returned no usable image data.
        Exception: whatever ``eval_js`` raises when the JavaScript side fails
            (for example when camera access is denied).
    """
    # Imported locally under an alias so the function keeps working even if
    # another cell rebinds the global name `Image` (IPython.display exports an
    # `Image` class as well).
    from PIL import Image as PILImage

    js_code = '''
    async function takePhoto() {
        const stream = await navigator.mediaDevices.getUserMedia({video: true});
        try {
            const video = document.createElement('video');
            video.srcObject = stream;

            // Attach the handler before starting playback so the event
            // cannot be missed.
            const metadataLoaded = new Promise(resolve => {
                video.onloadedmetadata = resolve;
            });

            // Wait until playback has really started: drawing right after
            // 'loadedmetadata' can happen before the first frame is decoded
            // and would yield a blank image.
            await video.play();
            await metadataLoaded;

            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);

            return canvas.toDataURL('image/jpeg', 0.95);
        } finally {
            // Release the camera even when something above throws.
            stream.getTracks().forEach(track => track.stop());
        }
    }
    '''

    # Execute JavaScript and get result (a "data:image/jpeg;base64,..." URL)
    data = eval_js(js_code + 'takePhoto()')
    if not data or ',' not in data:
        raise RuntimeError("Webcam capture returned no image data")

    # Decode base64 payload (everything after the first comma)
    image_bytes = base64.b64decode(data.split(',', 1)[1])
    # Force three RGB channels so the colour conversion below is always valid
    image = PILImage.open(io.BytesIO(image_bytes)).convert('RGB')

    # Convert to OpenCV format (BGR channel order)
    return cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

print("✅ Webcam capture function ready!")


In [None]:
import sys
import torch
import time
import numpy as np
import cv2
from IPython.display import display, Image as IPImage  # Use alias to avoid conflicts

# Add to path
sys.path.insert(0, '/content/sber-swap')

# Import real-time modules
from inference_realtime import init_models, load_source_face
from utils.realtime.face_tracker import FaceTracker
from utils.inference.realtime_processing import process_single_frame
from utils.realtime.performance_monitor import PerformanceMonitor

# Initialize models (only once)
print("Loading models... This may take a minute...")


class Args:
    """Settings object handed to the real-time pipeline in this notebook.

    Calling ``Args()`` with no arguments yields the notebook's default
    configuration; individual settings can be overridden by keyword, e.g.
    ``Args(detect_interval=3)``.

    Attributes:
        G_path: path of the generator weights file (the same file the
            installation cell checks for).
        backbone: generator backbone name; read by code outside this notebook
            (presumably ``init_models`` - TODO confirm).
        num_blocks: presumably the number of generator blocks - TODO confirm.
        fast_mode: flag read by code outside this notebook - TODO confirm use.
        crop_size: face crop size passed to ``load_source_face`` and
            ``process_single_frame``.
        detect_interval: passed to ``FaceTracker`` as ``detect_interval``.
        tracker_type: passed to ``FaceTracker`` as ``tracker_type``.
    """

    def __init__(
        self,
        *,
        G_path='weights/G_unet_2blocks.pth',
        backbone='unet',
        num_blocks=2,
        fast_mode=True,
        crop_size=224,
        detect_interval=5,
        tracker_type='CSRT',
    ):
        self.G_path = G_path
        self.backbone = backbone
        self.num_blocks = num_blocks
        self.fast_mode = fast_mode
        self.crop_size = crop_size
        self.detect_interval = detect_interval
        self.tracker_type = tracker_type

    def __repr__(self):
        settings = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({settings})"

args = Args()
app, G, netArc, handler = init_models(args)

# Load source face.
# If the upload cell was skipped (or the runtime was restarted after it ran),
# `source_path` is undefined; fall back to the same bundled example image the
# upload cell uses as its default instead of failing with a NameError.
source_path = globals().get('source_path') or "/content/sber-swap/examples/images/mark.jpg"
source_embed = load_source_face(source_path, app, netArc, args.crop_size)

# Initialize tracker
tracker = FaceTracker(
    detector=app,
    detect_interval=args.detect_interval,
    tracker_type=args.tracker_type,
    confidence_threshold=0.6
)

# Initialize performance monitor
monitor = PerformanceMonitor(window_size=30)

print("\n✅ Models loaded! Ready for real-time face swapping.")
print("\n📸 Click the cell below multiple times to capture and process frames.")


In [None]:
# Capture one webcam frame, run the face swap on it, and show the annotated
# result together with the timing statistics. Re-run the cell for each new
# frame.
import os

try:
    # Capture frame from webcam
    frame = take_photo()

    # Start monitoring (the capture itself is not part of the measured time)
    monitor.start_frame()

    # Update tracker
    bbox = tracker.update(frame)

    # Process frame; the defaults below are reported when no face was found
    det_time = 0
    gen_time = 0
    result = None

    if bbox is not None:
        # NOTE(review): half=True presumably requests FP16 inference - confirm
        # that process_single_frame copes with it on a CPU-only runtime.
        result, det_time, gen_time = process_single_frame(
            frame=frame,
            source_embed=source_embed,
            netArc=netArc,
            G=G,
            app=app,
            handler=handler,
            bbox=bbox,
            crop_size=args.crop_size,
            half=True
        )

    # Record metrics
    monitor.record_detection_time(det_time)
    monitor.record_generator_time(gen_time)
    total_time = (time.time() - monitor.frame_start_time) * 1000 if monitor.frame_start_time else 0
    monitor.record_processing_time(total_time)
    monitor.end_frame()

    stats = monitor.get_stats()

    # Build the frame to display
    if result is not None:
        # Draw bbox; cv2.rectangle needs integer pixel coordinates, so cast in
        # case the tracker reports floats.
        x, y, w, h = (int(v) for v in bbox)
        cv2.rectangle(result, (x, y), (x+w, y+h), (0, 255, 0), 2)

        # Add text with stats
        cv2.putText(result, f"FPS: {stats['fps']:.1f}", (10, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(result, f"Latency: {stats['avg_latency_ms']:.1f}ms", (10, 60),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

        display_frame = result
    else:
        display_frame = frame.copy()
        cv2.putText(display_frame, "No face detected", (10, 30),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # Save and display. cv2.imwrite expects BGR input, so the frame is written
    # as-is. The target directory is created first because imwrite only
    # returns False (no exception) when it cannot write the file.
    output_path = '/content/sber-swap/examples/results/realtime_frame.jpg'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    if not cv2.imwrite(output_path, display_frame):
        raise IOError(f"Could not write result image to {output_path}")

    # Display result (use IPImage alias to avoid conflicts)
    display(IPImage(output_path))

    # Print stats
    print(f"✅ Frame processed! FPS: {stats['fps']:.1f} | "
          f"Latency: {stats['avg_latency_ms']:.1f}ms | "
          f"Detection: {stats['avg_detection_ms']:.1f}ms | "
          f"Generator: {stats['avg_generator_ms']:.1f}ms")

    print("\n📸 Click this cell again to capture another frame!")

except Exception as e:
    # Top-level notebook boundary: report the problem with a traceback instead
    # of leaving the cell in a half-finished state.
    print(f"❌ Error: {e}")
    import traceback
    traceback.print_exc()
