# ☁️ PersonaPlex Cloud: Modal-Ready Inference (Inside Out)

This notebook is specifically configured to run on **Modal** using remote GPUs (A100/L4). 

**Features:**
1. **Modal Volume Access**: Loads models and voices directly from `/root/weights`.
2. **A100 Acceleration**: Runs high-speed inference without local hardware constraints.
3. **Emotional System**: Includes the full *Inside Out* emotional prompting logic.

In [None]:
import os
import torch
import numpy as np
from pathlib import Path
import IPython.display as ipd
import sys
import asyncio
import wave

# Modal path adjustment: put the 'moshi' directory (resolved against the
# current working directory) on the import path before the imports below run.
# NOTE(review): presumably this is where the moshi checkout lives in the Modal
# image — confirm the working directory the notebook is started from.
sys.path.append(os.path.abspath('moshi'))

from moshi.models import loaders
from sentencepiece import SentencePieceProcessor
from moshi.models.lm import LMGen

print("‚úÖ Cloud Libraries loaded.")

## 1. Cloud Model Initialization
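Load the models from the Modal persistent volume (`/root/weights`). If that path does not exist, the cell prints a warning and falls back to a relative `weights` directory.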

In [None]:
# Where the checkpoints live: the Modal volume mount by default.
weights_dir = Path('/root/weights')

# Run on the GPU whenever CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# No volume at the default path (e.g. a local run): warn, then switch to a
# relative 'weights' directory.
if not weights_dir.exists():
    print("‚ö†Ô∏è Warning: /root/weights not found. If running locally, please change weights_dir to 'weights'.")
    weights_dir = Path('weights')

print(f"Loading models on {device} from {weights_dir}...")

# `mimi` is used later to decode audio tokens; `moshi_lm` is wrapped by LMGen
# below. Both are loaded from checkpoint files inside weights_dir.
mimi = loaders.get_mimi(weights_dir / loaders.MIMI_NAME, device)
# cpu_offload stays off: Cloud A100 has plenty of VRAM.
moshi_lm = loaders.get_moshi_lm(weights_dir / loaders.MOSHI_NAME, device=device, cpu_offload=False)

# Text tokenizer and the generation wrapper consumed by the generation cell.
tokenizer_path = weights_dir / loaders.TEXT_TOKENIZER_NAME
text_tokenizer = SentencePieceProcessor(str(tokenizer_path))
lm_gen = LMGen(moshi_lm, text_tokenizer)

print("üöÄ Cloud System Ready.")

## 2. Emotional Configuration
This configuration is identical to the local version, so cloud and local runs behave consistently.

In [None]:
# Scripted opening line for each supported emotion, keyed by emotion name.
# get_emotional_prompt() prepends the matching line to the user's text.
EMOTIONS = {
    "Joy": "Hello! I'm so excited to talk to you! Hee-hee! Everything is wonderful! [laugh]",
    "Sadness": "Hello... I'm feeling a bit down today. Everything seems so gray... [sigh]",
    "Anger": "I can't believe it! This is unacceptable! Beep-boop-grrr! I'm very angry!",
    "Fear": "What was that? I'm scared... are you there? Please don't go...",
    "Disgust": "Ugh, how gross. That's repulsive. I don't even want to look at it. Puaj."
}

def wrap_with_system_tags(text):
    """Wrap *text* in the ``(user) ... (assistant)`` turn markers.

    Args:
        text: Text to place between the two markers.

    Returns:
        The string ``"(user) {text} (assistant)"``.
    """
    return f"(user) {text} (assistant)"

def get_emotional_prompt(emotion, user_text):
    """Build the tagged prompt for *user_text*, prefixed with an emotion line.

    Args:
        emotion: Key into ``EMOTIONS``. An unknown key adds no prefix.
        user_text: Text appended after the emotion line.

    Returns:
        ``"(user) {prefix} {user_text} (assistant)"`` for a known emotion, or
        ``"(user) {user_text} (assistant)"`` when the emotion is unknown.
    """
    prefix = EMOTIONS.get(emotion)
    if not prefix:
        # Unknown emotion: skip the prefix entirely so the prompt does not
        # start with a stray space ("(user)  text").
        return wrap_with_system_tags(user_text)
    return wrap_with_system_tags(f"{prefix} {user_text}")

## 3. High-Speed Cloud Generation

In [None]:
def generate_audio_cloud(text, emotion="Joy", voice_pt="/root/weights/pepper.pt", output_wav="cloud_pepper_reply.wav", duration_frames=300, sample_rate=24000):
    """Generate a spoken reply for *text* and save it as a 16-bit WAV file.

    Relies on the notebook-level ``lm_gen``, ``mimi``, ``text_tokenizer`` and
    ``device`` objects created in the model-initialization cell.

    Args:
        text: User text to build the prompt from.
        emotion: Emotion name passed to ``get_emotional_prompt``.
        voice_pt: Path to voice-prompt embeddings; loaded only if it exists.
        output_wav: Path of the WAV file to write.
        duration_frames: Number of ``lm_gen.step`` calls to run.
        sample_rate: Sample rate written to the WAV file (24000 by default,
            the value that was previously hard-coded).

    Returns:
        ``output_wav``.

    Raises:
        ValueError: If no step produced audio, so there is nothing to write.
    """
    # Imported up front so a missing SciPy fails before any generation work.
    import scipy.io.wavfile as wavfile

    if os.path.exists(voice_pt):
        lm_gen.load_voice_prompt_embeddings(voice_pt)
        print(f"‚úÖ Voice loaded from {voice_pt}")
    else:
        # Previously a missing file was skipped without any message.
        print(f"Warning: voice prompt not found at {voice_pt}; continuing without loading it.")

    emotional_text = get_emotional_prompt(emotion, text)
    lm_gen.text_prompt_tokens = text_tokenizer.encode(emotional_text)

    print(f"üé§ Generating cloud response for: {emotion}...")
    all_audio_chunks = []

    # All-zero codes fed to every step in place of real input audio.
    # NOTE(review): assumes lm_gen.step accepts a (1, 8, 1) long tensor via
    # `moshi_tokens` — confirm against the LMGen implementation in use.
    empty_audio_codes = torch.zeros((1, 8, 1), device=device, dtype=torch.long)

    # Pure inference: disable autograd so no graph is kept and the decoded
    # tensor can always be converted with .numpy().
    with torch.no_grad():
        for _ in range(duration_frames):
            tokens = lm_gen.step(moshi_tokens=empty_audio_codes)
            if tokens is None:
                continue
            # Channels 1..8 of the step output are passed to the decoder.
            audio_tokens = tokens[:, 1:9]
            pcm_chunk = mimi.decode(audio_tokens)
            all_audio_chunks.append(pcm_chunk.cpu().numpy().flatten())

    if not all_audio_chunks:
        # np.concatenate([]) would raise a far less helpful ValueError.
        raise ValueError(
            f"No audio was generated in {duration_frames} frames; nothing to write to {output_wav}."
        )

    full_audio = np.concatenate(all_audio_chunks)
    # Clip before scaling: samples outside [-1, 1] would otherwise wrap around
    # when cast to int16.
    pcm_int16 = (np.clip(full_audio, -1.0, 1.0) * 32767).astype(np.int16)
    wavfile.write(output_wav, sample_rate, pcm_int16)

    print(f"\n‚ú® Cloud Generation Finished: {output_wav}")
    return output_wav

# --- CLOUD TEST ---
# generate_audio_cloud("Cloud Pepper, why are you so fast?", emotion="Joy")