In [None]:
# Import required libraries
import sys
import os
import requests
import json
from pathlib import Path

# Add src to Python path.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
src_dir = '../src'
if src_dir not in sys.path:
    sys.path.append(src_dir)

print("Libraries imported successfully!")

## 1. Check API Status

First, let's check if the API server is running.

In [None]:
# Check API health.
# `api_base` is defined here and reused by the later cells that call the API.
api_base = "http://localhost:8000"

try:
    # requests applies no timeout by default, so without one this cell would
    # block indefinitely if the server accepts the connection but never answers.
    response = requests.get(f"{api_base}/health", timeout=10)
    if response.status_code == 200:
        health_data = response.json()
        print("‚úÖ API is healthy!")
        print(f"Status: {health_data['status']}")
        print(f"Models loaded: {health_data['models_loaded']}")
        print(f"Device: {health_data['device']}")
    else:
        print(f"‚ùå API health check failed: {response.status_code}")
except Exception as e:
    # Best-effort check: report the failure (including a timeout) and keep
    # the notebook running.
    print(f"‚ùå Could not connect to API: {e}")
    print("Make sure to run: docker-compose up")

## 2. Text Generation

Test text generation capabilities.

In [None]:
# Exercise the text-generation endpoint with a short prompt.
text_prompt = "The future of artificial intelligence is"

# JSON request body: the prompt plus the generation settings.
payload = dict(prompt=text_prompt, max_length=50, temperature=0.7)

try:
    response = requests.post(f"{api_base}/text/generate", json=payload)
    if response.status_code != 200:
        # Any non-200 answer is reported with the raw response body.
        print(f"‚ùå Text generation failed: {response.text}")
    else:
        result = response.json()
        print("üìù Text Generation Result:")
        print(f"Prompt: {text_prompt}")
        print(f"Generated: {result['generated_text']}")
        print(f"Tokens: {result['tokens_generated']}")
        print(f"Time: {result['inference_time']:.2f}s")
except Exception as e:
    # Connection problems and unexpected response shapes both end up here.
    print(f"‚ùå Error: {e}")

## 3. Text Classification

Test text classification with custom labels.

In [None]:
# Exercise the text-classification endpoint with a caller-supplied label set.
text_to_classify = "I love this new technology, it's amazing!"
labels = ["positive", "negative", "neutral"]

# Sent as URL query parameters (not as a JSON body).
params = dict(text=text_to_classify, labels=labels)

try:
    response = requests.post(f"{api_base}/text/classify", params=params)
    if response.status_code != 200:
        print(f"‚ùå Text classification failed: {response.text}")
    else:
        result = response.json()
        print("üè∑Ô∏è Text Classification Result:")
        print(f"Text: {text_to_classify}")
        print("Predictions:")
        # One line per returned prediction, score shown to three decimals.
        for prediction in result['predictions']:
            print(f"  {prediction['label']}: {prediction['score']:.3f}")
except Exception as e:
    print(f"‚ùå Error: {e}")

## 4. Audio Transcription

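Test audio transcription. The first cell below generates a short synthetic test tone so the endpoint can be exercised without any extra files; because a pure tone contains no speech, substitute a real recording to get a meaningful transcription.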

In [None]:
# Create a simple audio file for testing (optional)
# This synthesizes a pure sine tone and writes it to disk as a WAV file that
# the transcription cell below can upload.

import numpy as np
import soundfile as sf
import io

# Generate a simple test audio (sine wave)
duration = 2  # seconds
sample_rate = 16000
frequency = 440  # A4 note

# endpoint=False keeps the sample spacing at exactly 1/sample_rate. With the
# default (endpoint included) the spacing becomes duration/(N-1), which
# slightly detunes the tone.
num_samples = int(sample_rate * duration)
t = np.linspace(0, duration, num_samples, endpoint=False)
audio_data = 0.3 * np.sin(2 * np.pi * frequency * t)

# Save to temporary file (creating the data directory on first run)
temp_audio_path = "../data/test_audio.wav"
os.makedirs("../data", exist_ok=True)
sf.write(temp_audio_path, audio_data, sample_rate)

print(f"‚úÖ Created test audio file: {temp_audio_path}")
print(f"Duration: {duration}s, Sample rate: {sample_rate}Hz")

In [None]:
# Upload the test recording to the transcription endpoint and report the result.
audio_file_path = "../data/test_audio.wav"

if not os.path.exists(audio_file_path):
    # Nothing to upload: tell the user how to get a file in place.
    print(f"‚ùå Audio file not found: {audio_file_path}")
    print("Please provide an audio file or run the cell above to create a test file.")
else:
    try:
        # The handle only needs to stay open for the duration of the upload.
        with open(audio_file_path, 'rb') as audio_fh:
            files = dict(file=audio_fh)
            response = requests.post(f"{api_base}/audio/transcribe", files=files)

        if response.status_code != 200:
            print(f"‚ùå Audio transcription failed: {response.text}")
        else:
            result = response.json()
            print("üéµ Audio Transcription Result:")
            print(f"Transcription: '{result['transcription']}'")
            # 'language' is treated as optional in the response.
            print(f"Language: {result.get('language', 'unknown')}")
            print(f"Confidence: {result['confidence']:.3f}")
            print(f"Time: {result['inference_time']:.2f}s")
    except Exception as e:
        print(f"‚ùå Error: {e}")

## 5. Video Analysis

Test video analysis capabilities.

In [None]:
# Create a simple test video: solid-colored frames, each stamped with its
# frame number, written to disk for the video-analysis cell below.
import cv2
import numpy as np  # also imported by the audio cell; repeated so this cell runs on its own

video_path = "../data/test_video.mp4"
width, height = 224, 224
fps = 5
duration = 2  # seconds
total_frames = fps * duration

# The audio cell normally creates this directory; create it here as well so
# the writer has somewhere to put the file when this cell is run first.
os.makedirs(os.path.dirname(video_path), exist_ok=True)

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
if not out.isOpened():
    # Without this check a writer that failed to open would drop every frame
    # silently and the success message below would be misleading.
    raise RuntimeError(f"Could not open video writer for {video_path}")

# Frame fill colors. OpenCV treats 3-channel frames as BGR, so each tuple
# below is (blue, green, red).
colors = [
    (255, 0, 0),    # Blue
    (0, 255, 0),    # Green
    (0, 0, 255),    # Red
    (255, 255, 0),  # Cyan
    (255, 0, 255),  # Magenta
]

try:
    for i in range(total_frames):
        # Cycle through the palette, one solid color per frame.
        color = colors[i % len(colors)]
        frame = np.full((height, width, 3), color, dtype=np.uint8)

        # Add some text
        cv2.putText(frame, f'Frame {i+1}', (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        out.write(frame)
finally:
    # Always finalize the file, even if writing a frame fails part-way.
    out.release()

print(f"‚úÖ Created test video: {video_path}")
print(f"Frames: {total_frames}, Duration: {duration}s")

In [None]:
# Upload the test clip to the video-analysis endpoint and report the result.
video_file_path = "../data/test_video.mp4"

if not os.path.exists(video_file_path):
    # Nothing to upload: point the user at the cell that generates the clip.
    print(f"‚ùå Video file not found: {video_file_path}")
    print("Please run the cell above to create a test video.")
else:
    try:
        # The handle only needs to stay open for the duration of the upload.
        with open(video_file_path, 'rb') as video_fh:
            files = dict(file=video_fh)
            response = requests.post(f"{api_base}/video/analyze", files=files)

        if response.status_code != 200:
            print(f"‚ùå Video analysis failed: {response.text}")
        else:
            result = response.json()
            print("üé• Video Analysis Result:")
            print(f"Description: {result['description']}")
            print(f"Objects detected: {result['objects']}")
            print(f"Actions: {result['actions']}")
            print(f"Time: {result['inference_time']:.2f}s")
    except Exception as e:
        print(f"‚ùå Error: {e}")

## 6. Multimodal Chat

Test the multimodal chat interface with text, audio, and video inputs.

In [None]:
# Test multimodal chat: send a text prompt together with whichever of the
# test audio / video files exist on disk.
from contextlib import ExitStack

chat_prompt = "What can you tell me about the content I'm providing?"

# Form fields sent alongside the optional uploads
data = {'text_prompt': chat_prompt}

# Optional attachments: multipart field name -> path on disk
attachment_paths = {
    'audio_file': "../data/test_audio.wav",
    'video_file': "../data/test_video.mp4",
}

try:
    # ExitStack closes every handle that was successfully opened, even if a
    # later open() or the request itself raises, so no handle can leak.
    with ExitStack() as stack:
        files = {
            field: stack.enter_context(open(path, 'rb'))
            for field, path in attachment_paths.items()
            if os.path.exists(path)
        }
        response = requests.post(f"{api_base}/multimodal/chat", data=data, files=files)

    if response.status_code == 200:
        result = response.json()
        print("ü§ñ Multimodal Chat Result:")
        print(f"User: {chat_prompt}")
        print(f"Assistant: {result['response']}")
        print(f"Modalities used: {result['modalities_used']}")
        print(f"Time: {result['inference_time']:.2f}s")
    else:
        print(f"‚ùå Multimodal chat failed: {response.text}")

except Exception as e:
    # Covers file-open failures as well as request / response errors.
    print(f"‚ùå Error: {e}")

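## 7. Capabilities Summary

The cell below prints a static overview of the capabilities exercised in this notebook. It does not aggregate timings or results from the tests above.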

In [None]:
# Summary of capabilities
# Static overview: each section is a heading followed by its bullet lines.
# Sections are printed in order, each followed by one blank line.
capability_sections = [
    ("‚úÖ Text Processing:", [
        "   - Text generation with controllable parameters",
        "   - Text classification with custom labels",
        "   - Embedding generation for similarity analysis",
    ]),
    ("‚úÖ Audio Processing:", [
        "   - Speech-to-text transcription (Whisper)",
        "   - Audio feature extraction",
        "   - Multi-language support",
    ]),
    ("‚úÖ Video Processing:", [
        "   - Video content analysis and description",
        "   - Object detection in video frames",
        "   - Action recognition",
    ]),
    ("‚úÖ Multimodal Integration:", [
        "   - Cross-modal conversation interface",
        "   - Fusion of text, audio, and video inputs",
        "   - Context-aware responses",
    ]),
]

# Closing block, printed verbatim after the sections.
closing_lines = [
    "üöÄ Ready for Development!",
    "Next steps:",
    "1. Customize models for your specific use case",
    "2. Fine-tune on your domain-specific data",
    "3. Optimize for your target hardware",
    "4. Deploy with appropriate scaling",
]

print("üéØ Multimodal LLM Capabilities Summary:")
print("=" * 50)

for heading, bullet_lines in capability_sections:
    print(heading)
    for bullet_line in bullet_lines:
        print(bullet_line)
    print()

for closing_line in closing_lines:
    print(closing_line)