# Kencan AI Assistant - Google Colab Deployment

This notebook sets up Kencan on Google Colab with free GPU access.

**Setup Steps:**
1. Enable GPU: Runtime → Change runtime type → GPU
2. Set `NGROK_AUTH_TOKEN` in the Configuration cell (free token from ngrok.com), then run all cells in order
3. Copy the ngrok URL and configure your local agent
4. Start making requests!

In [None]:
# Install dependencies
!pip install -q transformers torch accelerate flask flask-cors pyngrok requests

In [None]:
# Import libraries
import os
import json
from flask import Flask, request, jsonify
from pyngrok import ngrok
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from threading import Thread

# Report whether PyTorch can see a CUDA device, and name device 0 when it can.
cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    device_label = torch.cuda.get_device_name(0)
    print(f"GPU Device: {device_label}")

In [None]:
# Configuration
MODEL_NAME = "microsoft/phi-2"  # Free, fast model that works on Colab

# Prefer the NGROK_AUTH_TOKEN environment variable so the secret does not have
# to be saved inside the notebook. The literal below is only the fallback and
# may still be replaced by hand (get a free token from ngrok.com).
# `or` (instead of a .get default) also covers a variable that is set but empty.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or "YOUR_NGROK_TOKEN"

# Register the token with pyngrok only when a real one was supplied.
if NGROK_AUTH_TOKEN != "YOUR_NGROK_TOKEN":
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
else:
    print(
        "NGROK_AUTH_TOKEN is not set; the ngrok tunnel may fail to start "
        "without an auth token."
    )

In [None]:
# Load model
print("Loading model...")

# Options for loading the weights: half-precision tensors, `device_map="auto"`
# placement, and `trust_remote_code` enabled for the model repository.
model_load_kwargs = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
    "trust_remote_code": True,
}

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_load_kwargs)

print("Model loaded successfully!")

In [None]:
# Create Flask API
app = Flask(__name__)


@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: report a fixed status plus whether CUDA is visible to PyTorch."""
    status_payload = {
        "status": "healthy",
        "gpu": torch.cuda.is_available(),
    }
    return jsonify(status_payload)

@app.route('/command', methods=['POST'])
def process_command():
    """Generate an action suggestion for a natural-language user request.

    Expects a JSON object body with a string ``input`` field. The request is
    embedded in a fixed instruction prompt and passed to the module-level
    ``model`` / ``tokenizer`` loaded in an earlier cell.

    Returns:
        200 with ``{"response": <generated text>, "model": MODEL_NAME}`` on
        success. ``response`` contains only the newly generated text, not the
        instruction prompt.
        400 with ``{"error": <message>}`` when the body is not a JSON object or
        ``input`` is missing, blank, or not a string.
    """
    # silent=True yields None for a missing/malformed JSON body instead of
    # raising, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_input = data.get('input', '')
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({"error": "'input' must be a non-empty string"}), 400

    # Generate response
    prompt = f"""You are Kencan, an AI assistant that helps control a Windows PC.
User request: {user_input}
Provide a JSON response with 'action' and 'parameters' fields.
Response:"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        # Passed explicitly so generate() does not have to fall back to the
        # EOS token (and log a warning) on every request.
        pad_token_id=tokenizer.eos_token_id,
    )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so the client receives only the model's completion.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    return jsonify({
        "response": response,
        "model": MODEL_NAME
    })

@app.route('/finetune', methods=['POST'])
def finetune():
    """Placeholder fine-tuning endpoint; it only returns a fixed message."""
    placeholder_reply = {"message": "Fine-tuning endpoint - implement with your data"}
    return jsonify(placeholder_reply)


# Confirm that the app and its routes have been defined.
print("Flask app created!")

In [None]:
# Start server with ngrok
from werkzeug.serving import run_simple

port = 5000

# Start ngrok tunnel
tunnel = ngrok.connect(port)
# pyngrok >= 5 returns an NgrokTunnel object whose string form is not a URL;
# older releases returned the URL string itself. Normalise to the plain URL so
# the endpoint lines printed below can be copied as-is.
public_url = getattr(tunnel, "public_url", tunnel)

print(f"\n{'='*60}")
print("🚀 Kencan AI Assistant is running!")
print(f"📡 Public URL: {public_url}")
print(f"{'='*60}\n")
print("Copy the URL above and configure it in your local agent's config/settings.json")
print("\nEndpoints:")
print(f"  - Health check: {public_url}/health")
print(f"  - Commands: {public_url}/command")
print(f"  - Fine-tune: {public_url}/finetune")

# Run Flask app (blocks this cell until the server is interrupted).
try:
    run_simple('0.0.0.0', port, app, use_reloader=False, use_debugger=False)
finally:
    # Close the tunnel when the server stops so re-running this cell does not
    # leave a stale tunnel behind.
    ngrok.disconnect(public_url)