# Jarvis — Simple Colab Setup (Vision + Ollama proxy)


In [None]:
# Cell 1 — Check GPU and device
import torch

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

In [None]:
# Cell 2 — Install minimal dependencies
!pip install -q flask pyngrok requests transformers pillow accelerate

In [None]:
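# Cell 3 — Install Ollama
# NOTE: the trailing `|| true` keeps this cell from failing even if the installer
# exits non-zero, so read the output to confirm the install actually succeeded.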
!curl -fsSL https://ollama.com/install.sh | sh || true

In [None]:
# Cell 4 — Start Ollama server and pull model
import subprocess, time, os
import socket

OLLAMA_PORT = 11434
OLLAMA_MODEL = 'phi'
OLLAMA_LOG_PATH = 'ollama_serve.log'
OLLAMA_STARTUP_TIMEOUT_S = 30


def _ollama_port_open():
    """Return True if something accepts TCP connections on the Ollama port."""
    try:
        with socket.create_connection(('localhost', OLLAMA_PORT), timeout=1):
            return True
    except OSError:
        return False


def _wait_for_ollama(proc, timeout_s):
    """Poll until the Ollama port is open.

    Returns False when ``timeout_s`` seconds have elapsed, or as soon as ``proc``
    has exited while the port is still closed.
    """
    deadline = time.monotonic() + timeout_s
    while True:
        if _ollama_port_open():
            return True
        if proc.poll() is not None or time.monotonic() >= deadline:
            return False
        time.sleep(0.5)


# Defined up front so the name exists even when the server could not be launched.
ollama_proc = None

print('Starting ollama serve...')
if _ollama_port_open():
    # Re-running the cell must not spawn a second server on the same port.
    print('Ollama is already listening on port', OLLAMA_PORT, '- not starting another server')
else:
    try:
        # Send server output to a log file: an unread subprocess.PIPE can fill up
        # and block the server. The child keeps its own copy of the file handle.
        with open(OLLAMA_LOG_PATH, 'ab') as log_file:
            ollama_proc = subprocess.Popen(
                ['ollama', 'serve'],
                stdout=log_file,
                stderr=subprocess.STDOUT,
            )
    except OSError as e:
        print('Could not start ollama serve:', e)
    else:
        # Wait for the port instead of sleeping for a fixed time.
        if _wait_for_ollama(ollama_proc, OLLAMA_STARTUP_TIMEOUT_S):
            print('Ollama started (pid:', ollama_proc.pid, ')')
        else:
            print('Warning: ollama serve is not accepting connections; see', OLLAMA_LOG_PATH)

print(f'Pulling {OLLAMA_MODEL} model (this can take a couple minutes)...')
try:
    subprocess.run(['ollama', 'pull', OLLAMA_MODEL], check=True)
except (OSError, subprocess.CalledProcessError) as e:
    # Best-effort: the model may already be present or the CLI may be missing.
    print(f'Warning: ollama pull {OLLAMA_MODEL} failed or skipped:', e)
else:
    print(f'Pulled {OLLAMA_MODEL} model')

In [None]:
# Cell 5 — Load vision model and create Flask app with /vision and /proxy_ollama
from flask import Flask, request, jsonify
import base64, io, gc, torch, time
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import requests

app = Flask(__name__)


@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: replies with the JSON object ``{"status": "healthy"}``."""
    status_payload = {'status': 'healthy'}
    return jsonify(status_payload)

# Device that request tensors are moved to: the GPU when CUDA is usable, else the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Loading BLIP-2 model on', device)

# The processor and the weights come from the same checkpoint.
blip2_checkpoint = 'Salesforce/blip2-opt-2.7b'
processor = Blip2Processor.from_pretrained(blip2_checkpoint)
# NOTE(review): weights are always loaded as float16, even when `device` is the
# CPU — confirm half precision is acceptable there.
model = Blip2ForConditionalGeneration.from_pretrained(
    blip2_checkpoint,
    torch_dtype=torch.float16,
    device_map='auto',
)
print('BLIP-2 loaded')

def _decode_rgb_image(image_b64):
    """Decode a base64 string into an RGB PIL image.

    Raises:
        ValueError: if the text is not valid base64 or the bytes are not an
            image Pillow can open.
    """
    try:
        raw_bytes = base64.b64decode(image_b64)
        return Image.open(io.BytesIO(raw_bytes)).convert('RGB')
    except (ValueError, OSError, Image.DecompressionBombError) as exc:
        # binascii.Error (bad base64) is a ValueError; Pillow reports unreadable
        # image data with OSError subclasses.
        raise ValueError(f'invalid image: {exc}') from exc


@app.route('/vision', methods=['POST'])
def vision_endpoint():
    """Answer a question about a base64-encoded image using the loaded BLIP-2 model.

    Expects a JSON object body with:
        image:    base64-encoded image bytes (required).
        question: optional question text; a generic default is used when it is
                  missing, null or empty.

    Returns:
        200 with ``{'answer': ..., 'question': ...}`` on success.
        400 with ``{'error': ...}`` when the body is not a JSON object or the
            image is missing or cannot be decoded.
        500 with ``{'error': ...}`` when inference itself fails.
    """
    # silent=True yields None for unparsable JSON instead of raising, so a bad
    # body is reported as a client error rather than a server error.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    image_b64 = data.get('image', '')
    if not image_b64:
        return jsonify({'error': 'no image'}), 400
    if not isinstance(image_b64, str):
        return jsonify({'error': 'image must be a base64 string'}), 400
    question = data.get('question') or 'What do you see in this image?'

    try:
        image = _decode_rgb_image(image_b64)
    except ValueError as e:
        return jsonify({'error': str(e)}), 400

    try:
        prompt = f'Question: {question} Answer:'
        # float16 matches the dtype the model is loaded with in this cell.
        inputs = processor(images=image, text=prompt, return_tensors='pt').to(device, torch.float16)
        generated_ids = model.generate(**inputs, max_new_tokens=20, do_sample=False)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
        # Keep only what follows the last 'Answer:' marker; when the marker is
        # absent, split() returns the whole text unchanged.
        answer = generated_text.split('Answer:')[-1].strip()
        return jsonify({'answer': answer, 'question': question})
    except Exception as e:
        # Top-level boundary: log the traceback in the notebook, answer with JSON.
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

@app.route('/proxy_ollama', methods=['POST'])
def proxy_ollama():
    """Forward the raw request body to the local Ollama chat API and relay the reply.

    Only a small allow-list of request headers is passed upstream. The upstream
    status code is preserved; a JSON reply is re-emitted as JSON, anything else
    is passed through as text with the upstream Content-Type.

    Returns:
        The upstream response on success.
        504 with ``{'error': ...}`` if Ollama does not answer within the timeout.
        502 with ``{'error': ...}`` if Ollama cannot be reached.
        500 with ``{'error': ...}`` for any other failure.
    """
    ollama_chat_url = 'http://localhost:11434/api/chat'
    # Generous timeout for larger models like llama3.1:8b.
    upstream_timeout_s = 120
    forwarded_header_names = ('content-type', 'user-agent', 'accept', 'ngrok-skip-browser-warning')

    try:
        raw_body = request.get_data()
        forward_headers = {
            name: value
            for name, value in request.headers.items()
            if name.lower() in forwarded_header_names
        }
        resp = requests.post(
            ollama_chat_url,
            data=raw_body,
            headers=forward_headers,
            timeout=upstream_timeout_s,
        )
    except requests.exceptions.Timeout as e:
        return jsonify({'error': f'Ollama did not respond within {upstream_timeout_s}s: {e}'}), 504
    except requests.exceptions.RequestException as e:
        # Connection refused and similar: the upstream is the problem, not this app.
        import traceback
        traceback.print_exc()
        return jsonify({'error': f'Could not reach Ollama: {e}'}), 502
    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({'error': str(e)}), 500

    try:
        upstream_json = resp.json()
    except ValueError:
        # Not a single JSON document (e.g. a streamed reply): pass the text through.
        passthrough_headers = {'Content-Type': resp.headers.get('Content-Type', 'text/plain')}
        return (resp.text, resp.status_code, passthrough_headers)
    return jsonify(upstream_json), resp.status_code

# Confirmation only: the routes were registered by the decorators earlier in
# this cell; the server itself is not running yet.
print('Flask app created with /health, /vision, /proxy_ollama')

In [None]:
# ngrok authentication — register the authtoken before any tunnels are created
from getpass import getpass
from pyngrok import ngrok

# getpass reads the token without echoing it; strip whitespace picked up when pasting.
token = getpass("Enter your ngrok authtoken (): ").strip()
if not token:
    # Fail here with a clear message instead of later when a tunnel is opened.
    raise ValueError("No ngrok authtoken entered; re-run this cell and paste your token.")
ngrok.set_auth_token(token)
print("ngrok authtoken set — you can now create tunnels.")


In [None]:
# Cell 6 — Start Flask and ngrok tunnels
from pyngrok import ngrok
from threading import Thread
import time

print('Creating ngrok tunnels...')
# Open the tunnels in a fixed order: Ollama (11434) first, then Flask (5000).
ollama_tunnel, flask_tunnel = [
    ngrok.connect(local_port, bind_tls=True) for local_port in (11434, 5000)
]
# Rewrite any http:// in the public URLs to https://.
flask_url, ollama_url = [
    tunnel.public_url.replace('http://', 'https://')
    for tunnel in (flask_tunnel, ollama_tunnel)
]
for label, public_url in (('Flask URL:', flask_url), ('Ollama URL:', ollama_url)):
    print(label, public_url)

def run_flask():
    """Serve ``app`` on all interfaces at port 5000; blocks the calling thread.

    The reloader is switched off — presumably because this function is run in a
    background thread rather than as the main program.
    """
    server_options = {'host': '0.0.0.0', 'port': 5000, 'use_reloader': False}
    app.run(**server_options)

import socket

# Daemon thread so the server never keeps the notebook kernel alive on its own.
thread = Thread(target=run_flask, daemon=True)
thread.start()

# Wait until port 5000 actually accepts connections instead of sleeping for a
# fixed time and assuming success.
flask_ready = False
flask_deadline = time.monotonic() + 15
while True:
    try:
        with socket.create_connection(('127.0.0.1', 5000), timeout=1):
            flask_ready = True
            break
    except OSError:
        pass
    # Stop early if the server thread died (e.g. port already in use).
    if not thread.is_alive() or time.monotonic() >= flask_deadline:
        break
    time.sleep(0.25)

if flask_ready:
    print('Flask server started (background thread)')
else:
    print('Warning: Flask server is not accepting connections on port 5000')

In [None]:
# Cell 7 — Basic tests: Vision and Proxy
import io, base64, requests, json
from PIL import Image

# Build a 100x100 solid-red PNG in memory and base64-encode it for the JSON request.
img = Image.new(mode='RGB', size=(100, 100), color='red')
buf = io.BytesIO()
img.save(buf, format='PNG')
png_bytes = buf.getvalue()
b64 = base64.b64encode(png_bytes).decode('utf-8')

# Local vision test; the long timeout allows for warmup on the first inference.
print("Testing vision (first call may take 30-60s for model warmup)...")
try:
    vision_request = {'image': b64, 'question': 'What color is this?'}
    r = requests.post('http://localhost:5000/vision', json=vision_request, timeout=120)
    vision_status, vision_body = r.status_code, r.json()
    print('Local vision status:', vision_status, vision_body)
except Exception as e:
    print('Local vision test failed:', e)

# Proxy test via the public Flask URL (exercises the ngrok tunnel as well).
print("\nTesting Ollama proxy...")
try:
    payload = {
        'model': 'phi',
        # Ask for one complete JSON object. Without this Ollama streams
        # newline-delimited chunks, which resp.json() cannot parse.
        'stream': False,
        'messages': [
            {'role': 'system', 'content': 'You are Jarvis. Answer in one short sentence.'},
            {'role': 'user', 'content': 'Say hello.'},
        ],
    }
    # The proxy waits up to 120 s for Ollama, so the client must wait a little
    # longer or it gives up before the proxy can report its own result.
    resp = requests.post(flask_url + '/proxy_ollama', json=payload, timeout=130)
    print('Proxy status:', resp.status_code)
    try:
        print('Proxy JSON:', resp.json())
    except ValueError:
        # Non-JSON body: show the beginning of the raw text instead.
        print('Proxy text:', resp.text[:200])
except Exception as e:
    print('Proxy test failed:', e)

In [None]:
# Cell 8 — Keep-alive loop
import time, requests
print('Starting keep-alive loop — stop manually to end')
try:
    while True:
        try:
            # raise_for_status() turns 4xx/5xx replies into exceptions, so an
            # unhealthy server is reported instead of counted as a heartbeat.
            requests.get('http://localhost:5000/health', timeout=5).raise_for_status()
            print('heartbeat...', end='\r')
        except requests.exceptions.RequestException as e:
            # Best-effort: keep looping, but make the failure visible.
            print('heartbeat failed:', e)
        time.sleep(60)
except KeyboardInterrupt:
    # Interrupting the cell is the intended way to stop; exit without a traceback.
    print('\nKeep-alive loop stopped')