"""
Author: Jacob Thomas Joshy
Purpose: Complete environment configuration for fine-tuning Ollama models
         specifically for pharmaceutical Standard Operating Procedure (SOP) generation
         
This notebook provides a comprehensive setup for:
- GPU-accelerated Ollama installation and configuration  
- Pharmaceutical-specific dataset preparation utilities
- FDA compliance validation framework integration
- Model evaluation and performance metrics

Requirements:
- Google Colab Pro recommended for GPU access (T4/V100)
- Minimum 12GB RAM for model fine-tuning
- Stable internet connection for model downloads
"""

## 1. System Requirements & GPU Setup

In [1]:
# Check GPU availability and specifications
!nvidia-smi
import torch
import os
import subprocess
import json

# Summarize CUDA visibility; device details are printed only when a GPU exists
has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")
if not has_cuda:
    print("Warning: GPU not available.")
else:
    primary_gpu = 0  # index of the device being described
    print(f"GPU count: {torch.cuda.device_count()}")
    print(f"Current GPU: {torch.cuda.get_device_name(primary_gpu)}")
    # total_memory is reported in bytes; convert to GiB for display
    total_gib = torch.cuda.get_device_properties(primary_gpu).total_memory / 1024**3
    print(f"GPU memory: {total_gib:.1f} GB")

Wed Aug 27 10:22:49 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   48C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## 2. Install Dependencies & Ollama

In [2]:
# Install Ollama and required dependencies
!curl -fsSL https://ollama.com/install.sh | sh
# Install Python dependencies for pharmaceutical data processing
!pip install -q transformers datasets accelerate bitsandbytes
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q requests beautifulsoup4 pandas numpy
!pip install -q matplotlib seaborn plotly
!pip install -q scikit-learn nltk spacy
!pip install -q jupyter-client ipywidgets

# Install pharmaceutical-specific libraries
!pip install -q biopython  # Biological data processing
!pip install -q chembl_webresource_client  # Chemical database access
!pip install -q pubchempy  # PubChem database access
print("All dependencies installed successfully!")

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.2/55.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/61.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00

In [3]:
# Initialize Ollama background service
import subprocess
import time
import threading

def launch_ollama():
    """Run ``ollama serve`` and block until the server process exits.

    Intended to be used as the target of a daemon thread, so the call is
    expected to block for the lifetime of the server.

    The server's stdout/stderr are discarded instead of captured: capturing
    the output of a process that never exits would accumulate its entire log
    in memory, and the captured data was never read anyway.

    Returns:
        None. Failures are reported with a printed message rather than raised,
        because an exception inside a background thread would not reach the
        notebook cell that started it.
    """
    try:
        subprocess.run(['ollama', 'serve'], check=True,
                       stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError as err:
        # Non-zero exit, e.g. the port is already in use by another server
        print(f"Service launch failed: {err}")
    except OSError as err:
        # Binary missing or not executable (FileNotFoundError/PermissionError)
        print(f"Service launch failed: {err}")

# Create background thread for service
bg_thread = threading.Thread(target=launch_ollama, daemon=True)
bg_thread.start()

# Poll for readiness instead of relying on a single fixed sleep: the server
# may need more (or less) than a few seconds to start accepting requests.
startup_deadline = time.monotonic() + 30  # overall startup budget in seconds
check_result = None

# Check if service is responding
try:
    while True:
        check_result = subprocess.run(['ollama', 'list'],
                                      capture_output=True,
                                      text=True,
                                      timeout=10)
        # Stop on the first successful answer or once the budget is spent
        if check_result.returncode == 0 or time.monotonic() >= startup_deadline:
            break
        time.sleep(1)

    if check_result.returncode == 0:
        print("Ollama service is running")
        print("Current models:")
        print(check_result.stdout)
    else:
        print("Service may still be initializing")

except subprocess.TimeoutExpired:
    # A single probe hung for more than 10 seconds
    print("Service check timed out - continuing anyway")
except Exception as error:
    # e.g. the ollama binary is not on PATH
    print(f"Service verification failed: {error}")

Ollama service is running
Current models:
NAME    ID    SIZE    MODIFIED 



## 3. Download Base Models for Fine-tuning

In [4]:
# Model selection and download for pharmaceutical text generation
import subprocess
import time
import os

# Candidate base models: (Ollama tag, short description, approximate download size)
available_models = [
    {'name': tag, 'description': blurb, 'size': footprint}
    for tag, blurb, footprint in (
        ('llama2:7b-chat',
         'Llama 2 7B for general pharmaceutical text',
         '4GB approx'),
        ('llama2:13b-chat',
         'Llama 2 13B higher quality but memory intensive',
         '7GB approx'),
        ('mistral:7b-instruct',
         'Mistral 7B optimized for technical documentation',
         '4GB approx'),
    )
]

# Numbered menu: one headline row, an indented description, then a blank line
print("Available models for pharmaceutical SOP generation:")
for rank, entry in enumerate(available_models, start=1):
    print(f"{rank}. {entry['name']} ({entry['size']})\n   {entry['description']}\n")

# Select model for download
target_model = 'mistral:7b-instruct'
print(f"Downloading model: {target_model}")
print("Download time: 5-10 minutes depending on connection")

# Run the pull without capturing output so progress stays visible, and without
# a shell so the model tag is passed as a plain argument. `returncode` is the
# real exit code, whereas os.system() returns an encoded wait status on Unix
# (e.g. 256 for exit code 1), which made the failure message misleading.
try:
    download_status = subprocess.run(['ollama', 'pull', target_model]).returncode
except OSError as err:
    # Binary missing or not executable; mirror the shell's "not found" code
    print(f"Could not start ollama: {err}")
    download_status = 127

if download_status == 0:
    print(f"Download completed: {target_model}")
else:
    print(f"Download failed with exit code: {download_status}")
    print("Manual download may be required")

# Verify available models
print("\nInstalled models:")
try:
    model_list = subprocess.run(['ollama', 'list'], capture_output=True, text=True)
    print(model_list.stdout)
except Exception as err:
    print(f"Could not list models: {err}")

Available models for pharmaceutical SOP generation:
1. llama2:7b-chat (4GB approx)
   Llama 2 7B for general pharmaceutical text

2. llama2:13b-chat (7GB approx)
   Llama 2 13B higher quality but memory intensive

3. mistral:7b-instruct (4GB approx)
   Mistral 7B optimized for technical documentation

Downloading model: mistral:7b-instruct
Download time: 5-10 minutes depending on connection
Download completed: mistral:7b-instruct

Installed models:
NAME                   ID              SIZE      MODIFIED               
mistral:7b-instruct    6577803aa9a0    4.4 GB    Less than a second ago    



## 4. Setup Ngrok Authentication & Tunnel

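This section sets up an ngrok tunnel so that your local backend can reach the Ollama instance running in this Google Colab session. Note that the tunnel URL exposes the Ollama API publicly without authentication, so share it only with your own backend and stop the tunnel when you are done.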

In [5]:
# Install tunnel dependencies
import subprocess
subprocess.check_call(['pip', 'install', 'pyngrok', 'python-dotenv'])

import os
from dotenv import load_dotenv
from pyngrok import ngrok
import time

print("Setting up ngrok tunnel authentication...")
print("\nSetup Instructions:")
print("1. Register at: https://dashboard.ngrok.com/signup")
print("2. Copy authtoken from: https://dashboard.ngrok.com/get-started/your-authtoken")
print("3. Create environment file:")
print("   !echo 'NGROK_AUTH_TOKEN=your_token_here' > /content/.env")
print("   Replace 'your_token_here' with actual token")

# Load token from environment file
load_dotenv('/content/.env')
token = os.getenv('NGROK_AUTH_TOKEN')

if not token:
    print("\nERROR: NGROK_AUTH_TOKEN not found in environment file")
    print("\nTo fix:")
    print("1. Create environment file with token:")
    print("   !echo 'NGROK_AUTH_TOKEN=your_actual_token' > /content/.env")
    print("2. Run this cell again")
    print("\nExample .env content:")
    print("   NGROK_AUTH_TOKEN=2abc123def456ghi789jkl0mno1pqr2_3StUvWxYz4AbCdEfGhIj")

else:
    # Do not echo any part of the token: cell outputs are saved with the
    # notebook and would leak the credential prefix when it is shared.
    print("Auth token found (value hidden)")

    try:
        ngrok.set_auth_token(token)
        print("Ngrok authentication successful")

        # Create tunnel for Ollama service
        print("\nCreating tunnel to Ollama port 11434...")
        # Rewrite the Host header so Ollama treats forwarded requests as local;
        # it rejects requests whose Host is the public ngrok domain.
        tunnel = ngrok.connect(11434, host_header="localhost:11434")
        # connect() returns an NgrokTunnel object; the usable address is its
        # public_url attribute (formatting the object itself does not yield a URL).
        tunnel_url = tunnel.public_url

        print(f"\nOllama API accessible at: {tunnel_url}")
        print(f"Add to your backend configuration:")
        print(f"   OLLAMA_BASE_URL='{tunnel_url}'")

        # Test tunnel connectivity
        print(f"\nTesting tunnel connection...")
        import requests
        try:
            response = requests.get(f"{tunnel_url}/api/tags", timeout=10)
            if response.status_code == 200:
                print("Tunnel test successful - backend can connect")
            else:
                print(f"Tunnel created but service returned: {response.status_code}")
        except Exception as test_error:
            # Best-effort probe: the tunnel can be valid while Ollama is still starting
            print(f"Tunnel test failed: {test_error}")
            print("This may be normal during Ollama startup")

        print(f"\nKeep this session running for tunnel access")
        print(f"Active tunnel: {tunnel_url}")

    except Exception as setup_error:
        print(f"Ngrok setup failed: {setup_error}")
        print("\nTroubleshooting:")
        print("- Check token validity")
        print("- Verify ngrok account status")
        print("- Try runtime restart")

Setting up ngrok tunnel authentication...

Setup Instructions:
1. Register at: https://dashboard.ngrok.com/signup
2. Copy authtoken from: https://dashboard.ngrok.com/get-started/your-authtoken
3. Create environment file:
   !echo 'NGROK_AUTH_TOKEN=your_token_here' > /content/.env
   Replace 'your_token_here' with actual token

ERROR: NGROK_AUTH_TOKEN not found in environment file

To fix:
1. Create environment file with token:
   !echo 'NGROK_AUTH_TOKEN=your_actual_token' > /content/.env
2. Run this cell again

Example .env content:
   NGROK_AUTH_TOKEN=2abc123def456ghi789jkl0mno1pqr2_3StUvWxYz4AbCdEfGhIj


## 5. Connect to Local Backend

Instructions for connecting your local pharmaceutical SOP backend to this Colab Ollama instance.

In [6]:
# Connect Colab Ollama to local backend
# The guide is kept as data and emitted one entry per print call, so the
# rendered text is the same as printing each line individually.
backend_setup_lines = (
    "Backend Connection Setup",
    "=" * 30,
    "\nConnection steps:",
    "1. Copy ngrok URL from above to your backend .env:",
    "   OLLAMA_BASE_URL='https://abc123.ngrok.io'",
    "\n2. Navigate to your project directory:",
    "   cd sop-author-pharmaceutical/backend",
    "\n3. Setup Python environment:",
    "   python -m venv venv",
    "   venv\\Scripts\\activate    # Windows",
    "   source venv/bin/activate  # Mac/Linux",
    "\n4. Install dependencies and start server:",
    "   pip install -r requirements.txt",
    "   uvicorn app.main:app --reload --port 9000",
    "\n5. Access points:",
    "   Frontend: http://localhost:5173/",
    "   API docs: http://localhost:9000/docs",
    "\nNote: Keep this Colab session active while using local app",
    "Session typically lasts 12 hours",
)

for guide_line in backend_setup_lines:
    print(guide_line)

Backend Connection Setup

Connection steps:
1. Copy ngrok URL from above to your backend .env:
   OLLAMA_BASE_URL='https://abc123.ngrok.io'

2. Navigate to your project directory:
   cd sop-author-pharmaceutical/backend

3. Setup Python environment:
   python -m venv venv
   venv\Scripts\activate    # Windows
   source venv/bin/activate  # Mac/Linux

4. Install dependencies and start server:
   pip install -r requirements.txt
   uvicorn app.main:app --reload --port 9000

5. Access points:
   Frontend: http://localhost:5173/
   API docs: http://localhost:9000/docs

Note: Keep this Colab session active while using local app
Session typically lasts 12 hours


## 6. Test Ollama Generation

Test pharmaceutical SOP generation with the installed model.

In [7]:
# Test Ollama model generation
import subprocess

def run_model_test(model_name, test_prompt, timeout=30):
    """Send one prompt to an Ollama model and return its reply.

    Args:
        model_name: Ollama model tag passed to ``ollama run``.
        test_prompt: Prompt text, delivered to the CLI on standard input.
        timeout: Seconds to wait for the CLI before giving up. The default
            keeps the previous fixed limit; raise it for the first call after
            a download, when the model still has to be loaded.

    Returns:
        The stripped standard output on success, otherwise one of the
        sentinel strings "Generation failed" (non-zero exit status),
        "Test timed out" (limit exceeded) or "Test error" (any other failure,
        e.g. the binary is missing).
    """
    try:
        result = subprocess.run(['ollama', 'run', model_name],
                                input=test_prompt,
                                capture_output=True,
                                text=True,
                                timeout=timeout)
    except subprocess.TimeoutExpired:
        return "Test timed out"
    except Exception:
        # Deliberately broad: this is a smoke test that must not abort the cell
        return "Test error"

    if result.returncode != 0:
        return "Generation failed"
    return result.stdout.strip()

# Test pharmaceutical SOP generation
sample_prompt = "Create a brief SOP outline for equipment cleaning validation in pharmaceutical manufacturing."
test_model = 'mistral:7b-instruct'

print("Testing model generation...")
print(f"Model: {test_model}")
print(f"Prompt: {sample_prompt[:60]}...")

test_output = run_model_test(test_model, sample_prompt)

# Show at most the first 300 characters of the reply
print("\nGeneration result:")
if len(test_output) > 300:
    print(test_output[:300] + "...")
else:
    print(test_output)

# Status check: compare against the exact sentinel strings returned by
# run_model_test. A substring search for "failed"/"error" misses
# "Test timed out" (reporting success after a timeout) and also misfires
# whenever the generated SOP text itself contains those words.
failure_sentinels = ("Generation failed", "Test timed out", "Test error")
if test_output in failure_sentinels or not test_output:
    print("\nStatus: Model test failed - check Ollama installation")
else:
    print("\nStatus: Model working correctly")
    print("Environment ready for pharmaceutical SOP generation")

Testing model generation...
Model: mistral:7b-instruct
Prompt: Create a brief SOP outline for equipment cleaning validation...

Generation result:
Title: Standard Operating Procedure (SOP) for Equipment Cleaning Validation in Pharmaceutical Manufacturing

1. **Introduction**
   - Purpose of the SOP
   - References and applicable guidelines
   - Scope and limitations

2. **Definitions, Abbreviations, and Acronyms**
   - Clarification of key ter...

Status: Model working correctly
Environment ready for pharmaceutical SOP generation
