# Answer Evaluator Model Server on Google Colab

This notebook hosts your fine-tuned Llama model as a Flask API server accessible from your React Native app.

## Setup Steps:
1. Upload your model to Hugging Face or Google Drive
2. Run all cells in order
3. Copy the ngrok URL and update your React Native app
4. Test the endpoints

## 📦 Install Dependencies

In [None]:
# Install required packages
!pip install transformers torch accelerate flask flask-cors pyngrok
!pip install bitsandbytes  # For 4-bit quantization

# Import libraries
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import json
import re
import threading
from pyngrok import ngrok
import time

## 🤖 Load Your Fine-tuned Model

In [None]:
# Model configuration
# This cell defines the globals MODEL_NAME, tokenizer and model that the helper
# functions and API routes below read; it must run before them.
MODEL_NAME = "Sathvik19/Answer-Evaluator-Model"  # Your HuggingFace model
# OR if you want to load from local files:
# MODEL_NAME = "/content/drive/MyDrive/your_model_path"

print("Loading model and tokenizer...")

# Configure 4-bit quantization for memory efficiency
# (NF4 quantization type, double quantization enabled, bfloat16 compute dtype).
# NOTE(review): bfloat16 may not be natively supported on every Colab GPU —
# confirm on the target runtime, or consider float16 there.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the EOS token for padding when the tokenizer ships without a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load model with quantization
# NOTE(review): trust_remote_code=True presumably allows Python code shipped in
# the model repository to run at load time — confirm the repository is trusted.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
)

print(f"‚úÖ Model loaded successfully!")
# Falls back to the label 'distributed' when the model object has no `device` attribute.
print(f"Model device: {model.device if hasattr(model, 'device') else 'distributed'}")
# Allocated GPU memory as reported by torch, converted from bytes to GiB.
print(f"Memory usage: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")

## 🛠️ Helper Functions

In [None]:
def generate_response(prompt, max_new_tokens=200, temperature=0.7):
    """Generate a completion for ``prompt`` with the globally loaded model.

    Args:
        prompt: Text fed to the model. It is truncated to 1024 tokens.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. A value that is falsy or <= 0
            switches to greedy decoding instead of sampling.

    Returns:
        The generated continuation only (the prompt is never included),
        stripped of surrounding whitespace. If anything fails, a string of the
        form ``"Error generating response: ..."`` is returned instead of
        raising, so callers always receive a ``str``.
    """
    try:
        # Tokenize input
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=1024
        )

        # Move to the model's device (falls back to 'cuda' if the model has no `device`)
        target_device = model.device if hasattr(model, 'device') else 'cuda'
        inputs = {k: v.to(target_device) for k, v in inputs.items()}

        # Number of prompt tokens actually fed to the model (after truncation).
        prompt_length = inputs["input_ids"].shape[1]

        generation_kwargs = {
            "max_new_tokens": max_new_tokens,
            "pad_token_id": tokenizer.eos_token_id,
            "repetition_penalty": 1.1,
        }
        if temperature and temperature > 0:
            generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
        else:
            # Sampling needs a strictly positive temperature; decode greedily otherwise.
            generation_kwargs["do_sample"] = False

        # Generate response
        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_kwargs)

        # Decode only the newly generated tokens. Removing the prompt by string
        # replacement is unreliable: the decoded prompt can differ from the
        # original text (truncation, tokenizer normalisation), which would leave
        # the prompt inside the returned "response".
        generated_ids = outputs[0][prompt_length:]
        return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    except Exception as e:
        print(f"Generation error: {e}")
        return f"Error generating response: {str(e)}"

def extract_json_from_text(text):
    """Extract the evaluation result from a model response.

    The first JSON object that parses successfully is returned as-is. Objects
    are decoded with ``json.JSONDecoder.raw_decode`` starting at each ``{``, so
    string values containing braces and nested objects are handled.

    If no JSON object can be parsed (for example because generation was cut off
    mid-object), the mark is recovered from patterns such as
    ``"markAwarded": 4``, ``marks: 4`` or ``Score - 4``; it defaults to 0 when
    nothing matches.

    Args:
        text: Raw text produced by the model.

    Returns:
        A dict. On the fallback path it has the keys ``markAwarded`` (int) and
        ``explanation`` (the text, truncated to 200 characters plus "...").
        If an unexpected error occurs, ``markAwarded`` is 0 and ``explanation``
        describes the error.
    """
    try:
        decoder = json.JSONDecoder()
        brace_at = text.find('{')
        while brace_at != -1:
            try:
                candidate, _ = decoder.raw_decode(text, brace_at)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            brace_at = text.find('{', brace_at + 1)

        # If no valid JSON found, try to extract values manually.
        # The pattern also covers the `markAwarded` key itself (optionally
        # followed by a closing quote) so that truncated JSON still yields a mark.
        mark_match = re.search(
            r'(?:mark\s*awarded|marks?|score)["\']?\s*[:\-=]?\s*(\d+)',
            text,
            re.IGNORECASE,
        )
        mark = int(mark_match.group(1)) if mark_match else 0

        return {
            "markAwarded": mark,
            "explanation": text[:200] + "..." if len(text) > 200 else text
        }

    except Exception as e:
        return {
            "markAwarded": 0,
            "explanation": f"Error parsing response: {str(e)}"
        }

def extract_boolean_from_text(text):
    """Extract the yes/no verdict of a question comparison from model output.

    Resolution order:
      1. The first JSON object that parses decides: its ``isSame`` value is
         coerced to ``bool`` (the strings "true"/"yes"/"1" count as True); a
         parsed object without ``isSame`` yields False.
      2. An ``isSame: <value>`` fragment in malformed or truncated JSON.
      3. The first explicit verdict word (yes / true / no / false).
      4. Otherwise True only if "same" or "identical" appears as a whole word
         and no negation ("not", "n't", "different", ...) is present.

    Whole-word matching is used because the key name ``isSame`` itself contains
    "same", and phrases such as "not the same" must not count as agreement.

    Args:
        text: Raw text produced by the model.

    Returns:
        ``True`` if the text indicates the questions are the same, otherwise
        ``False`` (including when an unexpected error occurs).
    """
    try:
        # Look for JSON first
        decoder = json.JSONDecoder()
        brace_at = text.find('{')
        while brace_at != -1:
            try:
                candidate, _ = decoder.raw_decode(text, brace_at)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                value = candidate.get("isSame", False)
                if isinstance(value, str):
                    return value.strip().lower() in ("true", "yes", "1")
                return bool(value)
            brace_at = text.find('{', brace_at + 1)

        # Fallback to text analysis
        text_lower = text.lower()

        key_match = re.search(
            r'is_?same["\']?\s*[:=]\s*["\']?(true|false|yes|no)\b', text_lower
        )
        if key_match:
            return key_match.group(1) in ("true", "yes")

        verdict = re.search(r'\b(yes|true|no|false)\b', text_lower)
        if verdict:
            return verdict.group(1) in ("yes", "true")

        if re.search(r"\b(?:not|different|differ|differs|distinct)\b|n't\b", text_lower):
            return False
        return bool(re.search(r'\b(?:same|identical)\b', text_lower))

    except Exception:
        return False

# Confirmation output so the cell visibly reports that the helpers above were defined.
print("‚úÖ Helper functions defined")

## 🌐 Flask API Server

In [None]:
# Create Flask app
app = Flask(__name__)
# NOTE(review): CORS(app) is called without options, which presumably allows
# requests from any origin — confirm that is acceptable for a publicly
# tunnelled endpoint, or restrict the allowed origins.
CORS(app)  # Enable CORS for React Native

@app.route('/', methods=['GET'])
def home():
    """Describe the service: status, a message, the model name and the endpoints."""
    endpoint_descriptions = [
        "/compare - POST - Compare student answer with model answer",
        "/compare-questions - POST - Compare if two questions are the same",
        "/health - GET - Health check",
    ]
    service_info = {
        "status": "online",
        "message": "Answer Evaluator API is running",
        "model": MODEL_NAME,
        "endpoints": endpoint_descriptions,
    }
    return jsonify(service_info)

@app.route('/health', methods=['GET'])
def health():
    """Health check reporting GPU availability and allocated GPU memory."""
    has_gpu = torch.cuda.is_available()
    if has_gpu:
        # Bytes allocated by torch, converted to GiB and formatted to two decimals.
        allocated_gib = torch.cuda.memory_allocated() / 1024**3
        gpu_memory = f"{allocated_gib:.2f} GB"
    else:
        gpu_memory = "N/A"

    payload = {
        "status": "healthy",
        "gpu_available": has_gpu,
        "gpu_memory": gpu_memory,
    }
    return jsonify(payload)

@app.route('/compare', methods=['POST'])
def compare_answers():
    """Score a student's answer against a model answer.

    Expects a JSON object with ``studentAnswer``, ``modelAnswer`` and an
    optional integer ``maxMark`` (default 5).

    Returns:
        JSON with ``markAwarded`` (int clamped to ``0..maxMark``) and
        ``explanation``, plus any extra keys the model's JSON contained.
        Responds 400 when the body is not a JSON object, ``maxMark`` is not an
        integer, or ``modelAnswer`` is empty; responds 500 on unexpected errors.
        An empty ``studentAnswer`` is scored 0 without calling the model.
    """
    try:
        # silent=True returns None for a missing/malformed JSON body instead of
        # raising, so client mistakes are reported as 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                "markAwarded": 0,
                "explanation": "Request body must be a JSON object"
            }), 400

        student_answer = str(data.get('studentAnswer') or '')
        model_answer = str(data.get('modelAnswer') or '')
        try:
            max_mark = int(data.get('maxMark', 5))
        except (TypeError, ValueError):
            return jsonify({
                "markAwarded": 0,
                "explanation": "maxMark must be an integer"
            }), 400

        if not model_answer.strip():
            return jsonify({
                "markAwarded": 0,
                "explanation": "modelAnswer is required"
            }), 400

        if not student_answer.strip():
            # Nothing to evaluate; avoid asking the model to grade an empty answer.
            return jsonify({
                "markAwarded": 0,
                "explanation": "No student answer provided"
            })

        # Create evaluation prompt
        prompt = f"""You are a fair and accurate exam evaluator. Compare the student's answer with the model answer and provide a score.

EVALUATION GUIDELINES:
- If answers are IDENTICAL or convey the EXACT SAME MEANING: Award FULL marks ({max_mark})
- If answers cover the same concepts with similar depth: Award FULL marks ({max_mark})
- If answers are substantially correct but missing minor details: Deduct 0-1 marks
- If answers are partially correct but missing key concepts: Award partial marks
- If answers are incorrect or completely different: Award 0 marks

Model Answer: "{model_answer}"
Student Answer: "{student_answer}"

Evaluate out of {max_mark} marks and provide your response in this exact JSON format:
{{"markAwarded": number, "explanation": "string"}}

Response:"""

        # Generate response
        response_text = generate_response(prompt, max_new_tokens=150, temperature=0.3)

        # Extract JSON
        result = extract_json_from_text(response_text)

        # The model may emit the mark as a float, a string such as "4/5", or null;
        # coerce leniently instead of failing the whole request.
        raw_mark = result.get('markAwarded', 0)
        try:
            awarded = int(float(raw_mark))
        except (TypeError, ValueError, OverflowError):
            number = re.search(r'-?\d+(?:\.\d+)?', str(raw_mark))
            awarded = int(float(number.group())) if number else 0

        # Ensure mark is within bounds
        result['markAwarded'] = max(0, min(awarded, max_mark))
        result.setdefault('explanation', '')

        return jsonify(result)

    except Exception as e:
        return jsonify({
            "markAwarded": 0,
            "explanation": f"Error processing request: {str(e)}"
        }), 500

@app.route('/compare-questions', methods=['POST'])
def compare_questions():
    """Decide whether two questions ask the same thing.

    Expects a JSON object with ``studentQuestion`` and ``modelQuestion``.

    Returns:
        JSON with ``isSame`` (bool) and ``explanation`` (at most 200 characters
        plus "..."). The explanation is the ``explanation`` field of the model's
        JSON when one can be extracted, otherwise the raw model text.
        Responds 400 when the body is not a JSON object or a question is
        missing; responds 500 on unexpected errors.
    """
    try:
        # silent=True returns None for a missing/malformed JSON body instead of
        # raising, so client mistakes are reported as 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                "isSame": False,
                "explanation": "Request body must be a JSON object"
            }), 400

        student_question = str(data.get('studentQuestion') or '')
        model_question = str(data.get('modelQuestion') or '')
        if not student_question.strip() or not model_question.strip():
            return jsonify({
                "isSame": False,
                "explanation": "studentQuestion and modelQuestion are required"
            }), 400

        # Create comparison prompt
        prompt = f"""Determine if these two questions are asking the same thing. Ignore:
- Extra spaces
- Minor punctuation differences (periods, commas)
- Question numbering format (Q.8 vs Q. 8. vs Q8 vs 8.)
- Case differences

Question 1: "{model_question}"
Question 2: "{student_question}"

Respond with JSON in this exact format:
{{"isSame": boolean, "explanation": "string"}}

Response:"""

        # Generate response
        response_text = generate_response(prompt, max_new_tokens=100, temperature=0.1)

        # Extract boolean result
        is_same = extract_boolean_from_text(response_text)

        # Prefer the model's own explanation field over echoing the raw JSON text.
        parsed = extract_json_from_text(response_text)
        explanation = parsed.get('explanation') if isinstance(parsed, dict) else None
        if not isinstance(explanation, str) or not explanation.strip():
            explanation = response_text
        if len(explanation) > 200:
            explanation = explanation[:200] + "..."

        return jsonify({
            "isSame": is_same,
            "explanation": explanation
        })

    except Exception as e:
        return jsonify({
            "isSame": False,
            "explanation": f"Error processing request: {str(e)}"
        }), 500

# Confirmation output so the cell visibly reports that the app and its routes were registered.
print("‚úÖ Flask app created")

## 🚀 Start Server with ngrok

In [None]:
# Set up ngrok (you might need to sign up for a free account at ngrok.com)
# Uncomment and add your ngrok auth token if needed. Prefer reading the token
# from an environment variable or a Colab secret over pasting it into the notebook:
# ngrok.set_auth_token("your_ngrok_auth_token_here")

# Single source of truth for the port used by both the tunnel and the Flask server.
SERVER_PORT = 5000

# Start ngrok tunnel
# ngrok.connect() returns an NgrokTunnel object in current pyngrok releases.
# Formatting that object directly produces a descriptive label
# ('NgrokTunnel: "https://..." -> "http://localhost:5000"'), not a URL, so keep
# only its `public_url` string. getattr() keeps this working if a plain string
# is returned instead.
tunnel = ngrok.connect(SERVER_PORT)
public_url = getattr(tunnel, "public_url", tunnel)
print(f"üåê Public URL: {public_url}")
print(f"üì± Use this URL in your React Native app: {public_url}")

# Start Flask server in a thread
def run_server():
    """Run the Flask app on SERVER_PORT; app.run() blocks, hence the separate thread."""
    app.run(port=SERVER_PORT, debug=False, use_reloader=False)

# Daemon thread: start() returns immediately, so this cell finishes while the
# server keeps serving in the background.
server_thread = threading.Thread(target=run_server)
server_thread.daemon = True
server_thread.start()

print("‚úÖ Server started!")
print("\nüß™ Test endpoints:")
print(f"Health: {public_url}/health")
print(f"Compare: {public_url}/compare")
print(f"Compare Questions: {public_url}/compare-questions")

# Keep the server running
print("\nüîÑ Server is running... Keep this cell running to maintain the server.")
print("üìã Copy the public URL above to use in your React Native app.")

## 🧪 Test the API Endpoints

In [None]:
import requests

# Get the public URL (replace with your actual ngrok URL)
# `public_url` may be a pyngrok NgrokTunnel object; str() of that object is a
# descriptive label rather than a URL, so read its `public_url` attribute when present.
base_url = str(getattr(public_url, "public_url", public_url)).rstrip("/")

print(f"Testing API at: {base_url}")

# requests has no default timeout; bound each call so a stuck generation
# cannot hang the cell forever.
REQUEST_TIMEOUT_SECONDS = 120


def run_endpoint_test(label, send, path, payload=None):
    """Call one endpoint and print its JSON response, or the failure.

    Args:
        label: Human-readable test name used in the printed output.
        send: The requests function to call (``requests.get`` or ``requests.post``).
        path: Endpoint path appended to ``base_url``.
        payload: Optional JSON body.
    """
    try:
        response = send(f"{base_url}{path}", json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
        print(f"\n‚úÖ {label}: {response.json()}")
    except Exception as e:
        print(f"‚ùå {label} Failed: {e}")


# Test 1: Health check
run_endpoint_test("Health Check", requests.get, "/health")

# Test 2: Answer comparison
run_endpoint_test(
    "Answer Comparison",
    requests.post,
    "/compare",
    {
        "studentAnswer": "The capital of France is Paris. It is a beautiful city.",
        "modelAnswer": "Paris is the capital of France.",
        "maxMark": 5
    },
)

# Test 3: Question comparison
run_endpoint_test(
    "Question Comparison",
    requests.post,
    "/compare-questions",
    {
        "studentQuestion": "What is the capital of France?",
        "modelQuestion": "What is the capital city of France?"
    },
)

## ⏰ Keep Server Alive

Run this cell to keep the server running and monitor its status.

In [None]:
# Keep the server alive and monitor
# Imported here as well so this cell does not depend on the optional test cell having run.
import requests

# `public_url` may be a pyngrok NgrokTunnel object; str() of that object is a
# descriptive label rather than a URL, so read its `public_url` attribute when present.
health_url = str(getattr(public_url, "public_url", public_url)).rstrip("/") + "/health"

CHECK_INTERVAL_SECONDS = 30  # pause between health checks

# Loops until the cell is interrupted (KeyboardInterrupt).
try:
    while True:
        time.sleep(CHECK_INTERVAL_SECONDS)

        # Health check
        try:
            response = requests.get(health_url, timeout=5)
            if response.status_code == 200:
                print(f"üìä Server Status: Online - {time.strftime('%H:%M:%S')}")
            else:
                print(f"‚ö†Ô∏è Server Warning: HTTP {response.status_code}")
        except Exception as e:
            # A failed probe is reported but does not stop the monitoring loop.
            print(f"‚ùå Server Error: {e}")

except KeyboardInterrupt:
    print("\nüõë Server monitoring stopped")
except Exception as e:
    print(f"\n‚ùå Monitoring error: {e}")