In [2]:
import pandas as pd
import requests
import json
import time
from pathlib import Path
from typing import Dict, List, Optional


class OllamaGSM8K:
    # Model-specific prompt templates
    PROMPT_TEMPLATES = {
        "gemma:2b": """Solve this math problem step by step:

Question: {question}

Let's solve this step by step:""",

        "llama2": """[INST] You are a helpful math tutor. Solve this math problem step by step, showing all work clearly.

Question: {question}

Solution: [/INST]""",

        "mistral": """<s>[INST] Solve this math problem step by step, showing your work clearly.

Question: {question}

Let's break this down: [/INST]""",

        "neural-chat": """### System: You are a skilled math tutor helping students solve math problems step by step.

### User: Please solve this math problem:
{question}

### Assistant: I'll help you solve this step by step:""",

        "phi": """You are a math expert. Show the solution to this problem step by step.

Question: {question}

Solution:"""
    }

    def __init__(self, model="gemma:2b", temperature=0.7, max_tokens=1024):
        self.model = model
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.api_url = "http://localhost:11434/api/generate"

        # Validate model selection
        if model not in self.PROMPT_TEMPLATES:
            available_models = list(self.PROMPT_TEMPLATES.keys())
            print(
                f"Warning: Model {model} not in known models: {available_models}")
            print("Using default template. You may want to add a custom template.")

    def list_available_models(self) -> List[str]:
        """Get list of available models from Ollama."""
        try:
            response = requests.get("http://localhost:11434/api/tags")
            if response.status_code == 200:
                models = [model['name'] for model in response.json()['models']]
                return models
            return []
        except requests.exceptions.RequestException:
            print("Error: Could not connect to Ollama API")
            return []

    def generate_prompt(self, question: str) -> str:
        """Generate model-specific prompt."""
        template = self.PROMPT_TEMPLATES.get(
            self.model,
            # Default template if model not found
            "Question: {question}\n\nLet's solve this step by step:"
        )
        return template.format(question=question)

    def call_ollama(self, prompt: str) -> Optional[str]:
        """Make API call to Ollama with configurable parameters."""
        try:
            response = requests.post(
                self.api_url,
                json={
                    "model": self.model,
                    "prompt": prompt,
                    "stream": False,
                    "options": {
                        "temperature": self.temperature,
                        "num_tokens": self.max_tokens
                    }
                }
            )
            response.raise_for_status()
            return response.json()['response']
        except requests.exceptions.RequestException as e:
            print(f"Error calling Ollama API: {e}")
            return None

    def solve_problem(self, question: str) -> Dict:
        """Solve a single GSM8K problem."""
        prompt = self.generate_prompt(question)
        response = self.call_ollama(prompt)
        return {
            'question': question,
            'solution': response,
            'model': self.model,
            'temperature': self.temperature
        }

    def batch_solve(self, questions: List[str], batch_size: int = 5) -> List[Dict]:
        """Solve multiple problems with delay between batches."""
        solutions = []
        for i in range(0, len(questions), batch_size):
            batch = questions[i:i + batch_size]
            print(f"Processing batch {i//batch_size + 1}...")

            for question in batch:
                solution = self.solve_problem(question)
                solutions.append(solution)
                time.sleep(1)  # Delay between problems

            if i + batch_size < len(questions):
                print("Waiting between batches...")
                time.sleep(5)  # Delay between batches

        return solutions


def compare_models(question: str, models: List[str] = None) -> pd.DataFrame:
    """Compare solutions from different models for the same problem."""
    if models is None:
        models = ["gemma:2b", "llama2", "mistral", "neural-chat", "phi"]

    results = []
    for model in models:
        solver = OllamaGSM8K(model=model)
        solution = solver.solve_problem(question)
        results.append(solution)
        time.sleep(2)  # Delay between model calls

    return pd.DataFrame(results)


def main():
    # Load your local GSM8K dataset
    try:
        train_df = pd.read_csv(
            'C:/Users/Nilofar/Desktop/ML_TermPaper_WIN2425/datasets/gsm8k_data/train.csv')
        print(f"Loaded {len(train_df)} problems from training set")
    except FileNotFoundError:
        print("Error: Please ensure your GSM8K dataset is in 'gsm8k_data/train.csv'")
        return

    # Get list of available models
    solver = OllamaGSM8K()
    available_models = solver.list_available_models()
    print("\nAvailable Ollama models:", available_models)

    # Test with different models
    test_question = train_df['question'].iloc[0]
    print("\nComparing model responses for the first problem:")
    print("Question:", test_question)

    # Test subset of models (adjust based on what you have installed)
    test_models = ["gemma:2b", "llama2"]  # Add or remove models as needed
    results_df = compare_models(test_question, test_models)

    # Display results
    print("\nResults comparison:")
    for _, row in results_df.iterrows():
        print(f"\nModel: {row['model']}")
        print("Solution:", row['solution'])
        print("-" * 50)

    # Example of saving results
    results_df.to_csv('model_comparison.csv', index=False)


if __name__ == "__main__":
    main()

Loaded 7473 problems from training set

Available Ollama models: ['mistral:latest', 'meditron:latest', 'llava:7b', 'qwen2.5-coder:7b', 'deepseek-r1:7b', 'gemma:2b', 'llama3.2:3b', 'deepseek-r1:1.5b', 'llama3:latest']

Comparing model responses for the first problem:
Question: Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?
Error calling Ollama API: 404 Client Error: Not Found for url: http://localhost:11434/api/generate

Results comparison:

Model: gemma:2b
Solution: **Step 1: Calculate the number of clips Natalia sold in April.**

48 friends x 2 clips per friend = 96 clips

**Step 2: Calculate the number of clips Natalia sold in May.**

96 clips (April) x 0.5 = 48 clips

**Step 3: Add the number of clips sold in April and May.**

96 + 48 = 144

Therefore, Natalia sold **144 clips** altogether in April and May.
--------------------------------------------------

Model: llama2
Sol