# Phase 1 â€” Basic LLM Playground

This notebook introduces the fundamentals of LLM behavior:
- Prompt structure  
- Temperature control (deterministic vs creative)  
- Role prompting  
- Few-shot prompting  
- Reasoning scaffolds  
- Unified LLM runner  


In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
print("Environment Loaded")

Environment Loaded


In [2]:
from typing import Optional

MODEL_BACKEND = "gemini"  
# options: "openai", "ollama", "transformers", "gemini"


def run_llm(
    prompt: str,
    temperature: float = 0.0,
    max_tokens: int = 200,
    model: str = "gpt-4o-mini"
) -> str:
    """
    Unified LLM Runner for:
    - OpenAI
    - Gemini (via OpenAI-style client)
    - Ollama (local inference)
    - Transformers (offline HF models)
    """
    print(f"MYYYYYYYYYYYY MODDDDDDD ISSSSSSSS {model}")
    # ---------------------------
    # BACKEND 1: OpenAI
    # ---------------------------
    if MODEL_BACKEND == "openai":
        from openai import OpenAI
        client = OpenAI()   # uses OPENAI_API_KEY from env
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens
        )
        return resp.choices[0].message["content"]

    # ---------------------------
    # BACKEND 2: GEMINI (Google)
    # ---------------------------
    if MODEL_BACKEND == "gemini":
        import os
        from openai import OpenAI

        GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

        client = OpenAI(
            api_key=GEMINI_API_KEY,
            base_url="https://generativelanguage.googleapis.com/v1beta/"
        )

        resp = client.chat.completions.create(
            model=model,  # gemini-2.5-flash
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens
        )
        return resp.choices[0].message.content

    # ---------------------------
    # BACKEND 3: OLLAMA (Local LLM)
    # ---------------------------
    if MODEL_BACKEND == "ollama":
        import ollama
        resp = ollama.chat(
            model=model,
            messages=[{"role": "user", "content": prompt}]
        )
        return resp["message"]["content"]

    # ---------------------------
    # BACKEND 4: Transformers (HF Offline)
    # ---------------------------
    if MODEL_BACKEND == "transformers":
        from transformers import pipeline
        generator = pipeline("text-generation", model=model)
        out = generator(
            prompt,
            temperature=temperature,
            max_new_tokens=max_tokens
        )
        return out[0]["generated_text"]

    raise ValueError(f"Unknown backend: {MODEL_BACKEND}")


In [3]:
from dotenv import load_dotenv
import os

load_dotenv()

print("OPENAI:", os.getenv("OPENAI_API_KEY"))
print("GEMINI:", os.getenv("GEMINI_API_KEY"))
print("Backend:", os.getenv("MODEL_BACKEND"))

OPENAI: 
GEMINI: AIzaSyAv9YmxnU4sU0fWw-cu0CnPtR8dcy6z_FE
Backend: gemini


In [4]:
print(
    run_llm(
        "Explain temperature in LLMs in one short paragraph.",
        temperature=0.2,
        max_tokens=120,
        model="gemini-2.5-flash"
    )
)

MYYYYYYYYYYYY MODDDDDDD ISSSSSSSS gemini-2.5-flash
In LLMs, "
