In [None]:
# =========================================================
# 0. INSTALL (KAGGLE SAFE)
# =========================================================
# Notebook shell escape: quietly (-q) install the Hugging Face packages used
# below. `transformers` provides the tokenizer/model classes imported in the
# next section; `accelerate` is presumably required for the
# `device_map="auto"` load further down.
# NOTE(review): the final cell writes .xlsx files through pandas, which needs
# an Excel writer engine (e.g. openpyxl) - confirm it is preinstalled here.
!pip install -q transformers accelerate

# =========================================================
# 1. IMPORTS
# =========================================================
import torch
import pandas as pd
from collections import deque
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================================================
# 2. GPU CHECK
# =========================================================
# Query CUDA availability once and reuse the answer for both report lines.
_has_cuda = torch.cuda.is_available()
print("CUDA available:", _has_cuda)

# The device name is only looked up when a CUDA device is actually present.
if _has_cuda:
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("GPU:", "NO GPU")

# =========================================================
# 3. GAME CONSTANTS
# =========================================================
# Simulated weeks, numbered 1 through 15 inclusive.
WEEKS = [*range(1, 16)]

# Supply-chain roles. The order matters: the first role receives the external
# customer demand and every later role receives the orders of the one before it.
PLAYERS = ["Retailer", "Wholesaler", "Distributor", "Factory"]

# External customer demand per week (week number -> units), listed in week
# order starting at week 1.
DEMAND = dict(enumerate(
    [
        15, 15, 15, 15,
        25, 25, 25, 80,
        30, 18, 12, 22,
        35, 18, 16,
    ],
    start=1,
))

# Starting stock of every player; also reused as the initial pipeline content
# and as the seed value for "last week's order".
INITIAL_INVENTORY = 15

# Scenario name -> settings. Only "shipping_delay" (in weeks) is configured
# here, so scenarios that share a delay are otherwise identical in this file.
SCENARIOS = {
    name: {"shipping_delay": delay}
    for name, delay in (
        ("baseline", 2),
        ("info_share", 2),
        ("lean", 0),
        ("lean_info_share", 0),
    )
}

# =========================================================
# 4. BEER GAME ENVIRONMENT (WITH HISTORY)
# =========================================================
class BeerGameEnv:
    """Bookkeeping for one Beer Game run: stock, backlog, orders and shipments.

    The expected weekly call order is ``receive_shipments()``,
    ``fulfill_demand(observed)``, then ``place_orders(decisions)``.

    Every order is delivered in full after the shipping delay; the stock of
    the upstream player is not consulted when an order is placed.

    Attributes:
        inventory: Current on-hand stock per player.
        backlog: Current unmet demand per player.
        orders: Every (clamped) order each player has placed, in week order.
        inventory_hist: Stock per player recorded after each fulfilment step.
        backlog_hist: Backlog per player recorded after each fulfilment step.
        pipeline: Per-player queue of shipments in transit, or ``None`` when
            ``shipping_delay`` is not positive.
    """

    def __init__(self, shipping_delay):
        """Create the starting state.

        Args:
            shipping_delay: Number of weeks between placing an order and
                receiving it. When positive, each player's pipeline starts
                with that many shipments of ``INITIAL_INVENTORY`` units.
                When zero (or negative) there is no pipeline at all.
        """
        self.inventory = {p: INITIAL_INVENTORY for p in PLAYERS}
        self.backlog = {p: 0 for p in PLAYERS}
        self.orders = {p: [] for p in PLAYERS}

        self.inventory_hist = {p: [] for p in PLAYERS}
        self.backlog_hist = {p: [] for p in PLAYERS}

        # `pipeline` stays None for the no-delay case because callers test it
        # for truthiness to decide whether an in-transit shipment exists.
        self.pipeline = (
            {p: deque([INITIAL_INVENTORY] * shipping_delay) for p in PLAYERS}
            if shipping_delay > 0 else None
        )

    def receive_shipments(self):
        """Move the oldest in-transit shipment of each player into stock.

        Does nothing when there is no pipeline; in that case deliveries are
        credited directly by ``place_orders``.
        """
        if self.pipeline is None:
            return
        for p in PLAYERS:
            self.inventory[p] += self.pipeline[p].popleft()

    def fulfill_demand(self, observed):
        """Serve this week's demand plus backlog from stock and log the result.

        Args:
            observed: Mapping of player -> units requested from it this week.
        """
        for p in PLAYERS:
            total = observed[p] + self.backlog[p]
            shipped = min(self.inventory[p], total)
            self.inventory[p] -= shipped
            # Whatever could not be served carries over to next week.
            self.backlog[p] = total - shipped

            self.inventory_hist[p].append(self.inventory[p])
            self.backlog_hist[p].append(self.backlog[p])

    def place_orders(self, decisions):
        """Record each player's order and schedule (or apply) its delivery.

        Args:
            decisions: Mapping of player -> requested order quantity. Values
                are converted with ``int`` and negative values become 0.
        """
        for p in PLAYERS:
            order = max(0, int(decisions[p]))
            self.orders[p].append(order)
            if self.pipeline is not None:
                self.pipeline[p].append(order)
            else:
                # No shipping delay: without this the order would never reach
                # inventory, so stock could only ever shrink. Credit it right
                # away; it is then available for next week's fulfilment.
                self.inventory[p] += order

# =========================================================
# 5. LOAD LLM
# =========================================================
# Checkpoint name passed to both `from_pretrained` calls below.
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# Tokenizer and model weights are loaded from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Half-precision weights; device placement is delegated via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto", torch_dtype=torch.float16)

print("Model loaded on:", model.device)

# =========================================================
# 6. DECISION PARSER (STABLE)
# =========================================================
def llm_decide(prompt):
    """Sample a completion for ``prompt`` and extract the order quantity.

    Generation uses sampling (``do_sample=True``), so repeated calls with the
    same prompt can return different values.

    Args:
        prompt: Full text prompt sent to the model.

    Returns:
        The integer following the first ``Order = <digits>`` occurrence in the
        model's reply, or 0 when the reply contains no such pattern.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=60,
        do_sample=True,
        temperature=0.75,   # calm variability
        top_p=0.95
    )

    # The generated sequence starts with the prompt tokens. Drop them so the
    # pattern is searched only in the model's reply and can never match text
    # that happens to be part of the prompt itself.
    prompt_len = inputs["input_ids"].shape[-1]
    text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    match = re.search(r"Order\s*=\s*(\d+)", text)
    if match:
        return int(match.group(1))
    # Best-effort fallback: an unparseable reply counts as "order nothing".
    return 0

# =========================================================
# 7. CALM HUMAN PROMPT (BOUNDED RATIONALITY)
# =========================================================
def llm_agent(player, inventory, backlog, incoming, observed):
    """Ask the language model for one player's order for the current week.

    Fills a fixed "calm human player" prompt with the player's situation and
    forwards it to ``llm_decide``.

    Args:
        player: Role name inserted into the prompt.
        inventory: Value shown as the player's current inventory.
        backlog: Value shown as the player's current backlog.
        incoming: Value shown as next week's incoming shipment.
        observed: Value shown as the order received this week.

    Returns:
        Whatever ``llm_decide`` returns for the filled-in prompt.
    """
    template = """
You are a human player participating in the Beer Game as the {player}.

You do not know future customer demand.
You have limited understanding of delays and system-wide effects.
You try to make reasonable decisions based on recent observations.

Your goal is to:
- Meet incoming demand as well as possible
- Avoid excessive backlog
- Avoid holding extremely high inventory

Current situation:
Inventory: {inventory}
Backlog: {backlog}
Incoming shipment next week: {incoming}
Order received this week: {observed}

Guidelines:
- If backlog is increasing, you may increase orders gradually.
- If inventory is high and backlog is low, you may reduce orders.
- Avoid extreme overreaction unless the situation clearly worsens.

Respond ONLY in this format:
Order = <number>
"""
    situation = template.format(
        player=player,
        inventory=inventory,
        backlog=backlog,
        incoming=incoming,
        observed=observed,
    )
    return llm_decide(situation)

# =========================================================
# 8. RUN ONE SCENARIO
# =========================================================
def run_llm_scenario(scenario_name):
    """Simulate every week of one scenario with LLM-driven players.

    Each week: shipments are received, every player serves the demand it
    observes, then every player asks the model for a new order. The Retailer
    observes the external ``DEMAND``; every other player observes the order
    its downstream neighbour decided on in the previous week (seeded with
    ``INITIAL_INVENTORY`` for the first week).

    Args:
        scenario_name: Key into ``SCENARIOS``; only its ``"shipping_delay"``
            setting is used.

    Returns:
        The ``BeerGameEnv`` holding the final state and the recorded history.

    Raises:
        KeyError: If ``scenario_name`` is not present in ``SCENARIOS``.
    """
    shipping_delay = SCENARIOS[scenario_name]["shipping_delay"]
    env = BeerGameEnv(shipping_delay)
    last_orders = {p: INITIAL_INVENTORY for p in PLAYERS}

    for week in WEEKS:
        env.receive_shipments()

        observed = {}
        for i, p in enumerate(PLAYERS):
            if p == "Retailer":
                observed[p] = DEMAND[week]
            else:
                observed[p] = last_orders[PLAYERS[i - 1]]

        env.fulfill_demand(observed)

        decisions = {}
        for p in PLAYERS:
            # Peek at the next shipment due. The queue is absent when there is
            # no shipping delay and is empty at this point when the delay is a
            # single week (its only entry was just received), so fall back to
            # 0 instead of indexing into an empty queue.
            in_transit = env.pipeline[p] if env.pipeline else None
            incoming = in_transit[0] if in_transit else 0
            decisions[p] = llm_agent(
                p,
                env.inventory[p],
                env.backlog[p],
                incoming,
                observed[p]
            )

        env.place_orders(decisions)
        # Copy so next week's observations are not tied to this dict object.
        last_orders = decisions.copy()

    return env

# =========================================================
# 9. RUN ALL SCENARIOS + SAVE
# =========================================================
# Final environment of every scenario, keyed by scenario name.
results = {}

for scenario in SCENARIOS:
    print(f"Running scenario: {scenario}")
    env = run_llm_scenario(scenario)
    results[scenario] = env

    # One row per (player, week), grouped by player, weeks numbered from 1.
    rows = []
    for p in PLAYERS:
        weekly = zip(env.orders[p], env.inventory_hist[p], env.backlog_hist[p])
        rows.extend(
            {
                "Scenario": scenario,
                "Week": week_no,
                "Player": p,
                "Order": placed,
                "Inventory": stock,
                "Backlog": unmet,
            }
            for week_no, (placed, stock, unmet) in enumerate(weekly, start=1)
        )

    # Each scenario is written to its own workbook in the working directory.
    df = pd.DataFrame(rows)
    df.to_excel(f"LLM_Beer_Game_{scenario}_CALM.xlsx", index=False)

print("All CALM-HUMAN scenarios completed and saved.")
