In [10]:
import math
import random
from pathlib import Path
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd

In [11]:
import pandas as pd

# Build the dataset location from the current user's home directory so the
# notebook is not tied to one account; the layout below mirrors the
# ~/.cache/kagglehub/... directory the dataset was originally downloaded to.
file_path = str(
    Path.home()
    / ".cache" / "kagglehub" / "datasets" / "dataanalyst001"
    / "all-capital-cities-in-the-world" / "versions" / "1"
    / "all capital cities in the world.csv"
)

try:
    # 1. Load the data into a DataFrame
    df = pd.read_csv(file_path)

    # 2. Print the first 5 rows (and all columns)
    print("First 5 entries of the dataset:")
    print(df.head())

    # 3. Print the column names and data types.
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    print("\n--- Column Information ---")
    df.info()

except FileNotFoundError:
    print(f"Error: File not found at the specified path: {file_path}")
    print("Please double-check the path or ensure the file was successfully downloaded.")

except Exception as e:
    # Best-effort preview cell: report any other load/parse problem without
    # aborting the rest of the notebook.
    print(f"An error occurred: {e}")

First 5 entries of the dataset:
   Sno Capital City               Country Continent Latitude Longitude
0    1    Abu Dhabi  United Arab Emirates      Asia   24.28N    54.22E
1    2        Abuja               Nigeria    Africa   09.05N    07.32E
2    3        Accra                 Ghana    Africa   05.35N    00.06W
3    4  Addis Ababa              Ethiopia    Africa   09.02N    38.42E
4    5      Algiers               Algeria    Africa   36.42N    03.08E

--- Column Information ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Sno           196 non-null    int64 
 1   Capital City  196 non-null    object
 2   Country       196 non-null    object
 3   Continent     196 non-null    object
 4   Latitude      196 non-null    object
 5   Longitude     196 non-null    object
dtypes: int64(1), object(5)
memory usage: 9.3+ KB
None


In [4]:
# --- Configuration Constants ---

# Exploration coefficient c in the UCB bonus c * sqrt(ln(t) / N_a).
C_UCB = 0.5

# Per-model simulation parameters, keyed by model name.
MODEL_CONFIG = {
    # Model-1: High cost/High reward prob (Simulated 3B)
    'Model-1': {'cost_per_token': 0.6, 'capacity': 3e9},
    # Model-2: Low cost/Low reward prob (Simulated 1B)
    'Model-2': {'cost_per_token': 0.1, 'capacity': 1e9}
}

# Points awarded for each answer field the simulated LLM gets right.
REWARD_STRUCTURE = {
    'Capital City': 10, 'Continent': 10, 'Latitude': 25, 'Longitude': 25
}

# Best possible reward for one query: 10 + 10 + 25 + 25 = 70 points.
MAX_REWARD = sum(REWARD_STRUCTURE.values())

# Dataset location, derived from the current user's home directory instead of a
# user-specific absolute path. Kept as a plain string so it can be passed to
# pd.read_csv and interpolated into messages exactly as before.
KAGGLEHUB_CACHE_DIR = Path.home() / ".cache" / "kagglehub"
DF_FILE_NAME = str(
    KAGGLEHUB_CACHE_DIR
    / "datasets" / "dataanalyst001" / "all-capital-cities-in-the-world"
    / "versions" / "1" / "all capital cities in the world.csv"
)

# Number of queries simulated per run.
SIMULATION_ROUNDS = 200

In [5]:
# --- LLM Simulation Functions (using synthetic logic) ---

def simulate_llm_tokens_and_performance(country: str, model_name: str) -> Tuple[float, float, int]:
    """
    Produce one synthetic LLM query outcome for ``country`` answered by ``model_name``.

    Token usage is a fixed prompt size plus a completion size that grows with the
    length of the country name and carries a small random offset whose sign depends
    on the model's configured capacity. Cost is tokens times the model's per-token
    price. Reward is the sum of the REWARD_STRUCTURE points for every field that a
    random draw marks as answered correctly.

    Args:
        country: Country name being queried; only its length is used.
        model_name: Key into MODEL_CONFIG (an unknown name raises KeyError).

    Returns: (total_reward, total_cost, total_tokens)
    """
    model_spec = MODEL_CONFIG[model_name]

    # --- Token usage -------------------------------------------------------
    prompt_tokens = 50
    jitter = 5
    baseline_completion = 40 + len(country) // 3

    # Models above 1e9 capacity get a non-negative offset (more verbose);
    # the others get a non-positive one (more concise).
    if model_spec['capacity'] > 1e9:
        low, high = 0, jitter
    else:
        low, high = -jitter, 0
    completion_tokens = max(1, baseline_completion + random.randint(low, high))
    total_tokens = prompt_tokens + completion_tokens

    # --- Cost --------------------------------------------------------------
    total_cost = total_tokens * model_spec['cost_per_token']

    # --- Reward ------------------------------------------------------------
    # 'Model-1' answers with ~90% base accuracy, every other model with ~70%.
    base_accuracy = 0.7
    if model_name == 'Model-1':
        base_accuracy = 0.9

    total_reward = 0.0
    for points in REWARD_STRUCTURE.values():
        # Perturb the accuracy for this field, then draw a pass/fail outcome.
        success_prob = base_accuracy + random.uniform(-0.1, 0.05)
        if random.random() < success_prob:
            total_reward += points

    return total_reward, total_cost, total_tokens


In [9]:


# --- UCB and Reverse Myerson Implementation ---

class LLMBanditSelector:
    """Model selector combining a UCB exploration bonus with a Reverse Myerson virtual valuation.

    Attributes:
        models: Arm (model) names; ties in selection resolve to the earliest entry.
        K: Number of arms.
        c_ucb: Exploration coefficient c of the UCB bonus.
        t: Number of ``select_model`` calls made so far.
        N_a: Pull count per model.
        Q_a: Running mean of observed reward per model.
        a_history: Per model, the list of observed ``a = reward + exploration - cost`` values.
    """
    def __init__(self, models: List[str], c_ucb: float):
        self.models = models
        self.K = len(models)
        self.c_ucb = c_ucb
        self.t = 0

        # UCB State
        self.N_a = {model: 0 for model in models}
        self.Q_a = {model: 0.0 for model in models}

        # Reverse Myerson State
        self.a_history = {model: [] for model in models}

    def _exploration_term(self, model: str) -> float:
        """Return the UCB bonus c * sqrt(ln(t) / N_a) for ``model`` (inf if never pulled)."""
        pulls = self.N_a[model]
        if pulls == 0:
            return float('inf')

        # Clamp the round counter to 1: math.log(0) raises ValueError, which
        # happened when stats were updated before any select_model() call.
        rounds = max(self.t, 1)
        return self.c_ucb * math.sqrt(math.log(rounds) / pulls)

    def _compute_ucb_index(self, model: str) -> float:
        """Calculates the UCB index for a given model (Q_t(a) + exploration_term)."""
        if self.N_a[model] == 0:
            return float('inf')

        return self.Q_a[model] + self._exploration_term(model)

    def _get_empirical_pdf_cdf(self, a_values: List[float], current_a: float, bin_count: int = 20) -> Tuple[float, float]:
        """Estimate the PDF and CDF at ``current_a`` from a histogram of ``a_values``.

        Args:
            a_values: Previously observed 'a' values for one model.
            current_a: The value at which to evaluate the estimates.
            bin_count: Number of equal-width histogram bins.

        Returns:
            (pdf, cdf) as plain Python floats. With no history the neutral
            defaults (1.0, 0.5) are returned; when every value (including
            ``current_a``) is identical the result is (1.0, 1.0). The PDF is
            floored at 1e-6 so callers can divide by it.
        """
        if not a_values:
            return 1.0, 0.5

        # The bin range includes current_a so that it always falls inside a bin.
        a_all = np.array(a_values + [current_a])
        min_a, max_a = a_all.min(), a_all.max()

        if min_a == max_a:
            # Degenerate range: all history values equal current_a, so the
            # whole empirical mass lies at or below it.
            return 1.0, 1.0

        bins = np.linspace(min_a, max_a, bin_count + 1)

        # Only the history is counted; current_a merely determines which bin is read.
        counts, bin_edges = np.histogram(a_values, bins=bins, density=False)
        total_samples = len(a_values)
        bin_width = bin_edges[1] - bin_edges[0]

        # digitize is 1-based and puts the right-most edge past the last bin; clip it back.
        current_bin_index = int(np.clip(np.digitize(current_a, bin_edges) - 1, 0, bin_count - 1))

        # Empirical PDF (density approximation)
        pdf_val = (counts[current_bin_index] / total_samples) / bin_width

        # Empirical CDF: share of history in bins up to and including the current one
        cdf_val = np.sum(counts[:current_bin_index + 1]) / total_samples

        # Prevent division by zero in the virtual valuation; convert NumPy
        # scalars to the plain floats promised by the annotation.
        return max(float(pdf_val), 1e-6), float(cdf_val)

    def _compute_virtual_valuation(self, a: float, model: str) -> float:
        """Reverse Myerson Virtual Valuation: a + (CDF(a) / PDF(a))"""
        pdf_a, cdf_a = self._get_empirical_pdf_cdf(self.a_history[model], a)
        return a + (cdf_a / pdf_a)

    def select_model(self, country: str) -> str:
        """Pick the model to query this round and advance the round counter.

        Every model is returned once (in list order) before any valuation is
        computed. After that, the model with the lowest virtual valuation wins;
        ties go to the model listed first.

        Args:
            country: The query subject. Currently unused because the
                hypothetical-outcome simulation is stubbed out (see note below).

        Returns:
            The name of the selected model.
        """
        self.t += 1

        # Initial exploration: Ensure every arm is pulled once
        for model in self.models:
            if self.N_a[model] == 0:
                return model

        virtual_valuations: Dict[str, float] = {}

        for model in self.models:
            # 1. Hypothetical outcome (Reward, Cost).
            # NOTE(review): this is a placeholder - reward and cost are zero, so
            # 'a' below is just the exploration bonus and selection ignores
            # observed reward and cost. Confirm the intended estimate (e.g. a
            # call to simulate_llm_tokens_and_performance) before relying on it.
            reward, cost = 0, 0

            # 2. Exploration bonus, computed with the same formula that
            # update_model_stats uses for the stored history.
            exploration_term = self._exploration_term(model)

            # 3. Calculate 'a' value: a = Reward + UCB_Exploration_Term - Cost
            a = reward + exploration_term - cost

            # 4. Compute Reverse Myerson Virtual Valuation
            virtual_valuations[model] = self._compute_virtual_valuation(a, model)

        # 5. Select the model with the LOWEST Virtual Valuation
        return min(virtual_valuations, key=virtual_valuations.get)

    def update_model_stats(self, selected_model: str, reward: float, cost: float):
        """Record an observed (reward, cost) for ``selected_model``.

        Increments the pull count, folds ``reward`` into the running mean and
        appends ``reward + exploration - cost`` to the model's 'a' history.
        Safe to call before the first ``select_model`` call.
        """
        # 1. UCB Update
        self.N_a[selected_model] += 1

        # Update empirical mean Q_t(a) (Incremental update formula)
        n = self.N_a[selected_model]
        old_q = self.Q_a[selected_model]
        self.Q_a[selected_model] = old_q + (1 / n) * (reward - old_q)

        # 2. Calculate and update 'a' history for Myerson
        a = reward + self._exploration_term(selected_model) - cost
        self.a_history[selected_model].append(a)

In [10]:
def run_simulation(num_rounds: int, df: pd.DataFrame, selector: LLMBanditSelector):
    """
    Drive the bandit selector for ``num_rounds`` simulated queries and print a report.

    Each round draws a random country from ``df['Country']``, asks ``selector``
    which model to use, simulates that model's outcome, and feeds the observed
    reward and cost back into the selector. Progress is printed every 20th round
    and on the last round, followed by totals, pull counts and mean rewards.
    """
    countries = df['Country'].tolist()
    total_revenue = 0.0
    total_cost = 0.0

    print(f"--- Starting LLM Selection Simulation ---")
    print(f"Models: {selector.models}, UCB-C: {selector.c_ucb}, Rounds: {num_rounds}")
    print(f"Max Reward per round: {MAX_REWARD}\n")

    for i in range(1, num_rounds + 1):
        # Draw the query, let the selector choose, then observe the real outcome.
        country = random.choice(countries)
        selected_model = selector.select_model(country)
        reward, cost, _ = simulate_llm_tokens_and_performance(country, selected_model)

        # Feed the observation back before accumulating the run totals.
        selector.update_model_stats(selected_model, reward, cost)
        total_revenue += reward
        total_cost += cost

        is_report_round = (i % 20 == 0) or (i == num_rounds)
        if is_report_round:
            net_value = reward - cost
            print(f"Round {i}: Model={selected_model}, R={reward:.2f}, C={cost:.2f}, Net={net_value:.2f}")

    print("\n--- Simulation Complete ---")
    print(f"Total Rounds: {num_rounds}")
    print(f"Total Revenue: {total_revenue:.2f}")
    print(f"Total Cost: {total_cost:.2f}")
    print(f"Net Profit (Revenue - Cost): {total_revenue - total_cost:.2f}")

    print("\n--- Model Pull Counts ---")
    for model in selector.N_a:
        count = selector.N_a[model]
        print(f"{model}: {count} pulls")

    print("\n--- Final UCB Mean Rewards (Q_a) ---")
    for model in selector.Q_a:
        q_a = selector.Q_a[model]
        print(f"{model}: {q_a:.4f} average reward")
        
# --- Execution ---

if __name__ == '__main__':
    try:
        # 1. Load the synthetic dataset
        df_synthetic = pd.read_csv(DF_FILE_NAME)
    except FileNotFoundError:
        print(f"FATAL ERROR: Dataset not found at {DF_FILE_NAME}")
        # Equivalent to sys.exit(1), but needs no `sys` import (the original
        # called sys.exit without importing sys, which raised NameError here).
        raise SystemExit(1)

    # 2. Initialize the LLM Selector with one arm per configured model
    model_names = list(MODEL_CONFIG.keys())
    bandit_selector = LLMBanditSelector(models=model_names, c_ucb=C_UCB)

    # 3. Run the simulation
    run_simulation(SIMULATION_ROUNDS, df_synthetic, bandit_selector)

--- Starting LLM Selection Simulation ---
Models: ['Model-1', 'Model-2'], UCB-C: 0.5, Rounds: 200
Max Reward per round: 70

Round 20: Model=Model-2, R=60.00, C=8.70, Net=51.30
Round 40: Model=Model-2, R=70.00, C=9.20, Net=60.80
Round 60: Model=Model-2, R=35.00, C=9.30, Net=25.70
Round 80: Model=Model-2, R=70.00, C=8.80, Net=61.20
Round 100: Model=Model-2, R=25.00, C=9.00, Net=16.00
Round 120: Model=Model-2, R=35.00, C=8.80, Net=26.20
Round 140: Model=Model-2, R=35.00, C=8.70, Net=26.30
Round 160: Model=Model-2, R=60.00, C=9.10, Net=50.90
Round 180: Model=Model-2, R=10.00, C=9.20, Net=0.80
Round 200: Model=Model-2, R=45.00, C=9.10, Net=35.90

--- Simulation Complete ---
Total Rounds: 200
Total Revenue: 9560.00
Total Cost: 2183.40
Net Profit (Revenue - Cost): 7376.60

--- Model Pull Counts ---
Model-1: 8 pulls
Model-2: 192 pulls

--- Final UCB Mean Rewards (Q_a) ---
Model-1: 64.3750 average reward
Model-2: 47.1094 average reward


In [11]:
# Assuming all constants (MODEL_CONFIG, REWARD_STRUCTURE, SIMULATION_ROUNDS,
# DF_FILE_NAME, MAX_REWARD) and functions (simulate_llm_tokens_and_performance)
# are already defined from the previous cell.

def run_exclusive_model_simulation(num_rounds: int, df: pd.DataFrame, model_name: str):
    """
    Baseline run: answer every one of ``num_rounds`` random-country queries with
    the single model ``model_name`` and report the outcome.

    Progress is printed every 50th round and on the last round, followed by a
    summary of revenue, cost and profit.

    Returns:
        The net profit (total revenue minus total cost) over the whole run.
    """
    countries = df['Country'].tolist()
    total_revenue = 0.0
    total_cost = 0.0

    print(f"--- Starting Exclusive Simulation: {model_name} ({num_rounds} Rounds) ---")

    for i in range(1, num_rounds + 1):
        # No selection step: the fixed model answers a randomly drawn country.
        country = random.choice(countries)
        reward, cost, _ = simulate_llm_tokens_and_performance(country, model_name)

        total_revenue += reward
        total_cost += cost

        on_checkpoint = (i % 50 == 0) or (i == num_rounds)
        if not on_checkpoint:
            continue
        net_value = reward - cost
        print(f"Round {i}: Model={model_name}, R={reward:.2f}, C={cost:.2f}, Net={net_value:.2f}")

    net_profit = total_revenue - total_cost

    print("\n--- Exclusive Simulation Complete ---")
    print(f"Model Used: {model_name}")
    print(f"Total Rounds: {num_rounds}")
    print(f"Total Revenue: {total_revenue:.2f}")
    print(f"Total Cost: {total_cost:.2f}")
    print(f"Net Profit (Revenue - Cost): {net_profit:.2f}")

    return net_profit

# --- Execution for Model-1 ---

if __name__ == '__main__':
    # `raise SystemExit(1)` below is equivalent to sys.exit(1) but needs no
    # `sys` import (the original called sys.exit without importing sys, so the
    # error paths failed with NameError instead of exiting).
    try:
        # Load the synthetic dataset (assuming DF_FILE_NAME is globally accessible)
        df_synthetic = pd.read_csv(DF_FILE_NAME)
    except NameError:
        print("Error: DF_FILE_NAME is not defined. Ensure you run the previous cell first.")
        raise SystemExit(1)
    except FileNotFoundError:
        print(f"FATAL ERROR: Dataset not found at {DF_FILE_NAME}")
        raise SystemExit(1)

    # Run simulation using only Model-1
    print("\n" + "="*60)
    profit_model_1 = run_exclusive_model_simulation(SIMULATION_ROUNDS, df_synthetic, 'Model-1')
    print("="*60)

    # Optional: Run simulation using only Model-2 for comparison
    print("\n" + "="*60)
    profit_model_2 = run_exclusive_model_simulation(SIMULATION_ROUNDS, df_synthetic, 'Model-2')
    print("="*60)

    # Side-by-side summary of the two single-model baselines
    print("\n--- Final Comparative Profit ---")
    print(f"Model-1 Exclusive Profit: {profit_model_1:.2f}")
    print(f"Model-2 Exclusive Profit: {profit_model_2:.2f}")


--- Starting Exclusive Simulation: Model-1 (200 Rounds) ---
Round 50: Model=Model-1, R=70.00, C=57.60, Net=12.40
Round 100: Model=Model-1, R=70.00, C=56.40, Net=13.60
Round 150: Model=Model-1, R=70.00, C=57.60, Net=12.40
Round 200: Model=Model-1, R=70.00, C=58.20, Net=11.80

--- Exclusive Simulation Complete ---
Model Used: Model-1
Total Rounds: 200
Total Revenue: 12230.00
Total Cost: 11397.00
Net Profit (Revenue - Cost): 833.00

--- Starting Exclusive Simulation: Model-2 (200 Rounds) ---
Round 50: Model=Model-2, R=10.00, C=8.90, Net=1.10
Round 100: Model=Model-2, R=70.00, C=9.20, Net=60.80
Round 150: Model=Model-2, R=70.00, C=8.80, Net=61.20
Round 200: Model=Model-2, R=70.00, C=9.10, Net=60.90

--- Exclusive Simulation Complete ---
Model Used: Model-2
Total Rounds: 200
Total Revenue: 9580.00
Total Cost: 1798.50
Net Profit (Revenue - Cost): 7781.50

--- Final Comparative Profit ---
Model-1 Exclusive Profit: 833.00
Model-2 Exclusive Profit: 7781.50
