# Normal Distribution

In [21]:
import pandas as pd
import numpy as np

def generate_gaussian_bandit_csv(
    n_arms=5,
    pulls_per_arm=1000,
    std=0.1,
    cost_range=(0.05, 0.1),
    output_file="/content/drive/MyDrive/Business Analytics/Dataset/D4/Case1_Normal.csv",
    seed=42,
    means=None
):
    """Simulate a multi-armed bandit with clipped Gaussian rewards and fixed per-arm costs.

    Every trial pulls every arm once, so the result has ``pulls_per_arm * n_arms``
    rows ordered by trial, then by arm. Rewards are drawn from
    ``Normal(means[arm], std)`` and clipped to ``[0, 1]``. Each arm gets one
    cost, drawn once from ``Uniform(*cost_range)`` and rounded to 3 decimals.

    Args:
        n_arms: Number of arms.
        pulls_per_arm: Number of trials (each trial pulls every arm once).
        std: Standard deviation shared by all arms.
        cost_range: ``(low, high)`` bounds for the per-arm fixed cost.
        output_file: CSV path to write (without the index). Pass ``None`` (or
            an empty string) to skip writing.
        seed: Seed for the random stream. The same seed reproduces the same data.
        means: Optional sequence of ``n_arms`` reward means. Defaults to
            ``np.linspace(0.3, 0.8, n_arms)``.

    Returns:
        pandas.DataFrame with columns ``trial``, ``arm``, ``reward``, ``cost``.

    Raises:
        AssertionError: If ``means`` is given and its length differs from ``n_arms``.
    """
    # A private legacy RandomState yields exactly the stream that
    # np.random.seed(seed) would, without reseeding the process-wide RNG that
    # other notebook cells may be relying on.
    rng = np.random.RandomState(seed)

    # Generate or validate reward means for each arm
    if means is None:
        means = np.linspace(0.3, 0.8, n_arms)
    else:
        assert len(means) == n_arms, "Length of means must equal n_arms"
    arm_means = np.asarray(means, dtype=float)

    # Assign a single fixed cost for each arm (drawn before any reward)
    fixed_costs = np.round(rng.uniform(*cost_range, size=n_arms), 3)

    # One broadcast draw of shape (trial, arm). Elements are filled in row-major
    # order, i.e. the same order a nested "for trial / for arm" loop would draw them.
    raw_rewards = rng.normal(loc=arm_means, scale=std, size=(pulls_per_arm, n_arms))
    rewards = np.clip(raw_rewards, 0.0, 1.0)

    df = pd.DataFrame({
        "trial": np.repeat(np.arange(pulls_per_arm, dtype=np.int64), n_arms),
        "arm": np.tile(np.arange(n_arms, dtype=np.int64), pulls_per_arm),
        "reward": rewards.ravel(),
        "cost": np.tile(fixed_costs, pulls_per_arm),
    })

    # Save to CSV only when a path was supplied
    if output_file:
        df.to_csv(output_file, index=False)
        print(f"✅ Synthetic bandit data generated for {output_file}")

    # Show best arm based on the configured (not empirical) reward mean
    best_arm_index = np.argmax(arm_means)
    print(f"🎯 Best Arm: arm {best_arm_index} with mean reward {arm_means[best_arm_index]:.3f}")

    # Group by arm and calculate empirical mean reward and cost
    summary_df = df.groupby("arm").agg(
        mean_reward=('reward', 'mean'),
        mean_cost=('cost', 'mean')
    ).reset_index()

    # Format the empirical means as "Arm0: Reward, Arm1: Reward, ..."
    formatted_output = ", ".join(
        f"Arm{arm}: {reward:.4f}"
        for arm, reward in zip(summary_df["arm"], summary_df["mean_reward"])
    )

    print("\n🔍 Formatted Output:")
    print(formatted_output)

    # Show summary statistics for each arm
    print("\n📊 Summary statistics per arm:")
    print(summary_df)

    # Also show the fixed costs directly
    print("\n💰 Fixed cost assigned to each arm:")
    for arm_id, cost in enumerate(fixed_costs):
        print(f"  Arm {arm_id}: {cost}")

    return df

# Example usage: build the Case 4 Normal dataset (5 arms, 10,000 trials each)
# and write it to the Drive path below.
df = generate_gaussian_bandit_csv(
    output_file="/content/drive/MyDrive/Business Analytics/Dataset/D4/Case4_Normal.csv",
    means=[0.71, 0.75, 0.67, 0.44, 0.53],  # Custom means for better control
    std=0.1,
    n_arms=5,
    pulls_per_arm=10000,
    cost_range=(0.03, 0.1),
)

# Peek at the first rows of the generated data
df.head()


✅ Synthetic bandit data generated for /content/drive/MyDrive/Business Analytics/Dataset/D4/Case4_Normal.csv
🎯 Best Arm: arm 1 with mean reward 0.750

🔍 Formatted Output:
Arm0: 0.7100, Arm1: 0.7507, Arm2: 0.6703, Arm3: 0.4406, Arm4: 0.5290

📊 Summary statistics per arm:
   arm  mean_reward  mean_cost
0    0     0.710000      0.056
1    1     0.750699      0.097
2    2     0.670286      0.081
3    3     0.440579      0.072
4    4     0.528983      0.041

💰 Fixed cost assigned to each arm:
  Arm 0: 0.056
  Arm 1: 0.097
  Arm 2: 0.081
  Arm 3: 0.072
  Arm 4: 0.041


Unnamed: 0,trial,arm,reward,cost
0,0,0,0.737904,0.056
1,0,1,0.851052,0.097
2,0,2,0.611912,0.081
3,0,3,0.387483,0.072
4,0,4,0.472862,0.041


# Exponential Distribution

In [1]:
import pandas as pd
import numpy as np

def generate_exponential_bandit_csv(
    n_arms=5,
    pulls_per_arm=1000,
    scale_params=None,
    cost_range=(0.05, 0.1),
    seed=42,
    output_file=None  # Optional CSV output
):
    """Simulate a multi-armed bandit with truncated exponential rewards and fixed per-arm costs.

    Every trial pulls every arm once, so the result has ``pulls_per_arm * n_arms``
    rows ordered by trial, then by arm. Rewards are drawn from
    ``Exponential(scale=scale_params[arm])`` and capped at 1.0. Because of the
    cap, an arm's expected reward is ``scale * (1 - exp(-1 / scale))``, which is
    lower than ``scale`` itself. Each arm gets one cost, drawn once from
    ``Uniform(*cost_range)`` and rounded to 3 decimals.

    The per-arm empirical mean rewards are printed as
    ``"Arm0: 0.1234, Arm1: ..."``.

    Args:
        n_arms: Number of arms.
        pulls_per_arm: Number of trials (each trial pulls every arm once).
        scale_params: Optional sequence of ``n_arms`` exponential scales
            (1/lambda). Defaults to ``np.linspace(0.5, 2.0, n_arms)``.
        cost_range: ``(low, high)`` bounds for the per-arm fixed cost.
        seed: Seed for the random stream. The same seed reproduces the same data.
        output_file: CSV path to write (without the index). Nothing is written
            when it is ``None`` or empty.

    Returns:
        pandas.DataFrame with columns ``trial``, ``arm``, ``reward``, ``cost``.

    Raises:
        AssertionError: If ``scale_params`` is given and its length differs
            from ``n_arms``.
    """
    # A private legacy RandomState yields exactly the stream that
    # np.random.seed(seed) would, without reseeding the process-wide RNG that
    # other notebook cells may be relying on.
    rng = np.random.RandomState(seed)

    # Set default exponential scale (1/lambda) for each arm if not provided
    if scale_params is None:
        scale_params = np.linspace(0.5, 2.0, n_arms)
    else:
        assert len(scale_params) == n_arms, "Length of scale_params must equal n_arms"
    arm_scales = np.asarray(scale_params, dtype=float)

    # Assign fixed cost to each arm (drawn before any reward)
    fixed_costs = np.round(rng.uniform(*cost_range, size=n_arms), 3)

    # One broadcast draw of shape (trial, arm). Elements are filled in row-major
    # order, i.e. the same order a nested "for trial / for arm" loop would draw them.
    raw_rewards = rng.exponential(scale=arm_scales, size=(pulls_per_arm, n_arms))
    rewards = np.minimum(raw_rewards, 1.0)  # Clamp to max of 1.0 for bounded rewards

    df = pd.DataFrame({
        "trial": np.repeat(np.arange(pulls_per_arm, dtype=np.int64), n_arms),
        "arm": np.tile(np.arange(n_arms, dtype=np.int64), pulls_per_arm),
        "reward": rewards.ravel(),
        "cost": np.tile(fixed_costs, pulls_per_arm),
    })

    # Save to CSV if file path provided
    if output_file:
        df.to_csv(output_file, index=False)

    # Compute and format mean rewards per arm
    mean_rewards = df.groupby('arm')['reward'].mean()
    formatted_output = ", ".join([f"Arm{arm}: {reward:.4f}" for arm, reward in mean_rewards.items()])
    print(formatted_output)

    return df


In [4]:
# Case 2: 5 arms, 10,000 trials each; the function prints the per-arm empirical means.
# NOTE(review): the output path has no ".csv" extension, unlike the Normal
# dataset path used earlier in this notebook — confirm this is intentional.
df = generate_exponential_bandit_csv(
    output_file="/content/drive/MyDrive/Business Analytics/Dataset/D4/Exponential/Case2_Exponential",
    scale_params=[0.1, 0.19, 0.35, 0.77, 0.54],  # lower scale = more concentrated rewards
    n_arms=5,
    pulls_per_arm=10000,
    cost_range=(0.03, 0.1),
)

Arm0: 0.1005, Arm1: 0.1855, Arm2: 0.3293, Arm3: 0.5642, Arm4: 0.4517


In [5]:
# Case 3: 5 arms, 5,000 trials each; the function prints the per-arm empirical means.
# NOTE(review): the output path has no ".csv" extension, unlike the Normal
# dataset path used earlier in this notebook — confirm this is intentional.
df = generate_exponential_bandit_csv(
    output_file="/content/drive/MyDrive/Business Analytics/Dataset/D4/Exponential/Case3_Exponential",
    scale_params=[0.1, 0.9, 0.7, 0.5, 0.3],  # lower scale = more concentrated rewards
    n_arms=5,
    pulls_per_arm=5000,
    cost_range=(0.03, 0.1),
)

Arm0: 0.1008, Arm1: 0.5976, Arm2: 0.5334, Arm3: 0.4412, Arm4: 0.2869
