In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

# Generate a synthetic EV-charging dataset and save it as a CSV file.
#
# Each row is one simulated driver. The target column
# 'Monthly_Charge_Expense_USD' is derived from the commute distance, the
# charging price and a per-user consumption multiplier, then perturbed with
# +/-10% multiplicative noise.

# Seed NumPy's global RNG so every run produces the same dataset.
# NOTE: the order of the np.random calls below determines the values drawn;
# reordering them changes the generated data.
np.random.seed(42)

N_SAMPLES = 1000

# Assumed typical vehicle efficiency: 1 kWh is consumed every 3 miles.
MILES_PER_KWH = 3.0
# Number of days used to scale daily consumption to a monthly expense.
DAYS_PER_MONTH = 30
# Output location, relative to the current working directory.
OUTPUT_CSV = Path('../Notebooks/dummy_charging_data.csv')

# 1. Generate Features
# 'Battery_Capacity_kWh' and 'Temperature_Avg_C' are generated as features
# but do not enter the target formula below.
data = {
    'Daily_Commute_Miles': np.random.uniform(5, 70, N_SAMPLES).round(2),
    'Battery_Capacity_kWh': np.random.choice([25, 40, 60, 80, 100], N_SAMPLES),
    'Charging_Cost_per_kWh': np.random.uniform(0.10, 0.35, N_SAMPLES).round(3),
    'Temperature_Avg_C': np.random.uniform(5, 30, N_SAMPLES).round(1),
    'User_Behavior_Efficiency': np.random.uniform(0.8, 1.2, N_SAMPLES).round(2),
}

df = pd.DataFrame(data)

# 2. Generate the Target Variable (Monthly Expense)
# Formula:
#   Daily_kWh_Consumed = (Daily_Commute_Miles / MILES_PER_KWH)
#                        * User_Behavior_Efficiency
#   Monthly_Charge_Expense_USD = Daily_kWh_Consumed * Charging_Cost_per_kWh
#                                * DAYS_PER_MONTH * noise

# Estimated kWh consumed daily. 'User_Behavior_Efficiency' is applied as a
# multiplier on consumption, so values above 1 mean MORE energy is used.
df['Daily_kWh_Consumed'] = (
    (df['Daily_Commute_Miles'] / MILES_PER_KWH) * df['User_Behavior_Efficiency']
)

# Noise-free monthly expense.
df['Monthly_Charge_Expense_USD'] = (
    df['Daily_kWh_Consumed'] * df['Charging_Cost_per_kWh'] * DAYS_PER_MONTH
)

# Add some random noise (uniform factor in [0.9, 1.1)) to make it realistic.
df['Monthly_Charge_Expense_USD'] = (
    df['Monthly_Charge_Expense_USD'] * np.random.uniform(0.9, 1.1, N_SAMPLES)
)

# Final formatting: round to whole cents.
df['Monthly_Charge_Expense_USD'] = df['Monthly_Charge_Expense_USD'].round(2)

print(df.head())
print(f"\nGenerated a dummy dataset with {len(df)} samples.")

# Save the dummy data to a CSV file for consistency.
# DataFrame.to_csv does not create missing parent directories and raises
# OSError if the target directory is absent, so create it first.
OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(OUTPUT_CSV, index=False)

ModuleNotFoundError: No module named 'pandas'