In [7]:
import pandas as pd
import numpy as np
import random

# Sample ATM locations in Pune as (name, latitude, longitude)
locations = [
    ("Shivaji Nagar", 18.5300, 73.8500),
    ("Kothrud", 18.5074, 73.8077),
    ("Viman Nagar", 18.5679, 73.9143),
    ("Hadapsar", 18.5089, 73.9235),
    ("Hinjewadi", 18.5911, 73.7389),
    ("Baner", 18.5590, 73.7862),
    ("Swargate", 18.5018, 73.8636),
    ("Aundh", 18.5579, 73.8077),
    ("Yerwada", 18.5626, 73.8770),
    ("Camp", 18.5167, 73.8567),
]

area_types = ["Commercial", "Residential", "Mixed", "IT Hub"]

# Number of records
num_entries = 10000

# Fixed seed so that Restart Kernel -> Run All regenerates the identical CSV
SEED = 42

# Bounds applied to the average withdrawal amount
MIN_WITHDRAWAL = 500
MAX_WITHDRAWAL = 8000

# Column names (same order as the values in each generated row)
columns = [
    "ATM_ID", "Location", "Latitude", "Longitude", "Area_Type", "City",
    "Monthly_Transactions", "Avg_Withdrawal_Amount", "ATM_Status", "Last_Used_Days_Ago"
]


def generate_atm_records(n, seed=SEED):
    """Build ``n`` synthetic ATM rows, ordered like ``columns``.

    Parameters
    ----------
    n : int
        Number of rows to generate; ``0`` yields an empty list.
    seed : int, optional
        Seed for the private random generator. The same ``(n, seed)`` pair
        always produces the same rows.

    Returns
    -------
    list[list]
        One list per ATM: id, location name, latitude, longitude, area type,
        city, monthly transactions, average withdrawal, status, days since
        last use.
    """
    # A private generator keeps the output reproducible without reseeding the
    # global `random` / `np.random` state that other cells may rely on.
    rng = np.random.default_rng(seed)

    rows = []
    for i in range(n):
        name, latitude, longitude = locations[rng.integers(len(locations))]
        area = area_types[rng.integers(len(area_types))]

        # Normal draw floored at zero. With mean 1000 and sd 1500 roughly a
        # quarter of the draws are negative and therefore collapse to 0.
        transactions = max(0, int(rng.normal(loc=1000, scale=1500)))

        # Clamp on both sides so the value really stays within
        # [MIN_WITHDRAWAL, MAX_WITHDRAWAL].
        avg_withdrawal = min(
            MAX_WITHDRAWAL,
            max(MIN_WITHDRAWAL, int(rng.normal(loc=3000, scale=800))),
        )

        # Exponential draws are never negative, so no lower clamp is needed.
        last_used_days = int(rng.exponential(scale=30))

        rows.append([
            f"ATM{i:05d}", name, latitude, longitude, area, "Pune",
            transactions, avg_withdrawal, "Active", last_used_days
        ])
    return rows


# Generate dummy entries
data = generate_atm_records(num_entries)

# Create DataFrame and save to CSV
atm_df = pd.DataFrame(data, columns=columns)
atm_df.to_csv("atm_pune_dataset.csv", index=False)

# Report the real row count instead of a hardcoded figure
print(f"Dataset 'atm_pune_dataset.csv' generated with {len(atm_df):,} entries.")


Dataset 'atm_pune_dataset.csv' generated with 10,000 entries.
