# In [None]:
import pandas as pd
import random

# Every Indian state and union territory as (name, latitude, longitude),
# where the coordinates are an approximate central point in decimal degrees.
states_uts = [
    # --- States ---
    ("Andhra Pradesh", 15.9129, 79.7400),
    ("Arunachal Pradesh", 28.2180, 94.7278),
    ("Assam", 26.2006, 92.9376),
    ("Bihar", 25.0961, 85.3131),
    ("Chhattisgarh", 21.2787, 81.8661),
    ("Goa", 15.2993, 74.1240),
    ("Gujarat", 22.2587, 71.1924),
    ("Haryana", 29.0588, 76.0856),
    ("Himachal Pradesh", 31.1048, 77.1734),
    ("Jharkhand", 23.6102, 85.2799),
    ("Karnataka", 15.3173, 75.7139),
    ("Kerala", 10.8505, 76.2711),
    ("Madhya Pradesh", 22.9734, 78.6569),
    ("Maharashtra", 19.7515, 75.7139),
    ("Manipur", 24.6637, 93.9063),
    ("Meghalaya", 25.4670, 91.3662),
    ("Mizoram", 23.1645, 92.9376),
    ("Nagaland", 26.1584, 94.5624),
    ("Odisha", 20.9517, 85.0985),
    ("Punjab", 31.1471, 75.3412),
    ("Rajasthan", 27.0238, 74.2179),
    ("Sikkim", 27.5330, 88.5122),
    ("Tamil Nadu", 11.1271, 78.6569),
    ("Telangana", 18.1124, 79.0193),
    ("Tripura", 23.9408, 91.9882),
    ("Uttar Pradesh", 26.8467, 80.9462),
    ("Uttarakhand", 30.0668, 79.0193),
    ("West Bengal", 22.9868, 87.8550),
    # --- Union territories ---
    ("Andaman and Nicobar Islands", 11.7401, 92.6586),
    ("Chandigarh", 30.7333, 76.7794),
    ("Dadra and Nagar Haveli and Daman and Diu", 20.1809, 73.0169),
    ("Delhi", 28.6139, 77.2090),
    ("Jammu and Kashmir", 33.7782, 76.5762),
    ("Ladakh", 34.2268, 77.5619),
    ("Lakshadweep", 10.5667, 72.6417),
    ("Puducherry", 11.9416, 79.8083),
]

# Illustrative cost tiers (demonstration only, not an authoritative economic
# ranking). Anything not explicitly listed as high or medium is treated as low.
high_cost_states = [
    "Maharashtra",
    "Gujarat",
    "Karnataka",
    "Tamil Nadu",
    "Delhi",
    "Haryana",
    "Punjab",
]
medium_cost_states = [
    "Andhra Pradesh",
    "Telangana",
    "Kerala",
    "Madhya Pradesh",
    "Rajasthan",
    "Uttar Pradesh",
    "West Bengal",
]
# Keeps the ordering of states_uts; entry[0] is the state/UT name.
low_cost_states = [
    entry[0]
    for entry in states_uts
    if entry[0] not in high_cost_states and entry[0] not in medium_cost_states
]

# Function to generate industrial areas scattered around a state's central point
def generate_industrial_areas(state, lat, lon, num_areas, spread=0.5):
    """Return ``num_areas`` synthetic industrial areas jittered around a point.

    Args:
        state: Name used as the prefix of each generated area name.
        lat: Central latitude the areas are scattered around.
        lon: Central longitude the areas are scattered around.
        num_areas: Number of areas to generate; zero or a negative value
            yields an empty list.
        spread: Maximum absolute offset added independently to the latitude
            and to the longitude of each area. Defaults to 0.5, the value
            that was previously hard-coded.

    Returns:
        A list of ``(name, latitude, longitude)`` tuples, where ``name`` is
        ``"<state>_Industrial_Area_<k>"`` with ``k`` counting from 1.
    """
    # Tuple elements are evaluated left to right, so for each area the
    # latitude offset is drawn before the longitude offset.
    return [
        (
            f"{state}_Industrial_Area_{index + 1}",
            lat + random.uniform(-spread, spread),
            lon + random.uniform(-spread, spread),
        )
        for index in range(num_areas)
    ]

# Function to build the synthetic records for one state or UT
def generate_state_data(state, lat, lon):
    """Create one record per industrial area of ``state``.

    Baseline land cost, labour cost, electricity cost and risk factor are
    sampled once per state from ranges chosen by its cost tier (high, medium,
    otherwise low). Land cost, labour cost and risk are then varied by ±10%
    for each area; the electricity cost is shared by all areas of the state.

    Args:
        state: State or union-territory name.
        lat: Central latitude passed on to ``generate_industrial_areas``.
        lon: Central longitude passed on to ``generate_industrial_areas``.

    Returns:
        A list of dicts (one per area) keyed by the dataset column names.
        Union territories get a single area, states get ten.
    """
    # Sampling ranges per tier, in order: land, labour, electricity, risk.
    if state in high_cost_states:
        tier_ranges = ((3000, 7000), (400, 700), (7, 12), (2, 4))
    elif state in medium_cost_states:
        tier_ranges = ((1500, 4000), (300, 500), (6, 10), (2.5, 4.5))
    else:
        tier_ranges = ((500, 2500), (200, 400), (5, 9), (3, 5))
    land_range, labour_range, power_range, risk_range = tier_ranges

    # The draw order (land, labour, electricity, risk) is kept fixed so a
    # seeded run produces the same sequence of values.
    land_base = random.uniform(*land_range)
    labour_base = random.uniform(*labour_range)
    power_rate = random.uniform(*power_range)
    risk_base = random.uniform(*risk_range)

    union_territories = (
        "Andaman and Nicobar Islands",
        "Chandigarh",
        "Dadra and Nagar Haveli and Daman and Diu",
        "Delhi",
        "Jammu and Kashmir",
        "Ladakh",
        "Lakshadweep",
        "Puducherry",
    )
    area_count = 1 if state in union_territories else 10

    # Identical for every area of this state, so round it once.
    power_rate_rounded = round(power_rate, 2)

    records = []
    for area_name, area_lat, area_lon in generate_industrial_areas(state, lat, lon, area_count):
        area_land = land_base * random.uniform(0.9, 1.1)
        area_labour = labour_base * random.uniform(0.9, 1.1)
        area_risk = risk_base * random.uniform(0.9, 1.1)
        # Keep the risk score inside the advertised 1-5 scale.
        bounded_risk = min(max(area_risk, 1), 5)

        record = {
            "State/UT": state,
            "Industrial Area": area_name,
            "Land Cost (INR/sq ft)": round(area_land, 2),
            "Labour Cost (INR/day)": round(area_labour, 2),
            "Electricity Cost (INR/unit)": power_rate_rounded,
            "Risk Factor (1-5)": round(bounded_risk, 2),
            "Latitude": area_lat,
            "Longitude": area_lon,
        }
        records.append(record)

    return records

# Collect the records of every state/UT, in states_uts order
dataset = []
for state, lat, lon in states_uts:
    dataset += generate_state_data(state, lat, lon)

# Tabulate the records; column order follows the record dict key order
df = pd.DataFrame(data=dataset)

# Preview the first 20 rows, then numeric summary statistics
print(df.head(n=20))
print("\nDataset Summary:", df.describe(), sep="\n")

# Persist to CSV without the positional index column
df.to_csv(path_or_buf="indian_industrial_areas_dataset_combined.csv", index=False)
print("\nCombined dataset saved to 'indian_industrial_areas_dataset_combined.csv'")
