In [5]:
import pandas as pd
import numpy as np

# Chennai bounding box (decimal degrees)
min_lat, max_lat = 12.8, 13.2
min_lon, max_lon = 80.0, 80.3
step_deg = 0.003  # ~333 meters → more points

# Seed NumPy's global RNG before the first random draw so that the shuffle
# below (and any later np.random call) is repeatable from run to run.
np.random.seed(42)

# Generate coordinate grid.
# np.arange with a fractional step computes start + k * step in floating
# point, so the raw values can carry tiny representation error. Snap them to
# the three decimals that step_deg works in so the coordinates stay clean.
lat_points = np.round(np.arange(min_lat, max_lat, step_deg), 3)
lon_points = np.round(np.arange(min_lon, max_lon, step_deg), 3)
coordinates = [(lat, lon) for lat in lat_points for lon in lon_points]
np.random.shuffle(coordinates)

# Generate dataset
def generate_row(coord):
    """Build one synthetic record for the grid point *coord*.

    *coord* is a ``(latitude, longitude)`` pair. A coin flip picks one of two
    profiles:

    * high-crime: crime rate 16-25, no security, lighting drawn from
      ``[1, 1, 2]``;
    * low-crime: crime rate 1-15, security present, lighting drawn from
      ``[2, 3]``.

    Traffic density is drawn independently from 1-3. The row is labelled
    "Danger Zone" only when crime rate exceeds 15, security is absent and
    lighting is below 2; every other combination is a "Safe Zone".

    Returns a dict keyed by the dataset's column names.
    """
    latitude, longitude = coord

    # Choose the profile first, then draw from it; the order of the random
    # draws (profile, crime rate, lighting, traffic) is kept fixed.
    if np.random.rand() < 0.5:
        crime_bounds, security, lighting_pool = (16, 26), 0, [1, 1, 2]
    else:
        crime_bounds, security, lighting_pool = (1, 16), 1, [2, 3]

    crime_rate = np.random.randint(*crime_bounds)
    lighting = np.random.choice(lighting_pool)
    traffic = np.random.randint(1, 4)

    is_danger = crime_rate > 15 and security == 0 and lighting < 2
    zone_type = "Danger Zone" if is_danger else "Safe Zone"

    return {
        "Latitude": latitude,
        "Longitude": longitude,
        "Crime Rate": crime_rate,
        "Security Presence": security,
        "Traffic Density": traffic,
        "Lighting Quality": lighting,
        "Zone Type": zone_type,
    }

# Walk the shuffled grid, drawing one candidate row per coordinate, and keep
# a row only while its class is still below the per-class quota. Stop as soon
# as both classes are full.
dataset = []
used_coords = set()
safe_count = 0
danger_count = 0
target_per_class = 600  # we want at least 1200 rows total

for coord in coordinates:
    if coord in used_coords:
        continue

    row = generate_row(coord)
    label = row["Zone Type"]

    # A row is accepted only if its own class still has room.
    keep = (
        (label == "Safe Zone" and safe_count < target_per_class)
        or (label == "Danger Zone" and danger_count < target_per_class)
    )
    if keep:
        dataset.append(row)
        used_coords.add(coord)
        if label == "Safe Zone":
            safe_count += 1
        else:
            danger_count += 1

    # Both quotas met exactly when the smaller count has reached the target.
    if min(safe_count, danger_count) >= target_per_class:
        break

# Turn the accepted rows into a table and shuffle it with a fixed seed so the
# two classes are interleaved, then renumber the index from zero.
df = (
    pd.DataFrame(dataset)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Write the dataset to disk and print a short summary: overall shape,
# per-class row counts, and the first few rows.
df.to_csv("chennai_parking.csv", index=False)
print("Final Dataset Shape:", df.shape)
print(df["Zone Type"].value_counts())
print(df.head())


Final Dataset Shape: (1200, 7)
Zone Type
Danger Zone    600
Safe Zone      600
Name: count, dtype: int64
   Latitude  Longitude  Crime Rate  Security Presence  Traffic Density  \
0    12.998     80.141          24                  0                1   
1    12.908     80.171          25                  0                1   
2    12.989     80.213          16                  0                1   
3    13.016     80.267           6                  1                1   
4    12.824     80.156          25                  0                1   

   Lighting Quality    Zone Type  
0                 1  Danger Zone  
1                 1  Danger Zone  
2                 1  Danger Zone  
3                 3    Safe Zone  
4                 2    Safe Zone  
