In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so the synthetic claims are identical on every run.
np.random.seed(42)

N = 1000  # number of synthetic claims to generate

# Sequential claim identifiers 1..N
claim_id = range(1, N + 1)

# Claim locations: uniform draws inside a lat/lon rectangle (London area)
latitude = np.random.uniform(51.45, 51.60, N)
longitude = np.random.uniform(-0.30, 0.10, N)

# Claim amounts: gamma-distributed, rounded to two decimals
claim_amount = np.random.gamma(shape=2.0, scale=700, size=N).round(2)

# Category label -> sampling probability (each table sums to 1)
claim_type_probs = {"Vehicle": 0.45, "Property": 0.30, "Theft": 0.15, "Flood": 0.10}
policy_type_probs = {"Standard": 0.4, "Comprehensive": 0.4, "Premium": 0.2}

claim_type = np.random.choice(
    list(claim_type_probs.keys()),
    size=N,
    p=list(claim_type_probs.values()),
)
policy_type = np.random.choice(
    list(policy_type_probs.keys()),
    size=N,
    p=list(policy_type_probs.values()),
)

# Incident dates: sampled with replacement from every calendar day of 2023
days_2023 = pd.date_range("2023-01-01", "2023-12-31")
incident_date = pd.to_datetime(np.random.choice(days_2023, size=N))

# Assemble the claims table, then append the derived business flag:
# high_risk_claim is 1 when claim_amount is strictly above 1500, else 0.
df = pd.DataFrame(
    {
        "claim_id": claim_id,
        "latitude": latitude,
        "longitude": longitude,
        "claim_amount": claim_amount,
        "claim_type": claim_type,
        "policy_type": policy_type,
        "incident_date": incident_date,
    }
).assign(
    high_risk_claim=lambda frame: frame["claim_amount"].gt(1500).astype(int)
)

# Persist to CSV without the index column
df.to_csv("insurance_claims_1000.csv", index=False)

print("Fake insurance dataset created with", len(df), "rows")


Fake insurance dataset created with 1000 rows


In [2]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so the regions table is reproducible.
# NOTE(review): this is the same seed value the claims cell uses, so the draws
# below reuse the same underlying random stream as the first claim coordinates
# (the values are linearly related). Confirm that is acceptable, or pick a
# different seed for this table.
np.random.seed(42)

region_name = [
    "Central London",
    "North London",
    "South London",
    "East London",
    "West London",
    "Camden",
    "Greenwich",
    "Hackney",
    "Islington",
    "Croydon",
    "Hounslow",
    "Barking"
]

# Derive the count from the name list so the two cannot drift apart; a
# separately hard-coded count would make the DataFrame constructor fail with a
# length mismatch as soon as a region is added or removed.
N_REGIONS = len(region_name)

# Sequential region identifiers 1..N_REGIONS
region_id = range(1, N_REGIONS + 1)

# One randomly drawn centre point per region. These are uniform samples inside
# a fixed lat/lon rectangle, not the real locations of the named areas.
lat_center = np.random.uniform(51.47, 51.58, N_REGIONS)
lon_center = np.random.uniform(-0.25, 0.05, N_REGIONS)

# Synthetic region attributes
avg_income = np.random.randint(30000, 85000, N_REGIONS)  # upper bound is exclusive
flood_risk_index = np.random.uniform(0.1, 0.9, N_REGIONS).round(2)
crime_index = np.random.uniform(0.2, 0.8, N_REGIONS).round(2)

regions_df = pd.DataFrame({
    "region_id": region_id,
    "region_name": region_name,
    "latitude_center": lat_center,
    "longitude_center": lon_center,
    "average_income": avg_income,
    "flood_risk_index": flood_risk_index,
    "crime_risk_index": crime_index
})

# Persist to CSV without the index column
regions_df.to_csv("regions.csv", index=False)

print("regions.csv created with", len(regions_df), "regions")


regions.csv created with 12 regions
