In [6]:
import pandas as pd
import numpy as np
import random
import uuid
from datetime import datetime, timedelta

# 1. Province-to-District and Geographic Coordinate Mapping
# Ranges are approximate bounding boxes for each province
PROVINCE_CONFIG = {
    "Bulawayo": {
        "districts": ["Bulawayo (Metropolitan)"],
        "lat_range": (-20.30, -20.00), "lon_range": (28.40, 28.80)
    },
    "Harare": {
        "districts": ["Harare Urban", "Harare Rural", "Chitungwiza", "Epworth"],
        "lat_range": (-18.05, -17.70), "lon_range": (30.90, 31.25)
    },
    "Manicaland": {
        "districts": ["Buhera", "Chimanimani", "Chipinge", "Makoni", "Mutare", "Mutasa", "Nyanga", "Rusape (Urban)"],
        "lat_range": (-21.00, -17.50), "lon_range": (32.00, 33.10)
    },
    "Mashonaland Central": {
        "districts": ["Bindura", "Guruve", "Mazowe", "Mbire", "Mount Darwin", "Muzarabani", "Rushinga", "Shamva"],
        "lat_range": (-17.50, -15.60), "lon_range": (30.50, 32.00)
    },
    "Mashonaland East": {
        "districts": ["Chikomba", "Goromonzi", "Hwedza", "Marondera", "Mudzi", "Murehwa", "Mutoko", "Seke", "Uzumba-Maramba-Pfungwe"],
        "lat_range": (-19.30, -17.00), "lon_range": (30.50, 33.00)
    },
    "Mashonaland West": {
        "districts": ["Chegutu", "Hurungwe", "Kariba", "Makonde", "Mhondoro-Ngezi", "Sanyati", "Zvimba"],
        "lat_range": (-18.50, -15.50), "lon_range": (28.50, 31.00)
    },
    "Masvingo": {
        "districts": ["Bikita", "Chiredzi", "Chivi", "Gutu", "Masvingo", "Mwenezi", "Zaka"],
        "lat_range": (-22.30, -19.00), "lon_range": (30.00, 32.50)
    },
    "Matabeleland North": {
        "districts": ["Binga", "Bubi", "Hwange", "Lupane", "Nkayi", "Tsholotsho", "Umguza", "Victoria Falls (Urban)"],
        "lat_range": (-20.00, -17.00), "lon_range": (25.20, 29.00)
    },
    "Matabeleland South": {
        "districts": ["Beitbridge", "Bulilima", "Gwanda", "Insiza", "Mangwe", "Matobo", "Umzingwane"],
        "lat_range": (-22.50, -20.00), "lon_range": (27.50, 31.10)
    },
    "Midlands": {
        "districts": ["Chirumhanzu", "Gokwe North", "Gokwe South", "Gweru", "Kwekwe", "Mberengwa", "Shurugwi", "Zvishavane"],
        "lat_range": (-21.00, -18.00), "lon_range": (29.00, 30.50)
    }
}

PROVINCES = list(PROVINCE_CONFIG.keys())

def generate_cdr_record():
    province = random.choice(PROVINCES)
    config = PROVINCE_CONFIG[province]
    
    district = random.choice(config["districts"])
    
    # Correct localized coordinates for the chosen province
    lat = round(random.uniform(*config["lat_range"]), 6)
    lon = round(random.uniform(*config["lon_range"]), 6)

    # Network logic (5G for Harare/Bulawayo, 4G for towns, 3G/2G for rural)
    is_major_city = province in ["Harare", "Bulawayo"]
    is_urban = "Urban" in district or "Metropolitan" in district or random.random() < 0.3
    
    if is_major_city:
        network = "5G"
    elif is_urban:
        network = "4G"
    else:
        network = random.choice(["3G", "2G"])

    return {
        "account_nbr": str(uuid.uuid4()),
        "caller_msisdn": "07" + str(random.randint(10000000, 99999999))[1:],
        "receiver_msisdn": "07" + str(random.randint(10000000, 99999999))[1:],
        "province": province,
        "district": district,
        "network": network,
        "latitude": lat,
        "longitude": lon,
        "call_type": random.choice(["voice", "sms", "data"]),
        "call_start_time": datetime.now() - timedelta(days=random.randint(0, 7)),
        "call_duration_sec": random.randint(5, 3600),
        "cell_tower_id": random.randint(1000, 9999)
    }

# Generate sample
df = pd.DataFrame([generate_cdr_record() for _ in range(1000)])

In [7]:
df.head()

Unnamed: 0,account_nbr,caller_msisdn,receiver_msisdn,province,district,network,latitude,longitude,call_type,call_start_time,call_duration_sec,cell_tower_id
0,9097d3f8-2d0d-455c-a6ba-600f90e169f9,75804793,74154457,Harare,Harare Urban,5G,-17.752463,30.914553,sms,2025-12-28 11:10:54.314129,1690,5228
1,6b5a59a2-b6fe-41fe-8829-ade5a09bded2,79269532,79658674,Mashonaland East,Mudzi,2G,-18.28774,31.057154,sms,2025-12-23 11:10:54.314201,3370,3534
2,e4bc7294-5086-444c-9993-9c3cccd4a109,74716426,79132470,Mashonaland West,Chegutu,2G,-15.718826,30.325276,sms,2025-12-25 11:10:54.314243,1334,9854
3,77f9f6ab-3500-473e-9a84-de448f956613,75616923,78863773,Masvingo,Bikita,2G,-21.211361,30.669025,voice,2025-12-24 11:10:54.314278,2644,2256
4,78048c70-5aa8-4848-8f5f-9221931bfdc1,79761953,76504026,Harare,Harare Rural,5G,-17.744725,30.997101,sms,2025-12-24 11:10:54.314317,83,4749
