In [1]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from datetime import timedelta

fake = Faker("en_US")

# 1. Organize districts into a Province-to-District mapping
PROVINCE_DISTRICT_MAP = {
    "Bulawayo": ["Bulawayo (Metropolitan)"],
    "Harare": ["Harare Urban", "Harare Rural", "Chitungwiza", "Epworth"],
    "Manicaland": ["Buhera", "Chimanimani", "Chipinge", "Makoni", "Mutare", "Mutasa", "Nyanga", "Rusape (Urban)"],
    "Mashonaland Central": ["Bindura", "Guruve", "Mazowe", "Mbire", "Mount Darwin", "Muzarabani", "Rushinga", "Shamva"],
    "Mashonaland East": ["Chikomba", "Goromonzi", "Hwedza", "Marondera", "Mudzi", "Murehwa", "Mutoko", "Seke", "Uzumba-Maramba-Pfungwe"],
    "Mashonaland West": ["Chegutu", "Hurungwe", "Kariba", "Makonde", "Mhondoro-Ngezi", "Sanyati", "Zvimba"],
    "Masvingo": ["Bikita", "Chiredzi", "Chivi", "Gutu", "Masvingo", "Mwenezi", "Zaka"],
    "Matabeleland North": ["Binga", "Bubi", "Hwange", "Lupane", "Nkayi", "Tsholotsho", "Umguza", "Victoria Falls (Urban)"],
    "Matabeleland South": ["Beitbridge", "Bulilima", "Gwanda", "Insiza", "Mangwe", "Matobo", "Umzingwane"],
    "Midlands": ["Chirumhanzu", "Gokwe North", "Gokwe South", "Gweru", "Kwekwe", "Mberengwa", "Shurugwi", "Zvishavane"]
}

PROVINCES = list(PROVINCE_DISTRICT_MAP.keys())

# 2. Update towers to include all provinces and urban/rural status
towers = pd.DataFrame({
    "cell_tower_id": range(1001, 1101),
    "province": np.random.choice(PROVINCES, 100),
    "urban_rural": np.random.choice(["Urban", "Rural"], 100, p=[0.6, 0.4])
})

def generate_caller_number():
    prefix = random.choice(["077", "078"])
    remaining_digits = ''.join(str(random.randint(0, 9)) for _ in range(7))
    return prefix + remaining_digits

def generate_receiver_number():
    prefix = random.choice(["078", "071", "073", "077"])
    remaining_digits = "".join(str(random.randint(0, 9)) for _ in range(7))
    return prefix + remaining_digits

# 3. Updated record generator with Network Logic
def generate_cdr_record():
    # Pick a random tower and use its province and urban/rural status
    tower = towers.sample(n=1).iloc[0]
    province = tower["province"]
    urban_rural = tower["urban_rural"]
    tower_id = tower["cell_tower_id"]

    # Select a district that correctly falls under the selected province
    district = random.choice(PROVINCE_DISTRICT_MAP[province])

    # Network Logic
    if province in ["Harare", "Bulawayo"]:
        network = "5G"
    elif urban_rural == "Urban":
        network = "4G"
    else:
        network = random.choice(["3G", "2G"])

    return {
        "account_nbr": fake.uuid4(),
        "caller_msisdn": generate_caller_number(),
        "receiver_msisdn": generate_receiver_number(),
        "province": province,
        "district": district,
        "urban_rural": urban_rural, # Included to verify logic
        "network": network,         # New Column
        "homing_latitude": round(random.uniform(-22.5, -15.5), 6),
        "homing_longitude": round(random.uniform(25.0, 33.1), 6),
        "call_type": random.choice(["voice", "sms", "data"]),
        "call_start_time": fake.date_time_between(start_date="-7d", end_date="now"),
        "call_duration_sec": random.randint(5, 3600),
        "cell_tower_id": tower_id
    }

# Testing the generation
N = 1000
data = [generate_cdr_record() for _ in range(N)]
df = pd.DataFrame(data)
print(df[['province', 'district', 'urban_rural', 'network']].head(10))

             province                 district urban_rural network
0            Midlands                    Gweru       Rural      2G
1    Mashonaland East                Marondera       Urban      4G
2  Matabeleland North   Victoria Falls (Urban)       Urban      4G
3    Mashonaland East                 Chikomba       Urban      4G
4              Harare                  Epworth       Urban      5G
5            Bulawayo  Bulawayo (Metropolitan)       Urban      5G
6  Matabeleland North                    Nkayi       Rural      2G
7            Bulawayo  Bulawayo (Metropolitan)       Urban      5G
8  Matabeleland North   Victoria Falls (Urban)       Rural      2G
9  Matabeleland South                   Mangwe       Rural      3G


In [2]:
df.head()

Unnamed: 0,account_nbr,caller_msisdn,receiver_msisdn,province,district,urban_rural,network,homing_latitude,homing_longitude,call_type,call_start_time,call_duration_sec,cell_tower_id
0,bdd7e133-ce5d-4baf-8ec3-b730dcd65348,772395490,772591892,Midlands,Gweru,Rural,2G,-17.384826,26.084201,voice,2025-12-23 20:20:42,2638,1088
1,a48b4ba2-91de-4e34-98a5-1d331e3aa2cd,788742838,784165497,Mashonaland East,Marondera,Urban,4G,-20.483692,25.048799,voice,2025-12-30 00:41:34,868,1034
2,e8019908-5660-4c45-bb97-6e3833e62030,772911620,736891855,Matabeleland North,Victoria Falls (Urban),Urban,4G,-17.428829,27.205304,data,2025-12-27 04:30:43,3168,1056
3,498f380a-aef2-40dd-b9ef-4828681140ea,772894020,734426080,Mashonaland East,Chikomba,Urban,4G,-17.282711,29.876139,sms,2025-12-25 17:10:29,1772,1016
4,360a5c4a-6ac6-4458-b8a6-edd21b814ba6,775571079,775425420,Harare,Epworth,Urban,5G,-16.076543,25.638821,data,2025-12-27 19:06:47,59,1025
