In [24]:
import pandas as pd
import numpy as np
import time

# results container: one row per camera code, filled in by the loops further below
cameras = pd.DataFrame(columns=["duration", "checked", "amount", "fines", "desc", "lat", "lng"])

# location data
# read_csv ignores the order of `usecols` and returns the columns in file order,
# so the columns are renamed by name instead of by position.
location_columns = {
    "CAMERA TYPE": "type",
    "LOCATION_CODE": "code",
    "LATITUDE": "lat",
    "LONGITUDE": "lng",
    "LOCATION DESCRIPTION": "desc",
    "Decommissioned Camera_Date": "decommissioned",
}
locations = pd.read_csv("./camera_locations.csv", usecols=list(location_columns)).rename(columns=location_columns)
# normalise the codes: upper-case, then keep only the text before the first underscore
locations["code"] = locations["code"].str.upper().str.split("_").str[0]
locations = locations.set_index("code")
# filter out decommissioned and fixed cameras
location_is_mobile = locations["type"] == "MOBILE SPEED CAMERA"
location_is_active = locations["decommissioned"].isna()
locations = locations.loc[location_is_mobile & location_is_active, ["lat", "lng", "desc"]]

# deployments data
# read_csv ignores the order of `usecols` and returns the columns in file order,
# so the columns are renamed by name instead of by position.
deployment_columns = {
    "Date": "date",
    "TimeAtSiteInHours": "duration",
    "Description of Site": "desc",
    "Camera Location": "code",
    "Number Checked": "checked",
}
deployments = pd.read_csv(
    "./camera_visits_and_stays.csv",
    usecols=list(deployment_columns),
    dtype={"Camera Location": "str"},
).rename(columns=deployment_columns)
deployments["date"] = pd.to_datetime(deployments["date"], format="%d/%m/%Y")
# remove junk characters: upper-case the code and drop everything from the first
# line feed / carriage return onwards
deployments["code"] = deployments["code"].str.upper().str.split("\n").str[0].str.split("\r").str[0]
# filter out incomplete and old rows (both date bounds are exclusive)
deployment_has_code = deployments["code"].notna()
deployment_in_period = (deployments["date"] > pd.to_datetime("2017-01-31")) & (deployments["date"] < pd.to_datetime("2020-02-01"))
deployments = deployments[deployment_has_code & deployment_in_period]

# offences data
# read_csv ignores the order of `usecols` and returns the columns in file order,
# so the columns are renamed by name instead of by position.
offence_columns = {
    "Offence_Month": "month",
    "Camera_Type": "type",
    "Location_Code": "code",
    "Location_Desc": "desc",
    "Offence_Desc": "offence",
    "Sum_Pen_Amt": "amount",
    "Sum_Inf_Count": "fines",
}
offences = pd.read_csv("./camera_offences_and_fines.csv", usecols=list(offence_columns)).rename(columns=offence_columns)
# codes are converted to int and zero-padded to four characters
offences["code"] = offences["code"].apply(lambda x: f"{int(x):04}")
offences["month"] = pd.to_datetime(offences["month"])
# filter out non-mobile cameras and old rows (both date bounds are exclusive)
offence_is_mobile = offences["type"] == "MOBILE SPEED CAMERA"
offence_in_period = (offences["month"] > pd.to_datetime("2017-01-31")) & (offences["month"] < pd.to_datetime("2020-02-01"))
offences = offences[offence_is_mobile & offence_in_period]

# Per-code totals. numeric_only=True restricts the sum to numeric columns: the
# datetime and text columns are not meaningful to add up, and pandas 2.x raises
# on the datetime column instead of silently dropping it.
offences_group = offences.groupby("code").sum(numeric_only=True)
deployments_group = deployments.groupby("code").sum(numeric_only=True)

# crash data
# read_csv ignores the order of `usecols` and returns the columns in file order,
# so the columns are renamed by name instead of by position.
crash_columns = {
    "CRASH_DATE": "date",
    "LONGITUDE": "lng",
    "LATITUDE": "lat",
}
crashes = pd.read_csv("./crash_data.csv", usecols=list(crash_columns)).rename(columns=crash_columns)
crashes["date"] = pd.to_datetime(crashes["date"], format="%d/%m/%Y")
# filter out old rows (both date bounds are exclusive)
crash_in_period = (crashes["date"] > pd.to_datetime("2017-01-31")) & (crashes["date"] < pd.to_datetime("2020-02-01"))
crashes = crashes[crash_in_period]

def geo_distance(lat1, lng1, lat2, lng2, radius=6371):
    ''' Haversine formula for the great-circle distance between geolocations.

    Coordinates are in decimal degrees and may be scalars or numpy arrays /
    pandas Series (the arithmetic broadcasts). The result is in the units of
    `radius`; the default 6371 is the value the original code used for the
    sphere radius (kilometres for the Earth).
    '''
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lng2 - lng1)
    a = np.sin(delta_phi / 2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2)**2
    # Rounding can push `a` marginally outside [0, 1] (e.g. near-antipodal points),
    # which would make sqrt(1 - a) return NaN; NaN inputs still propagate as NaN.
    a = np.clip(a, 0.0, 1.0)
    res = radius * (2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)))
    return res

# missing data
errors = []
start = time.time()
interval = 1
print(f"calculation under way ...", end="\r")

# One location row per camera code: when a code is listed more than once the
# first row is used. Iterating this index (rather than a set) also makes the
# row order of `cameras` the same on every run.
camera_locations = locations[~locations.index.duplicated(keep="first")]

# Vehicles checked per site, where a site is the first four characters of a
# deployment code. Computed once up front instead of re-scanning every
# deployment row for every camera.
site_checked = {}
for code, checked in deployments_group["checked"].items():
    site_checked[code[:4]] = site_checked.get(code[:4], 0) + checked

camera_count = len(camera_locations)
for i, camera in enumerate(camera_locations.index):
    cameras.at[camera, "lat"] = camera_locations.at[camera, "lat"]
    cameras.at[camera, "lng"] = camera_locations.at[camera, "lng"]
    cameras.at[camera, "desc"] = camera_locations.at[camera, "desc"]
    if camera in deployments_group.index:
        cameras.at[camera, "duration"] = deployments_group.at[camera, "duration"]
        cameras.at[camera, "checked"] = deployments_group.at[camera, "checked"]
    # Offences are keyed by the four-character site code, so the site totals are
    # shared out between the site's cameras in proportion to vehicles checked.
    site = camera[:4]
    if site in offences_group.index:
        total_checked = site_checked.get(site, 0)
        # `checked` is NaN for a camera with no deployments, which makes the
        # ratio NaN; such rows are removed by the later dropna().
        ratio = cameras.at[camera, "checked"] / total_checked if total_checked > 0 else 0.0
        cameras.at[camera, "amount"] = offences_group.at[site, "amount"] * ratio
        cameras.at[camera, "fines"] = offences_group.at[site, "fines"] * ratio
    # progress line, refreshed at most about once per second
    if time.time() - start > interval:
        interval = interval + 1
        print(f"calculation under way ... {(i + 1) / camera_count:.1%}", end="\r")
print(f"calculation under way ... complete")

# count proximal collisions
collisions = []
start = time.time()
interval = 1
print(f"calculating proximal collisions ...", end="\r")

# A crash counts as proximal when it lies within this distance of the camera
# (same units as geo_distance returns).
proximity_limit = .25
# Crash coordinates as plain float arrays so geo_distance can evaluate every
# crash for one camera in a single vectorised call.
crash_lat = crashes["lat"].to_numpy(dtype=float)
crash_lng = crashes["lng"].to_numpy(dtype=float)
camera_count = len(cameras)
for i, camera in enumerate(cameras.index):
    camera_lat = float(cameras.at[camera, "lat"])
    camera_lng = float(cameras.at[camera, "lng"])
    collision_distances = geo_distance(crash_lat, crash_lng, camera_lat, camera_lng)
    # NaN distances (missing coordinates) compare False and are not counted
    collisions.append(int(np.count_nonzero(collision_distances <= proximity_limit)))
    # progress line, refreshed at most about once per second
    if time.time() - start > interval:
        interval = interval + 1
        print(f"calculating proximal collisions ... {(i + 1) / camera_count:.1%}", end="\r")
cameras["collisions"] = collisions
print(f"calculating proximal collisions ... complete")

# keep only cameras for which every value is present
cameras = cameras.dropna()
# derived rates: fines per vehicle checked, fines and penalty amount per unit of
# deployment duration, and vehicles checked per unit of deployment duration
cameras = cameras.assign(
    speeding=cameras["fines"] / cameras["checked"],
    productivity_fines=cameras["fines"] / cameras["duration"],
    productivity_amount=cameras["amount"] / cameras["duration"],
    traffic_flow=cameras["checked"] / cameras["duration"],
)
# final column order, with the text description last
ordered_columns = [
    "duration", "checked", "amount", "fines",
    "speeding", "productivity_fines", "productivity_amount", "traffic_flow",
    "collisions", "lat", "lng", "desc",
]
cameras = cameras[ordered_columns]
# every column except the trailing description is stored as float
cameras = cameras.astype({column: "float" for column in cameras.columns[:-1]})

# Street-type abbreviations, applied in this order.
# NOTE(review): "Drive," becomes "Dv," while " Dr " becomes " Dr, " — presumably
# two different abbreviations for the same street type; confirm which is intended.
_DESC_ABBREVIATIONS = [
    (" Dr ", " Dr, "),
    (" Street ", " St, "),
    (" Crescent ", " Cres, "),
    ("Crescent,", "Cres,"),
    ("Drive,", "Dv,"),
    ("Avenue,", "Ave,"),
    ("Street,", "St,"),
    ("Terrace,", "Tce,"),
    ("Road,", "Rd,"),
]


def _tidy_description(text):
    """Shorten one location description.

    Everything from the first " ACT" (after "Australian Capital Territory" has
    been shortened to "ACT") and from the first " NSW" onwards is cut off, one
    trailing comma is removed, and the street-type abbreviations are applied.
    """
    text = text.replace("Australian Capital Territory", "ACT")
    for marker in (" ACT", " NSW"):
        cut = text.find(marker)
        if cut > -1:
            text = text[:cut]
    # endswith() is safe on an empty string, unlike text[-1], which raises
    # IndexError when the truncation above leaves nothing behind
    if text.endswith(","):
        text = text[:-1]
    for old, new in _DESC_ABBREVIATIONS:
        text = text.replace(old, new)
    return text


cameras["desc"] = cameras["desc"].apply(_tidy_description)

cameras.to_csv("./camera_data.csv")
cameras

calculation under way ... complete
calculating proximal collisions ... complete


Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc
0008A,40.50,25433.0,7379.353945,20.801794,0.000818,0.513625,182.206270,627.975309,9.0,-35.399890,149.106100,"Erindale Dr, Fadden"
0291B,21.40,2446.0,7285.098974,17.932551,0.007331,0.837970,340.425186,114.299065,0.0,-35.304377,149.102701,"59 Schlich St, Yarralumla"
0072B,49.77,36006.0,43262.287581,91.608381,0.002544,1.840635,869.244275,723.447860,2.0,-35.416180,149.144870,"Monaro Hwy, Gilmore"
0128B,78.46,13910.0,41897.848274,112.385366,0.008079,1.432391,534.002655,177.287790,5.0,-35.255060,149.152620,"49 Officer Cres, Ainslie"
0009B,12.14,7029.0,7016.792200,19.388859,0.002758,1.597105,577.989473,578.995058,30.0,-35.218190,149.019420,"7 Florey Dr, MacGregor"
...,...,...,...,...,...,...,...,...,...,...,...,...
0230B,36.50,2001.0,4205.882674,9.369927,0.004683,0.256710,115.229662,54.821918,2.0,-35.297275,149.034234,"John Gorton Dr, Denman Prospect"
0012E,10.64,4191.0,2312.339117,5.681485,0.001356,0.533974,217.325105,393.890977,21.0,-35.190420,149.124630,"Gungahlin Dr, Gungahlin"
0171G,20.09,7863.0,2628.776598,6.016631,0.000765,0.299484,130.850005,391.388751,58.0,-35.316240,149.145240,"45 Wentworth Ave, Kingston"
0002F,3.99,1202.0,1831.664690,4.233308,0.003522,1.060980,459.063832,301.253133,8.0,-35.247960,149.149350,"161 Antill St, Downer"


In [72]:
# export the coordinates of every camera
cameras[["lat", "lng"]].to_csv("camerasAll.csv", index=False)
# export the 50 cameras with the highest productivity_fines, best first
best_columns = ["lat", "lng", "productivity_fines", "desc"]
best_cameras = cameras[best_columns].sort_values("productivity_fines", ascending=False).head(50)
best_cameras.to_csv("camerasBest.csv", index=False)

In [73]:
# Display the cameras frame (pandas abbreviates long frames with a "..." row).
# NOTE(review): the rendered rows are ordered differently from the In [24] output
# and still contain unabbreviated "Street," — presumably `cameras` was rebuilt or
# re-sorted in cells not shown here; confirm with Restart & Run All.
cameras

Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc
3078A,14.47,3617.0,9.962400e+04,222.000000,0.061377,15.342087,6884.865238,249.965446,4.0,-35.225924,149.028609,"Starke Street, Higgins"
0243I,5.83,8996.0,3.719150e+04,75.452973,0.008387,12.942191,6379.330246,1543.053173,3.0,-35.234002,149.195269,"Majura Parkway, Canberra"
3014A,231.10,77122.0,1.322098e+06,2680.000000,0.034750,11.596711,5720.891389,333.717006,20.0,-35.336347,149.148487,"Narrabundah College, 20 Jerrabomberra Ave, Nar..."
3050A,60.48,13309.0,3.209558e+05,682.781411,0.051302,11.289375,5306.809345,220.056217,13.0,-35.196955,149.145502,"135 Oodgeroo Ave, Franklin"
3016A,192.90,60762.0,1.038999e+06,2127.000000,0.035005,11.026439,5386.205288,314.992224,7.0,-35.331375,149.122657,"46 Mugga Way, Red Hill"
...,...,...,...,...,...,...,...,...,...,...,...,...
0087A,2.66,422.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,158.646617,2.0,-35.426780,149.107410,"56 Clift Cres, Richardson"
0087D,1.25,206.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,164.800000,5.0,-35.426980,149.111070,"63 Clift Cres, Richardson"
0006F,2.83,1033.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,365.017668,35.0,-35.271350,149.050330,"Coppins Crossing Rd, Molonglo Valley"
0087E,1.33,257.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,193.233083,8.0,-35.426330,149.112150,"73 Clift Cres, Richardson"


In [82]:
# Display the first 50 rows of the cameras frame in its current order.
cameras.head(50)

Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc
3078A,14.47,3617.0,99624.0,222.0,0.061377,15.342087,6884.865238,249.965446,4.0,-35.225924,149.028609,"Starke Street, Higgins"
0243I,5.83,8996.0,37191.5,75.452973,0.008387,12.942191,6379.330246,1543.053173,3.0,-35.234002,149.195269,"Majura Parkway, Canberra"
3014A,231.1,77122.0,1322098.0,2680.0,0.03475,11.596711,5720.891389,333.717006,20.0,-35.336347,149.148487,"Narrabundah College, 20 Jerrabomberra Ave, Nar..."
3050A,60.48,13309.0,320955.8,682.781411,0.051302,11.289375,5306.809345,220.056217,13.0,-35.196955,149.145502,"135 Oodgeroo Ave, Franklin"
3016A,192.9,60762.0,1038999.0,2127.0,0.035005,11.026439,5386.205288,314.992224,7.0,-35.331375,149.122657,"46 Mugga Way, Red Hill"
3036A,139.36,66503.0,700361.0,1467.0,0.022059,10.526693,5025.552526,477.202928,46.0,-35.248805,149.154729,"173 Phillip Ave, Hackett"
4003A,81.17,53586.0,298907.0,837.0,0.01562,10.311692,3682.481212,660.170014,4.0,-35.315765,149.076981,240 Cotter Road
0243H,30.36,34185.0,141328.5,286.722974,0.008387,9.444103,4655.089233,1125.988142,3.0,-35.258807,149.187156,"Majura Parkway, Canberra"
3007A,104.83,41555.0,439568.0,965.0,0.023222,9.20538,4193.150816,396.403701,19.0,-35.253978,149.147274,"129 Majura Ave, Dickson"
3038A,126.18,77535.0,499863.0,1131.0,0.014587,8.963386,3961.50737,614.479315,29.0,-35.253648,149.139205,"112 Cowper St, Dickson"
