In [1]:
import pandas as pd
import numpy as np
import time

# empty per-camera results table; rows are added later, keyed by camera code
cameras = pd.DataFrame(
    columns=["duration", "checked", "amount", "fines", "desc", "lat", "lng"]
)

# camera location register
locations = pd.read_csv(
    "./camera_locations.csv",
    usecols=[
        "CAMERA TYPE",
        "LOCATION_CODE",
        "LATITUDE",
        "LONGITUDE",
        "LOCATION DESCRIPTION",
        "Decommissioned Camera_Date",
    ],
)
# short names are assigned by position, in the order the columns come back from read_csv
locations.columns = ["type", "code", "lat", "lng", "desc", "decommissioned"]
# upper-case the code and keep only the part before the first underscore
locations["code"] = locations["code"].str.upper().str.split("_", n=1).str[0]
locations = locations.set_index("code")
# keep mobile speed cameras that have no decommission date, and only the columns used downstream
locations = locations.loc[
    (locations["type"] == "MOBILE SPEED CAMERA") & locations["decommissioned"].isna(),
    ["lat", "lng", "desc"],
]

# camera deployment log (one row per visit)
deployments = pd.read_csv(
    "./camera_visits_and_stays.csv",
    usecols=["Date", "TimeAtSiteInHours", "Description of Site", "Camera Location", "Number Checked"],
    dtype={"Camera Location": "str"},
)
# short names are assigned by position, in the order the columns come back from read_csv
deployments.columns = ["date", "duration", "desc", "code", "checked"]
deployments["date"] = pd.to_datetime(deployments["date"], format="%d/%m/%Y")
# upper-case the code, then cut it at the first line feed and at the first carriage return
deployments["code"] = (
    deployments["code"]
    .str.upper()
    .str.split("\n", n=1).str[0]
    .str.split("\r", n=1).str[0]
)
# keep rows that have a code and a date strictly between 2017-01-31 and 2020-02-01
has_code = deployments["code"].notna()
after_start = deployments["date"] > pd.to_datetime("2017-01-31")
before_end = deployments["date"] < pd.to_datetime("2020-02-01")
deployments = deployments[has_code & after_start & before_end]

# offences data
offences = pd.read_csv("./camera_offences_and_fines.csv", usecols=["Offence_Month", "Camera_Type", "Location_Code", "Location_Desc", "Offence_Desc", "Sum_Pen_Amt", "Sum_Inf_Count"])
# short names are assigned by position, in the order the columns come back from read_csv
offences.columns = ["month", "type", "code", "desc", "offence", "amount", "fines"]
# zero-pad the numeric location code to four characters; the per-camera loop further down
# looks offences up by the first four characters of the camera code
offences.code = offences.code.apply(lambda x: f"{int(x):04}")
offences.month = pd.to_datetime(offences.month)
# filter out non-mobile cameras and old rows (dates strictly between 2017-01-31 and 2020-02-01)
offences = offences[offences.type == "MOBILE SPEED CAMERA"]
offences = offences[(offences.month > pd.to_datetime("2017-01-31")) & (offences.month < pd.to_datetime("2020-02-01"))]

# Per-code totals. Only numeric columns are summed: the frames also carry datetime and
# text columns, and summing those raises on pandas >= 2.0 (numeric_only now defaults to
# False) while older versions silently dropped or concatenated them.
offences_group = offences.groupby("code").sum(numeric_only=True)
deployments_group = deployments.groupby("code").sum(numeric_only=True)

# crash records (date and coordinates only)
crashes = pd.read_csv(
    "./crash_data.csv",
    usecols=["CRASH_DATE", "LONGITUDE", "LATITUDE"],
)
# short names are assigned by position, in the order the columns come back from read_csv
crashes.columns = ["date", "lng", "lat"]
crashes["date"] = pd.to_datetime(crashes["date"], format="%d/%m/%Y")
# keep crashes dated strictly between 2017-01-31 and 2020-02-01
crash_after_start = crashes["date"] > pd.to_datetime("2017-01-31")
crash_before_end = crashes["date"] < pd.to_datetime("2020-02-01")
crashes = crashes[crash_after_start & crash_before_end]

def geo_distance(lat1, lng1, lat2, lng2):
    ''' Haversine formula for the great-circle distance between geolocations.

    Coordinates are given in decimal degrees. Scalars and NumPy arrays are both
    accepted; arrays are combined element-wise under NumPy broadcasting.

    Returns the distance in kilometres, using a spherical Earth of radius 6371 km.
    '''
    r = 6371  # mean Earth radius in km
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lng2 - lng1)
    a = np.sin(delta_phi / 2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2)**2
    # Floating-point rounding can leave `a` a hair outside [0, 1] (e.g. for
    # near-antipodal points), which would turn sqrt(1 - a) into NaN.
    a = np.clip(a, 0.0, 1.0)
    res = r * (2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)))
    return res

# missing data
errors = []  # not used anywhere in this cell; kept so the name stays defined
start = time.time()
interval = 1
print(f"calculation under way ...", end="\r")

# One row per camera code: when a code is listed more than once, the first listing wins.
# Iterating this index (rather than a set) also keeps the row order of `cameras` stable
# from run to run.
unique_locations = locations[~locations.index.duplicated(keep="first")]

# Vehicles checked per 4-character code prefix, summed once up front. Offence totals are
# keyed by that 4-character code, so each camera is given a share of the prefix's
# fines in proportion to the vehicles it checked.
checked_by_prefix = deployments_group["checked"].groupby(deployments_group.index.str[:4]).sum()

for i, camera in enumerate(unique_locations.index):
    cameras.at[camera, "lat"] = unique_locations.at[camera, "lat"]
    cameras.at[camera, "lng"] = unique_locations.at[camera, "lng"]
    cameras.at[camera, "desc"] = unique_locations.at[camera, "desc"]
    if camera in deployments_group.index:
        cameras.at[camera, "duration"] = deployments_group.at[camera, "duration"]
        cameras.at[camera, "checked"] = deployments_group.at[camera, "checked"]
    prefix = camera[:4]
    if prefix in offences_group.index:
        total_checked = checked_by_prefix.get(prefix, 0)
        # a camera with no deployments has NaN in "checked", so its share (and fines) stay NaN
        ratio = cameras.at[camera, "checked"] / total_checked if total_checked > 0 else 0.0
        cameras.at[camera, "amount"] = offences_group.at[prefix, "amount"] * ratio
        cameras.at[camera, "fines"] = offences_group.at[prefix, "fines"] * ratio
    # refresh the progress line roughly once per second
    if time.time() - start > interval:
        interval = interval + 1
        print(f"calculation under way ... {(i + 1) / len(unique_locations):.1%}", end="\r")
print(f"calculation under way ... complete")

# count proximal collisions: crashes within PROXIMITY_KM kilometres of each camera
PROXIMITY_KM = .25
# pull the crash coordinates out once so each camera is compared against all crashes
# in a single vectorised geo_distance call instead of one Python call per crash
crash_lat = crashes["lat"].to_numpy()
crash_lng = crashes["lng"].to_numpy()
collisions = []
start = time.time()
interval = 1
print(f"calculating proximal collisions ...", end="\r")
for i, camera in enumerate(cameras.index):
    collision_distances = geo_distance(crash_lat, crash_lng, cameras.at[camera, "lat"], cameras.at[camera, "lng"])
    # NaN distances (missing coordinates) compare False and are therefore not counted
    collisions.append(int(np.count_nonzero(collision_distances <= PROXIMITY_KM)))
    # refresh the progress line roughly once per second
    if time.time() - start > interval:
        interval = interval + 1
        print(f"calculating proximal collisions ... {(i + 1) / len(cameras):.1%}", end="\r")
cameras["collisions"] = collisions
print(f"calculating proximal collisions ... complete")

# keep only cameras for which every statistic is present, then derive the rate metrics
cameras = cameras.dropna()
cameras = cameras.assign(
    speeding=cameras.fines / cameras.checked,                # fines per vehicle checked
    productivity_fines=cameras.fines / cameras.duration,     # fines per unit of deployed time
    productivity_amount=cameras.amount / cameras.duration,   # fine amount per unit of deployed time
    traffic_flow=cameras.checked / cameras.duration,         # vehicles checked per unit of deployed time
)
column_order = ["duration", "checked", "amount", "fines", "speeding", "productivity_fines", "productivity_amount", "traffic_flow", "collisions", "lat", "lng", "desc"]
# every column except the trailing "desc" is numeric; cast them in one step on a new frame
cameras = cameras[column_order].astype({column: "float" for column in column_order[:-1]})

# tidy the site descriptions
desc = cameras.desc.apply(lambda x: x.replace("Australian Capital Territory", "ACT"))
# drop everything from the first " ACT" / " NSW" onwards
for marker in (" ACT", " NSW"):
    desc = desc.apply(lambda x, marker=marker: x.partition(marker)[0])
# drop a single trailing comma; endswith() is safe on a description that is now empty
desc = desc.apply(lambda x: x[:-1] if x.endswith(",") else x)
# abbreviate street types, applied in this order
# NOTE(review): "Drive," becomes "Dv," while " Dr " is kept as "Dr" - confirm the mixed abbreviations are intended
abbreviations = [
    (" Dr ", " Dr, "),
    (" Street ", " St, "),
    (" Crescent ", " Cres, "),
    ("Crescent,", "Cres,"),
    ("Drive,", "Dv,"),
    ("Avenue,", "Ave,"),
    ("Street,", "St,"),
    ("Terrace,", "Tce,"),
    ("Road,", "Rd,"),
]
for old, new in abbreviations:
    desc = desc.apply(lambda x, old=old, new=new: x.replace(old, new))
cameras["desc"] = desc

cameras.to_csv("./camera_data.csv")
cameras

calculation under way ... complete
calculating proximal collisions ... complete


Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc
0106C,24.70,10899.0,7374.550918,18.295098,0.001679,0.740692,298.564814,441.255061,0.0,-35.219370,149.069470,"163 William Webb Dr, McKellar"
0288A,45.15,12279.0,20017.000000,48.000000,0.003909,1.063123,443.344408,271.960133,2.0,-35.319307,149.102701,"118 Strickland Cres, Deakin"
3050A,60.48,13309.0,320955.829207,682.781411,0.051302,11.289375,5306.809345,220.056217,13.0,-35.196955,149.145502,"135 Oodgeroo Ave, Franklin"
0256A,13.65,2327.0,2555.734567,7.090509,0.003047,0.519451,187.233302,170.476190,12.0,-35.414090,149.115099,"114 Bugden Ave, Gowrie"
0016D,4.33,2638.0,1295.884330,3.541330,0.001342,0.817859,299.280446,609.237875,2.0,-35.309920,149.078260,"592 Lady Denman Dr, Yarralumla"
...,...,...,...,...,...,...,...,...,...,...,...,...
0037B,9.40,5148.0,2855.865482,8.146401,0.001582,0.866638,303.815477,547.659574,12.0,-35.348560,149.056830,"32 Namatjira Dr, Stirling"
0250B,28.94,5660.0,10957.179487,28.196337,0.004982,0.974303,378.617121,195.577056,5.0,-35.461360,149.084315,"72 Knoke Ave, Gordon"
0014I,2.66,727.0,477.562206,0.996342,0.001370,0.374565,179.534664,273.308271,30.0,-35.344480,149.053260,"Hindmarsh Dr, Weston"
0002H,27.03,8662.0,13199.567007,30.506587,0.003522,1.128620,488.330263,320.458750,6.0,-35.241970,149.162170,"293 Antill St, Watson"


In [72]:
# coordinates of every camera
all_coordinates = cameras[["lat", "lng"]]
all_coordinates.to_csv("camerasAll.csv", index=False)

# the 50 cameras with the highest productivity_fines, best first
best_columns = ["lat", "lng", "productivity_fines", "desc"]
best_cameras = cameras[best_columns].sort_values("productivity_fines", ascending=False).head(50)
best_cameras.to_csv("camerasBest.csv", index=False)

In [36]:
import pandas as pd
# NOTE(review): the filters in the first cell keep dates from Feb 2017 to Jan 2020, so
# dividing totals by 3 presumably turns them into per-year figures - confirm.
YEARS = 3

cameras = pd.read_csv("camera_data.csv", index_col="Unnamed: 0")
cameras = cameras.sort_values("productivity_fines", ascending=False)
# Re-distribute the total deployed time across cameras in proportion to each camera's
# share of productivity_fines. The two sums are computed once rather than per row.
productivity_share = cameras.productivity_fines / cameras.productivity_fines.sum()
cameras["new_duration"] = productivity_share * cameras.duration.sum()
# scale the totals (not the rate columns) down by YEARS
for column in ["duration", "new_duration", "amount", "fines"]:
    cameras[column] = cameras[column] / YEARS
cameras[["duration", "productivity_fines", "new_duration"]].to_csv("./camerasChart.csv", index=False)
cameras

Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc,new_duration
3078A,4.823333,3617.0,33208.000000,74.000000,0.061377,15.342087,6884.865238,249.965446,4.0,-35.225924,149.028609,"Starke St, Higgins",179.027363
0243I,1.943333,8996.0,12397.165112,25.150991,0.008387,12.942191,6379.330246,1543.053173,3.0,-35.234002,149.195269,"Majura Parkway, Canberra",151.022889
3014A,77.033333,77122.0,440699.333333,893.333333,0.034750,11.596711,5720.891389,333.717006,20.0,-35.336347,149.148487,"Narrabundah College, 20 Jerrabomberra Ave, Nar...",135.322440
3050A,20.160000,13309.0,106985.276402,227.593804,0.051302,11.289375,5306.809345,220.056217,13.0,-35.196955,149.145502,"135 Oodgeroo Ave, Franklin",131.736123
3016A,64.300000,60762.0,346333.000000,709.000000,0.035005,11.026439,5386.205288,314.992224,7.0,-35.331375,149.122657,"46 Mugga Way, Red Hill",128.667907
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0087D,0.416667,206.0,0.000000,0.000000,0.000000,0.000000,0.000000,164.800000,5.0,-35.426980,149.111070,"63 Clift Cres, Richardson",0.000000
0087E,0.443333,257.0,0.000000,0.000000,0.000000,0.000000,0.000000,193.233083,8.0,-35.426330,149.112150,"73 Clift Cres, Richardson",0.000000
0087A,0.886667,422.0,0.000000,0.000000,0.000000,0.000000,0.000000,158.646617,2.0,-35.426780,149.107410,"56 Clift Cres, Richardson",0.000000
0087F,0.886667,926.0,0.000000,0.000000,0.000000,0.000000,0.000000,348.120301,7.0,-35.429970,149.119980,"123 Clift Cres, Chisholm",0.000000


In [38]:
# sum over all cameras of (re-allocated duration x fine amount per unit of deployed time)
cameras.new_duration.mul(cameras.productivity_amount).sum()

19371706.229320146