In [45]:
import pandas as pd
import numpy as np
import time

# results container
cameras = pd.DataFrame(columns=["duration", "checked", "amount", "fines", "desc", "lat", "lng"])

# location data
locations = pd.read_csv("./camera_locations.csv", usecols=["CAMERA TYPE", "LOCATION_CODE", "LATITUDE", "LONGITUDE", "LOCATION DESCRIPTION", "Decommissioned Camera_Date"])
locations.columns = ["type", "code", "lat", "lng", "desc", "decommissioned"]
locations.code = locations.code.str.upper()
locations.code = locations.code.apply(lambda x: x[:str(x).find("_")] if (str(x).find("_") > -1) else x)
locations.set_index("code", inplace=True)
# filter out decommissioned and fixed cameras
locations = locations[(locations.type == "MOBILE SPEED CAMERA") & (pd.isna(locations.decommissioned))]
locations = locations[["lat", "lng", "desc"]]

# deployments data
deployments = pd.read_csv("./camera_visits_and_stays.csv", usecols=["Date", "TimeAtSiteInHours", "Description of Site", "Camera Location", "Number Checked"], dtype={"Camera Location":"str"})
deployments.columns = ["date", "duration", "desc", "code", "checked"]
deployments.date = pd.to_datetime(deployments.date, format="%d/%m/%Y")
deployments.code = deployments.code.str.upper()
# remove junk characters
deployments.code = deployments.code.apply(lambda x: x[:str(x).find("\n")] if (str(x).find("\n") > -1) else x)
deployments.code = deployments.code.apply(lambda x: x[:str(x).find("\r")] if (str(x).find("\r") > -1) else x)
# filter out incomplete and old rows
deployments = deployments[pd.notna(deployments.code)]
deployments = deployments[(deployments.date > pd.to_datetime("2017-01-31")) & (deployments.date < pd.to_datetime("2020-02-01"))]

# offences data
offences = pd.read_csv("./camera_offences_and_fines.csv", usecols=["Offence_Month", "Camera_Type", "Location_Code", "Location_Desc", "Offence_Desc", "Sum_Pen_Amt", "Sum_Inf_Count"])
offences.columns = ["month", "type", "code", "desc", "offence", "amount", "fines"]
offences.code = offences.code.apply(lambda x: f"{int(x):04}")
offences.month = pd.to_datetime(offences.month)
# filter out non-mobile cameras and old rows
offences = offences[offences.type == "MOBILE SPEED CAMERA"]
offences = offences[(offences.month > pd.to_datetime("2017-01-31")) & (offences.month < pd.to_datetime("2020-02-01"))]

offences_group = offences.groupby("code").sum()
deployments_group = deployments.groupby("code").sum()

# crash data
crashes = pd.read_csv("./crash_data.csv", usecols=["CRASH_DATE", "LONGITUDE", "LATITUDE"])
crashes.columns = ["date", "lng", "lat"]
crashes.date = pd.to_datetime(crashes.date, format="%d/%m/%Y")
# filter out old rows
crashes = crashes[(crashes.date > pd.to_datetime("2015-01-31")) & (crashes.date < pd.to_datetime("2020-02-01"))]

def geo_distance(lat1, lng1, lat2, lng2):
    ''' Harversine equation for calculating distance between geolocations '''
    r = 6371
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lng2 - lng1)
    a = np.sin(delta_phi / 2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda / 2)**2
    res = r * (2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)))
    return res

# missing data
errors = []
start = time.time()
interval = 1
print(f"calculation under way ...", end="\r")
for i, camera in enumerate(set(locations.index)):
    if locations.loc[camera].index[0] == "lat":
        cameras.at[camera, "lat"] = locations.at[camera, "lat"]
        cameras.at[camera, "lng"] = locations.at[camera, "lng"]
        cameras.at[camera, "desc"] = locations.at[camera, "desc"]
    else:
        cameras.at[camera, "lat"] = locations.at[camera, "lat"][0]
        cameras.at[camera, "lng"] = locations.at[camera, "lng"][0]
        cameras.at[camera, "desc"] = locations.at[camera, "desc"][0]
    if camera in deployments_group.index:
        cameras.at[camera, "duration"] = deployments_group.at[camera, "duration"]
        cameras.at[camera, "checked"] = deployments_group.at[camera, "checked"]
    if camera[:4] in offences_group.index:
        total_checked = 0
        for indice, row in deployments_group.iterrows():
            if indice[:4] == camera[:4]:
                total_checked = total_checked + row.checked
            ratio = cameras.at[camera, "checked"] / total_checked if total_checked > 0 else 0.0
            cameras.at[camera, "amount"] = offences_group.at[camera[:4], "amount"] * ratio
            cameras.at[camera, "fines"] = offences_group.at[camera[:4], "fines"] * ratio
    if time.time() - start > interval:
        interval = interval + 1
        print(f"calculation under way ... {(i + 1) / len(set(locations.index)):.1%}", end="\r")
print(f"calculation under way ... complete")

# count proximal collisions
collisions = []
start = time.time()
interval = 1
print(f"calculating proximal collisions ...", end="\r")
for i, camera in enumerate(cameras.index):
    collision_distances = []
    for crash in crashes.index:
        collision_distances.append(
            geo_distance(crashes.at[crash, "lat"], crashes.at[crash, "lng"], cameras.at[camera, "lat"], cameras.at[camera, "lng"])
        )
    collision_distances = [x for x in collision_distances if x <= .25]
    collisions.append(len(collision_distances))
    if time.time() - start > interval:
        interval = interval + 1
        print(f"calculating proximal collisions ... {(i + 1) / len(cameras):.1%}", end="\r")
cameras["collisions"] = collisions
print(f"calculating proximal collisions ... complete")

cameras.dropna(inplace=True)
cameras["speeding"] = cameras.fines / cameras.checked
cameras["productivity_fines"] = cameras.fines / cameras.duration
cameras["productivity_amount"] = cameras.amount / cameras.duration
cameras["traffic_flow"] = cameras.checked / cameras.duration
cameras = cameras[["duration", "checked", "amount", "fines", "speeding", "productivity_fines", "productivity_amount", "traffic_flow", "collisions", "lat", "lng", "desc"]]
for column in cameras.columns[:-1]:
    cameras[column] = cameras[column].astype("float")

cameras.desc = cameras.desc.apply(lambda x: x.replace("Australian Capital Territory", "ACT"))
cameras.desc = cameras.desc.apply(lambda x: x[:x.find(" ACT")] if x.find(" ACT") > -1 else x)
cameras.desc = cameras.desc.apply(lambda x: x[:x.find(" NSW")] if x.find(" NSW") > -1 else x)
cameras.desc = cameras.desc.apply(lambda x: x[:-1] if x[-1] == "," else x)
cameras.desc = cameras.desc.apply(lambda x: x.replace(" Dr ", " Dr, "))
cameras.desc = cameras.desc.apply(lambda x: x.replace(" Street ", " St, "))
cameras.desc = cameras.desc.apply(lambda x: x.replace(" Crescent ", " Cres, "))
cameras.desc = cameras.desc.apply(lambda x: x.replace("Crescent,", "Cres,"))
cameras.desc = cameras.desc.apply(lambda x: x.replace("Drive,", "Dv,"))
cameras.desc = cameras.desc.apply(lambda x: x.replace("Avenue,", "Ave,"))
cameras.desc = cameras.desc.apply(lambda x: x.replace("Street,", "St,"))
cameras.desc = cameras.desc.apply(lambda x: x.replace("Terrace,", "Tce,"))
cameras.desc = cameras.desc.apply(lambda x: x.replace("Road,", "Rd,"))

cameras.to_csv("./camera_data.csv")
cameras

calculation under way ... complete
calculating proximal collisions ... complete


Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc
0037A,10.81,3574.0,1982.685166,5.655641,0.001582,0.523186,183.412134,330.619796,30.0,-35.342930,149.057510,"22 Namatjira Dr, Weston"
0156D,10.90,5655.0,3808.034558,8.350953,0.001477,0.766142,349.360969,518.807339,39.0,-35.337110,149.051660,"Streeton Dr, Holder"
0072Q,22.84,13147.0,15796.514326,33.449297,0.002544,1.464505,691.616214,575.612960,1.0,-35.519423,149.145319,Monaro Hwy
0023A,81.05,106336.0,5006.000000,11.000000,0.000103,0.135719,61.764343,1311.980259,21.0,-35.285090,149.102070,"Parkes Way, Acton"
3071A,61.91,13991.0,107006.000000,236.000000,0.016868,3.811985,1728.412211,225.989339,38.0,-35.254563,149.076254,"30 Bindel St, Aranda"
...,...,...,...,...,...,...,...,...,...,...,...,...
0007C,174.50,154058.0,88132.068760,207.459423,0.001347,1.188879,505.054835,882.853868,7.0,-35.389080,149.065800,"Drakeford Dr, Kambah"
0156H,26.77,6877.0,4630.920187,10.155527,0.001477,0.379362,172.989174,256.892043,2.0,-35.347970,149.044570,"68 Streeton Dr, Rivett"
0124B,5.43,2081.0,1029.386694,3.749067,0.001802,0.690436,189.573977,383.241252,69.0,-35.220990,149.017420,"186 Starke St, Holt"
0243H,30.36,34185.0,141328.509122,286.722974,0.008387,9.444103,4655.089233,1125.988142,4.0,-35.258807,149.187156,"Majura Parkway, Canberra"


In [46]:
cameras = pd.read_csv("camera_data.csv", index_col="Unnamed: 0")
cameras.sort_values("productivity_fines", ascending=False, inplace=True)
cameras[["lat", "lng"]].to_csv("camerasAll.csv", index=False)
cameras[["lat", "lng", "productivity_fines", "desc"]][:50].to_csv("camerasBest.csv", index=False)
cameras

Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc
3078A,14.47,3617.0,9.962400e+04,222.000000,0.061377,15.342087,6884.865238,249.965446,7.0,-35.225924,149.028609,"Starke St, Higgins"
0243I,5.83,8996.0,3.719150e+04,75.452973,0.008387,12.942191,6379.330246,1543.053173,6.0,-35.234002,149.195269,"Majura Parkway, Canberra"
3014A,231.10,77122.0,1.322098e+06,2680.000000,0.034750,11.596711,5720.891389,333.717006,36.0,-35.336347,149.148487,"Narrabundah College, 20 Jerrabomberra Ave, Nar..."
3050A,60.48,13309.0,3.209558e+05,682.781411,0.051302,11.289375,5306.809345,220.056217,14.0,-35.196955,149.145502,"135 Oodgeroo Ave, Franklin"
3016A,192.90,60762.0,1.038999e+06,2127.000000,0.035005,11.026439,5386.205288,314.992224,13.0,-35.331375,149.122657,"46 Mugga Way, Red Hill"
...,...,...,...,...,...,...,...,...,...,...,...,...
0087D,1.25,206.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,164.800000,10.0,-35.426980,149.111070,"63 Clift Cres, Richardson"
0006F,2.83,1033.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,365.017668,44.0,-35.271350,149.050330,"Coppins Crossing Rd, Molonglo Valley"
0087A,2.66,422.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,158.646617,7.0,-35.426780,149.107410,"56 Clift Cres, Richardson"
0087F,2.66,926.0,0.000000e+00,0.000000,0.000000,0.000000,0.000000,348.120301,8.0,-35.429970,149.119980,"123 Clift Cres, Chisholm"


In [47]:
cameras.describe()

Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng
count,977.0,977.0,977.0,977.0,977.0,977.0,977.0,977.0,977.0,977.0,977.0
mean,39.317103,19322.295803,34539.82,79.57746,0.004083,1.123118,475.856376,372.395917,24.471853,-36158.33,149.100846
std,51.120238,40553.939783,116052.4,257.09188,0.006356,1.610951,728.701846,247.868981,34.821198,1129097.0,0.045648
min,1.25,9.0,0.0,0.0,0.0,0.0,0.0,7.2,0.0,-35292230.0,149.003831
25%,9.65,2327.0,1075.836,2.921079,0.000801,0.247853,98.372224,193.276109,6.0,-35.36836,149.06757
50%,22.45,6453.0,4457.933,10.628146,0.00188,0.561057,223.308271,323.255814,12.0,-35.31233,149.09867
75%,48.16,15708.0,19002.0,45.614194,0.004357,1.410924,578.151159,490.711806,28.0,-35.24087,149.13466
max,448.97,352366.0,1456761.0,3613.290566,0.061377,15.342087,6884.865238,1543.053173,300.0,-35.15319,149.376737


In [36]:
cameras["new_amount"] = cameras.new_duration * cameras.productivity_amount

In [42]:
(cameras.new_amount.sum() - cameras.amount.sum()) / cameras.amount.sum()

0.7151840497052887

In [50]:
cameras.sort_values("collisions", ascending=False).head(50)

Unnamed: 0,duration,checked,amount,fines,speeding,productivity_fines,productivity_amount,traffic_flow,collisions,lat,lng,desc
0014C,56.12,17437.0,11454.27,23.89713,0.00137,0.425822,204.10312,310.709195,300.0,-35.34902,149.0855,"103 Hindmarsh Dr, Phillip"
0014D,1.33,170.0,111.672,0.232982,0.00137,0.175175,83.963942,127.819549,298.0,-35.34883,149.08531,"103 Hindmarsh Dr, Phillip"
0014F,18.62,12676.0,8326.793,17.372256,0.00137,0.932989,447.196188,680.773362,249.0,-35.34778,149.07349,"Hindmarsh Dr, Chifley"
0014A,169.33,87532.0,57499.28,119.96121,0.00137,0.708446,339.569349,516.931436,236.0,-35.34909,149.08742,"Hindmarsh Dr, Phillip"
0171E,20.79,7084.0,2368.339,5.420554,0.000765,0.260729,113.917243,340.740741,219.0,-35.32109,149.14832,"91 Wentworth Ave, Kingston"
0134A,1.33,532.0,297.0,1.0,0.00188,0.75188,223.308271,400.0,215.0,-35.32241,149.14665,"115 Canberra Ave, Griffith"
0086D,52.73,22923.0,32709.48,79.773064,0.00348,1.512859,620.320037,434.724066,212.0,-35.251716,149.075236,"Canberra High School, 4 Bindubi St, Macquarie"
0014B,21.2,8441.0,5544.845,11.568256,0.00137,0.545672,261.54931,398.160377,200.0,-35.34889,149.08727,"Hindmarsh Dr, Phillip"
0066C,31.68,7808.0,2450.641,5.034875,0.000645,0.158929,77.356105,246.464646,198.0,-35.23438,149.08972,"Ginninderra Dr, Bruce"
0004I,27.45,16220.0,7482.327,19.147358,0.00118,0.697536,272.580234,590.892532,180.0,-35.25115,149.07759,"Belconnen Way, Bruce"
