# Onboarding Project TP17
## Dataset - Victoria Road Crash Data

In [None]:
# Download Libraries

# !pip3 install folium
# !pip3 install geojson
# !pip -q install flask

## Importing Libraries

In [2]:
# Import Libraries

import pandas as pd
import json
import geopandas as gpd
import folium
from folium.plugins import HeatMap
from IPython.display import IFrame
import geojson
from shapely.geometry import shape, Point
from collections import defaultdict
from sqlalchemy import create_engine
import csv
import IPython
from flask import Flask, send_file
import threading, os, logging
from google.colab.output import eval_js
from IPython.display import IFrame

In [3]:
# Mount Google Drive at /content/drive; the data files read and written below live under that mount point
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [4]:
# Read and Load GeoJSON file
# The parsed document is kept as plain Python objects; later cells read its 'features' list
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/VICTORIAN_ROAD_CRASH_DATA.geojson', 'r') as f:
    crash_data = json.load(f)

In [5]:
# Inspect the property names (columns) carried by the first feature
first_properties = crash_data['features'][0]['properties']
columns = first_properties.keys()

# Header followed by one property name per line
print("\n".join(["Columns in the GeoJSON file:", *columns]))

Columns in the GeoJSON file:
ACCIDENT_NO
ACCIDENT_DATE
ACCIDENT_TIME
ACCIDENT_TYPE
DAY_OF_WEEK
DCA_CODE
DCA_CODE_DESCRIPTION
LIGHT_CONDITION
POLICE_ATTEND
ROAD_GEOMETRY
SEVERITY
SPEED_ZONE
RUN_OFFROAD
ROAD_NAME
ROAD_TYPE
ROAD_ROUTE_1
LGA_NAME
DTP_REGION
LATITUDE
LONGITUDE
VICGRID_X
VICGRID_Y
TOTAL_PERSONS
INJ_OR_FATAL
FATALITY
SERIOUSINJURY
OTHERINJURY
NONINJURED
MALES
FEMALES
BICYCLIST
PASSENGER
DRIVER
PEDESTRIAN
PILLION
MOTORCYCLIST
UNKNOWN
PED_CYCLIST_5_12
PED_CYCLIST_13_18
OLD_PED_65_AND_OVER
OLD_DRIVER_75_AND_OVER
YOUNG_DRIVER_18_25
NO_OF_VEHICLES
HEAVYVEHICLE
PASSENGERVEHICLE
MOTORCYCLE
PT_VEHICLE
DEG_URBAN_NAME
SRNS
RMA
DIVIDED
STAT_DIV_NAME


## Selecting and Filtering Important columns from the dataset

In [6]:
# Properties retained for every accident feature after filtering
imp_cols = [
    "ACCIDENT_NO",
    "ACCIDENT_DATE",
    "ACCIDENT_TIME",
    "ACCIDENT_TYPE",
    "DAY_OF_WEEK",
    "SEVERITY",
    "LGA_NAME",
    "LATITUDE",
    "LONGITUDE",
    "FATALITY",
]

In [7]:
# Keep only the crashes that involved at least one bicyclist AND happened in the
# MELBOURNE LGA; every other row is dropped. Each kept feature retains its geometry
# but only the properties listed in imp_cols.

filtered_data = []
for feature in crash_data['features']:
    properties = feature['properties']
    # A JSON null (or a missing key) counts as "no bicyclist" instead of raising
    # on the `> 0` comparison.
    bicyclist_count = properties.get('BICYCLIST') or 0
    if bicyclist_count > 0 and properties.get('LGA_NAME') == 'MELBOURNE':
        filtered_data.append({
            "type": "Feature",
            "geometry": feature["geometry"],
            "properties": {key: properties[key] for key in imp_cols},
        })


In [8]:
# Wrap the filtered features in a FeatureCollection
cleaned_data = dict(type="FeatureCollection", features=filtered_data)

# Write the collection to Drive, in the same folder as the raw crash file
filtered_geojson_path = '/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/filtered_accident_data.geojson'
with open(filtered_geojson_path, 'w') as geojson_out:
    json.dump(cleaned_data, geojson_out, indent=2)


## Circle-marker map of Bicycle Accidents

In [9]:
# Path of the filtered bicycle-accident GeoJSON written by the previous step
geourl = '/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/filtered_accident_data.geojson'

In [16]:
# Read the filtered accidents into a GeoDataFrame
geo_data = gpd.read_file(geourl)

# Restrict to rows whose LGA is MELBOURNE
in_melbourne = geo_data['LGA_NAME'] == 'MELBOURNE'
melbourne_data = geo_data.loc[in_melbourne]

# Work on a copy so the filtered frame itself is left untouched
df = melbourne_data.copy()

# Number of accidents recorded at each distinct (latitude, longitude) pair
accident_counts = (
    df.groupby(['LATITUDE', 'LONGITUDE'])
    .size()
    .reset_index(name='count')
)

# Base map centred on Melbourne
m = folium.Map(location=[-37.8136, 144.9631], zoom_start=15)


In [17]:
# Add one circle marker per distinct coordinate pair, with the number of accidents
# recorded there shown in the popup.
# The columns are iterated directly rather than through DataFrame.iterrows():
# iterrows() hands back each row as a single-dtype Series, so when the coordinate
# columns are floats the integer count is upcast and the popup reads "1.0" instead of "1".
marker_rows = zip(
    accident_counts['LATITUDE'],
    accident_counts['LONGITUDE'],
    accident_counts['count'],
)
for latitude, longitude, n_accidents in marker_rows:
    folium.CircleMarker(
        location=[latitude, longitude],
        radius=5,
        popup=f"Accident Count: {int(n_accidents)}",
        color='blue',
        fill=True,
        fill_color='blue'
    ).add_to(m)

# Save the map as HTML
m.save('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/melbourne_accidents_choropleth.html')
m

## Heatmap

In [18]:
# Extract the coordinates for the heatmap
# Each entry is a [LATITUDE, LONGITUDE] pair taken from the Melbourne-only frame
heat_data = melbourne_data[['LATITUDE', 'LONGITUDE']].values.tolist()

# Add HeatMap to the base map
# NOTE(review): `m` is the same map object the circle markers were added to in the
# previous cell, so the HTML saved below contains both layers — confirm this is intended.
HeatMap(heat_data).add_to(m)


# Save the map as HTML
m.save('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/melbourne_accidents_heatmap.html')
m

## Geopoint identification in Polygons

In [19]:
# Suburb names used to pick the matching polygons out of the Victoria suburb file
suburbs = [
    "Carlton",
    "Carlton North",
    "Docklands",
    "East Melbourne",
    "Flemington",
    "Kensington",
    "Melbourne",
    "North Melbourne",
    "Parkville",
    "Port Melbourne",
    "South Wharf",
    "South Yarra",
    "Southbank",
    "West Melbourne",
    "Windsor",
]

# Suburb properties carried over into the filtered polygon file
imp_cols_suburb = ['Suburb_Name']


In [21]:
# Loading json file
# The top-level value is not assumed to be a dict: the filtering cell accepts either
# a bare list of features or an object with a 'features' key
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/vic.json', 'r') as file:
    vic_data = json.load(file)

## Filtering JSON data based on the list of suburbs in Melbourne

In [None]:
# function to filter suburbs
def filter_json(vic_data, suburbs,imp_cols_suburb):
    """Build a GeoJSON FeatureCollection containing only the requested suburbs.

    Args:
        vic_data: Iterable of GeoJSON-style feature dicts, each expected to
            carry ``properties`` and ``geometry`` entries.
        suburbs: Suburb names to keep. A feature matches when its
            whitespace-stripped ``Suburb_Name`` is one of these names.
        imp_cols_suburb: Property keys copied into each output feature; a key
            the feature does not have is stored as ``None``.

    Returns:
        dict: ``{"type": "FeatureCollection", "features": [...]}`` with one
        trimmed feature per match, in input order. Property values are copied
        as stored (the name is stripped only for the comparison).
    """
    # Set membership keeps the per-feature lookup constant-time
    wanted_names = set(suburbs)
    filtered_data_suburbs = []
    for feature in vic_data:
        properties = feature.get('properties') or {}
        name = properties.get('Suburb_Name')
        # A feature with no usable name can never match: skip it rather than
        # raising KeyError/AttributeError halfway through the file.
        if not isinstance(name, str) or name.strip() not in wanted_names:
            continue
        filtered_data_suburbs.append({
            "type": "Feature",
            "geometry": feature["geometry"],
            "properties": {key: properties.get(key) for key in imp_cols_suburb},
        })
    return {
        "type": "FeatureCollection",
        "features": filtered_data_suburbs,
    }

# vic.json may be either a bare list of features or an object with a 'features' key
features = vic_data if isinstance(vic_data, list) else vic_data['features']

# Keep only the suburbs of interest
filtered_vic_json = filter_json(features, suburbs, imp_cols_suburb)

# Write the filtered polygons to their own file on Drive
output_file = '/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/filtered_vic_json.json'
with open(output_file, 'w') as suburbs_out:
    json.dump(filtered_vic_json, suburbs_out, indent=2)




## Loading the New GeoJSON Files

In [None]:

# Load the GeoJSON file with accident data
# (the filtered bicycle accidents, parsed with the geojson package)
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/filtered_accident_data.geojson') as f:
    accident_data = geojson.load(f)

# Load the JSON file with suburb polygons
# (the filtered suburb file, parsed with the standard json module)
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/filtered_vic_json.json') as f:
    suburb_data = json.load(f)


## Finding which suburb each accident point belongs to

In [None]:
def find_suburb(point, suburb_features):
    """Return the ``Suburb_Name`` of the first suburb polygon containing ``point``.

    Features are tested in the order given; each feature's geometry is turned
    into a shape and checked with ``contains``. Returns ``None`` when no
    feature contains the point (including when ``suburb_features`` is empty).
    """
    containing_names = (
        candidate['properties']['Suburb_Name']
        for candidate in suburb_features
        if shape(candidate['geometry']).contains(point)
    )
    # The generator is lazy, so evaluation stops at the first containing polygon
    return next(containing_names, None)

# Running total of accidents located in each suburb
accident_count_per_suburb = defaultdict(int)

# Per-suburb totals split into the three severity buckets
severity_count_per_suburb = defaultdict(lambda: {'Severe': 0, 'Mild': 0, 'Fatal': 0})

# SEVERITY label -> bucket name; any other label is counted as an accident
# but not added to a severity bucket
severity_buckets = {
    'Serious injury accident': 'Severe',
    'Other injury accident': 'Mild',
    'Fatal accident': 'Fatal',
}

# Tag each accident with its suburb and update the tallies
updated_features = []
for accident in accident_data['features']:
    coords = accident['geometry']['coordinates']
    point = Point(coords[0], coords[1])
    suburb = find_suburb(point, suburb_data['features'])
    if not suburb:
        # Accidents outside every suburb polygon are left untagged and uncounted
        continue
    accident['properties']['Suburb'] = suburb
    updated_features.append(accident)
    accident_count_per_suburb[suburb] += 1
    bucket = severity_buckets.get(accident['properties']['SEVERITY'])
    if bucket is not None:
        severity_count_per_suburb[suburb][bucket] += 1




# Build a FeatureCollection holding only the accidents that were matched to a suburb
# NOTE(review): updated_accident_data is not referenced again in this cell — the dump
# below writes accident_data instead; confirm whether this variable is still needed
updated_accident_data = {
    "type": "FeatureCollection",
    "features": updated_features
}

# Save ALL accidents to an intermediate file; the ones matched to a suburb carry
# the 'Suburb' property that the loop above added in place
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/Final_Accidents_old.geojson', 'w') as f:
    geojson.dump(accident_data, f)


# Reload the intermediate file that was just written
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/Final_Accidents_old.geojson') as f:
    accident_data_new = geojson.load(f)

# Filter out features that do not have the "Suburb" field
filtered_features_new = [feature for feature in accident_data_new['features'] if 'Suburb' in feature['properties']]

# Create the updated GeoJSON structure with filtered features
filtered_accident_data = {
    "type": "FeatureCollection",
    "features": filtered_features_new
}

# Save the suburb-tagged accidents as the final accident file
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/Final_Accidents.geojson', 'w') as f:
    geojson.dump(filtered_accident_data, f)


# Attach the accident total and the severity breakdown to every suburb polygon.
# Both tallies are indexed (not .get) on purpose: indexing a defaultdict creates a
# zero entry for suburbs that had no accidents.
for suburb_feature in suburb_data['features']:
    suburb_properties = suburb_feature['properties']
    name = suburb_properties['Suburb_Name']
    suburb_properties['Accident_Count'] = accident_count_per_suburb[name]
    severity_tally = severity_count_per_suburb[name]
    for bucket in ('Severe', 'Mild', 'Fatal'):
        suburb_properties[bucket] = severity_tally[bucket]


# FeatureCollection of the annotated suburb polygons
updated_suburb_data = dict(type="FeatureCollection", features=suburb_data['features'])

# Write the annotated suburbs (counts and severity breakdown) to Drive
final_suburbs_path = '/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/Final_Suburbs.geojson'
with open(final_suburbs_path, 'w') as suburbs_out:
    geojson.dump(updated_suburb_data, suburbs_out)


In [None]:
# Fresh base map centred on Melbourne for the suburb-level view
m = folium.Map(location=[-37.8136, 144.9631], zoom_start=12)

# Seven evenly spaced bin edges running from the smallest to the largest suburb count
suburb_totals = accident_count_per_suburb.values()
max_accidents = max(suburb_totals)
min_accidents = min(suburb_totals)
count_range = max_accidents - min_accidents
# Multiply before dividing so the last edge is computed exactly as (6 * range) / 6
bins = [min_accidents + step * count_range / 6 for step in range(7)]


# Colour each suburb polygon by its accident count
accident_choropleth = folium.Choropleth(
    geo_data=updated_suburb_data,
    name='choropleth',
    data=accident_count_per_suburb.items(),
    columns=['Suburb_Name', 'Accident_Count'],
    key_on='feature.properties.Suburb_Name',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Accidents',
    bins=bins,
    reset=True
)
accident_choropleth.add_to(m)

def popup_function(feature):
    """Return the popup HTML for a suburb feature: its name and accident count.

    Reads ``Suburb_Name`` and ``Accident_Count`` from ``feature['properties']``.
    """
    suburb_properties = feature['properties']
    suburb_name, accident_count = (
        suburb_properties['Suburb_Name'],
        suburb_properties['Accident_Count'],
    )
    return f"Suburb: {suburb_name}<br>Accidents: {accident_count}"

# Transparent overlay of the suburb polygons that supplies hover highlighting,
# a tooltip and a click popup on top of the choropleth colours
hover_tooltip = folium.features.GeoJsonTooltip(
    fields=['Suburb_Name', 'Accident_Count'],
    aliases=['Suburb:', 'Number of Accidents:'],
    localize=True
)
click_popup = folium.GeoJsonPopup(
    fields=['Suburb_Name', 'Accident_Count'],
    aliases=['Suburb:', 'Number of Accidents:']
)

suburb_overlay = folium.GeoJson(
    updated_suburb_data,
    # Fully transparent fill with a thin black outline
    style_function=lambda feature: {
        'fillColor': '#ffffff00',
        'color': 'black',
        'weight': 0.2
    },
    # Thicker outline while the pointer is over a suburb
    highlight_function=lambda x: {'weight': 3, 'color': 'black'},
    tooltip=hover_tooltip,
    popup=click_popup
)
suburb_overlay.add_to(m)

# Layer control panel for the map
folium.LayerControl().add_to(m)

In [None]:
# Save the map to an HTML file
# The trailing bare `m` makes the notebook render the map as the cell output
m.save('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/accidents_choropleth_map_popup.html')
m


In [None]:
# NOTE(review): same 15 names as the `suburbs` list declared in the earlier
# "Geopoints" section — this cell re-declares it; confirm whether it is still needed
suburbs= ["Carlton", "Carlton North", "Docklands", "East Melbourne", "Flemington", "Kensington", "Melbourne", "North Melbourne",
             "Parkville", "Port Melbourne", "South Wharf", "South Yarra", "Southbank", "West Melbourne", "Windsor"]

In [None]:
# accident
# Read the suburb-tagged accident GeoJSON back from Drive
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/Final_Accidents.geojson') as f:
    Final_Accidents = geojson.load(f)



In [None]:
# Export the suburb-tagged accidents as a semicolon-delimited CSV
csv_file = 'Final_Accidents.csv'
csv_columns = ["acc_no", "severity", "lga_name", "latitude", "longitude", "fatality", "suburb"]

# GeoJSON property feeding each CSV column, listed in csv_columns order
source_properties = ["ACCIDENT_NO", "SEVERITY", "LGA_NAME", "LATITUDE", "LONGITUDE", "FATALITY", "Suburb"]

with open(csv_file, 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=csv_columns, delimiter=';')
    writer.writeheader()
    for feature in Final_Accidents['features']:
        accident_properties = feature['properties']
        row = {
            column: accident_properties[source]
            for column, source in zip(csv_columns, source_properties)
        }
        writer.writerow(row)




In [None]:
# suburb
# Read the annotated suburb polygons from the Drive folder they were written to.
# A bare 'Final_Suburbs.geojson' is resolved against the working directory, which is
# not where the earlier cell saved the file.
with open('/content/drive/MyDrive/Colab_Portfolio/project3_victoria_road_crash/Final_Suburbs.geojson') as f:
    Final_Suburbs = geojson.load(f)

In [None]:
# Export the per-suburb accident summary as a semicolon-delimited CSV
csv_file_suburbs = 'Final_Suburbs.csv'
csv_columns_suburbs = ["suburb_name", "acc_count", "severe", "mild", "fatal"]

# The file handle gets its own name: binding it to csv_file_suburbs would replace
# the path string with a (closed) file object once the block exits.
with open(csv_file_suburbs, 'w', newline='') as suburbs_csv:
    writer = csv.DictWriter(suburbs_csv, fieldnames=csv_columns_suburbs, delimiter=';')
    writer.writeheader()
    for feature in Final_Suburbs['features']:
        suburb_properties = feature['properties']
        row = {
            "suburb_name": suburb_properties["Suburb_Name"],
            "acc_count": suburb_properties["Accident_Count"],
            "severe": suburb_properties["Severe"],
            "mild": suburb_properties["Mild"],
            "fatal": suburb_properties["Fatal"],
        }
        writer.writerow(row)


#,"coordinates": json.dumps(coordinates)



## EPIC 2

In [None]:
# Read and Load GeoJSON file
# NOTE(review): relative path, so the file is looked up in the current working
# directory rather than the Drive project folder used elsewhere — confirm location
with open('bicycle-routes-including-informal-on-road-and-off-road-routes.geojson', 'r') as f:
    routes_data = json.load(f)


In [None]:
# Extract the route features and start an empty list of records
# (the records are filled in and turned into a DataFrame by the following cells)
routes_cols = routes_data['features']
routes_list = []


In [None]:
# Flatten every route feature into one record: its properties plus the geometry
# coordinates serialised as a JSON string.
# The list is rebuilt from scratch and each properties dict is copied, so re-running
# this cell neither appends duplicate rows nor writes a 'coordinates' key back into
# routes_data.
routes_list = [
    {**route['properties'], 'coordinates': json.dumps(route['geometry']['coordinates'])}
    for route in routes_cols
]


In [None]:
# One row per route record; preview the first rows
df_routes = pd.DataFrame(routes_list)
df_routes.head()

In [None]:
# Drop the columns that are not needed for the analysis
unneeded_columns = ["info", "status", "notes"]
df_routes = df_routes.drop(columns=unneeded_columns)
df_routes.head()

In [None]:
# Count missing values in each column
# isna() flags missing cells; sum() totals the flags per column
missing_values_per_column = df_routes.isna().sum()
missing_values_per_column