In [1]:
import pandas as pd
import requests
import folium
from geopy.geocoders import Nominatim
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [2]:
def get_vicpop_data(port, timeout=30):
    """Fetch the Victorian population records from the local HTTP service.

    Sends a GET request to ``http://localhost:{port}/search-vic-population``
    and returns the decoded JSON body.

    Args:
        port: Port on localhost where the service is listening.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled service.

    Returns:
        The parsed JSON payload, exactly as produced by ``response.json()``.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    fission_url = f"http://localhost:{port}/search-vic-population"
    # verify=False was dropped: it only affects TLS verification and this is
    # a plain-HTTP URL, so it never had any effect here.
    response = requests.get(fission_url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error body as data.
    response.raise_for_status()
    return response.json()
def get_viccrimegov_data(port, timeout=30):
    """Fetch the Victorian government crime records from the local HTTP service.

    Sends a GET request to ``http://localhost:{port}/search-vic-crimegov``
    and returns the decoded JSON body.

    Args:
        port: Port on localhost where the service is listening.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled service.

    Returns:
        The parsed JSON payload, exactly as produced by ``response.json()``.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    fission_url = f"http://localhost:{port}/search-vic-crimegov"
    # verify=False was dropped: it only affects TLS verification and this is
    # a plain-HTTP URL, so it never had any effect here.
    response = requests.get(fission_url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error body as data.
    response.raise_for_status()
    return response.json()

In [3]:
def process_pop_map(port=9030):
    """Load the population records and aggregate them per LGA name.

    Steps performed:
      1. Fetch the records via ``get_vicpop_data(port)`` into a DataFrame.
      2. Strip surrounding whitespace from every column name.
      3. Cut each ``lga_name`` at the first " (" so suffixed names collapse
         to their base name.
      4. Group by ``lga_name`` and sum the remaining columns.

    Args:
        port: Port of the local data service. Defaults to 9030, the value
            that was previously hard-coded.

    Returns:
        A DataFrame with one row per distinct ``lga_name`` and the summed
        values of the other columns (presumably per-year population columns;
        confirm against the service payload).
    """
    population_data = pd.DataFrame(get_vicpop_data(port))
    population_data.columns = population_data.columns.str.strip()

    # Raw string: ' \(' in a normal string is an invalid escape sequence that
    # newer Python versions warn about. A multi-character pattern is treated
    # as a regular expression by pandas, so the parenthesis must stay escaped.
    population_data['lga_name'] = population_data['lga_name'].str.split(r' \(').str[0]

    # Sum every remaining column within each LGA name.
    population_data = population_data.groupby('lga_name').sum().reset_index()
    return population_data

In [4]:
def process_crime_lga(port=9030):
    """Load the crime records and reshape them to one row per LGA.

    The offence counts arrive with thousands separators (e.g. "1,234"), so
    they are converted to text, stripped of commas and cast to ``int``. The
    frame is then pivoted so that each distinct value of ``filters`` becomes
    a column named ``scrime_<value>``.

    Args:
        port: Port of the local data service. Defaults to 9030, the value
            that was previously hard-coded.

    Returns:
        A DataFrame with a 'Local Government Area: Descending' column
        followed by one ``scrime_<value>`` column per distinct ``filters``
        value (presumably years; confirm against the service payload).

    Raises:
        ValueError: If a count cannot be parsed as an integer, or if the
            same LGA/``filters`` pair occurs more than once (``pivot``
            rejects duplicates).
    """
    count_column = 'Sum of Offence Count'
    lga_column = 'Local Government Area: Descending'

    crime_lga = pd.DataFrame(get_viccrimegov_data(port))
    # regex=False makes the comma removal an explicit literal replacement,
    # independent of the pandas-version-specific default for `regex`.
    crime_lga[count_column] = (
        crime_lga[count_column]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(int)
    )

    pivot_df = crime_lga.pivot(index=lga_column, columns='filters', values=count_column)
    # Prefix each pivoted column so it is recognisable as a crime count.
    pivot_df.columns = ['scrime_' + str(col) for col in pivot_df.columns]
    # Turn the LGA index back into an ordinary column.
    return pivot_df.reset_index()

In [5]:
def combine_pop_crime_map():
    """Join the per-LGA crime table with the per-LGA population table.

    The two tables come from ``process_crime_lga()`` and
    ``process_pop_map()``. They are inner-joined on the LGA name, so only
    LGAs present in both tables survive. The duplicate name column from the
    population side is removed and the crime-side key is renamed to ``lga``.

    Returns:
        A DataFrame with an ``lga`` column plus the crime and population
        columns of the two source tables.
    """
    crime_key = 'Local Government Area: Descending'

    pop_by_lga = process_pop_map()
    crime_by_lga = process_crime_lga()

    joined = crime_by_lga.merge(
        pop_by_lga,
        how='inner',
        left_on=crime_key,
        right_on='lga_name',
    )
    # 'lga_name' duplicates the join key after the merge; keep one, as 'lga'.
    return joined.drop(columns='lga_name').rename(columns={crime_key: 'lga'})

In [6]:
def get_user_input_location():
    """Prompt for one or more LGA names and a year.

    The names are split on commas; surrounding whitespace is removed and
    empty entries are dropped, so "Melbourne, Alpine" yields
    ``['Melbourne', 'Alpine']`` rather than a second entry with a leading
    space that would never match the data. The year is validated as an
    integer and returned in normalised text form, because callers splice it
    into column names such as ``tpop_<year>``.

    Returns:
        tuple: ``(user_lga_names, user_year)`` where ``user_lga_names`` is a
        list of cleaned name strings and ``user_year`` is the year as a
        string.

    Raises:
        ValueError: If the entered year is not a whole number.
    """
    raw_names = input("Enter the LGA name(s) separated by comma: ")
    user_lga_names = [name.strip() for name in raw_names.split(',') if name.strip()]

    raw_year = input("Enter the year: ").strip()
    try:
        year_number = int(raw_year)
    except ValueError:
        raise ValueError(f"The year must be a whole number, got {raw_year!r}") from None
    # Normalise (e.g. "+2028" -> "2028") so the text matches column suffixes.
    user_year = str(year_number)

    # Years after 2023 are handled by prediction rather than lookup.
    if year_number > 2023:
        print('The input year is in the future\nThe crime count is predicted based on the estimated population')
    return user_lga_names, user_year

In [7]:
def predict_crime_count(lga_name, user_year, merged_df, lga_data):
    """Extrapolate a crime count for one LGA from its population.

    Fits a degree-2 polynomial regression of crime count on population using
    the ``tpop_2019``..``tpop_2023`` and ``scrime_2019``..``scrime_2023``
    values of the rows in ``merged_df`` whose ``lga`` equals ``lga_name``,
    then evaluates the fit at the population stored in
    ``tpop_<user_year>`` of the first row of ``lga_data``.

    Args:
        lga_name: Value of the ``lga`` column selecting the training rows.
        user_year: Year whose ``tpop_<user_year>`` column supplies the
            population to predict for.
        merged_df: Frame holding the ``lga``, ``tpop_*`` and ``scrime_*``
            columns used for training.
        lga_data: Frame whose first row provides the target-year population.

    Returns:
        int: The predicted crime count, truncated towards zero by ``int()``.
    """
    history_years = range(2019, 2024)
    pop_cols = [f'tpop_{yr}' for yr in history_years]
    crime_cols = [f'scrime_{yr}' for yr in history_years]

    # Select the LGA's rows once and flatten each column group to 1-D.
    lga_rows = merged_df[merged_df['lga'] == lga_name]
    populations = lga_rows[pop_cols].values.flatten()
    crimes = lga_rows[crime_cols].values.flatten()

    # Expand population into polynomial terms, then fit a linear model on them.
    quadratic = PolynomialFeatures(degree=2)
    population_terms = quadratic.fit_transform(populations.reshape(-1, 1))
    regressor = LinearRegression()
    regressor.fit(population_terms, crimes)

    target_population = lga_data.iloc[0][f'tpop_{user_year}']
    estimate = regressor.predict(quadratic.transform([[target_population]]))
    return int(estimate[0])

In [8]:
def print_pop_crime_map():
    """Prompt for LGAs and a year, then display a map of population and crime.

    Workflow:
      1. Read the LGA names and year via ``get_user_input_location()``.
      2. Build the merged crime/population table via
         ``combine_pop_crime_map()``.
      3. Geocode each LGA name (as "<name>, Australia") with Nominatim.
      4. For every geocoded LGA, look up the population in
         ``tpop_<year>``. For 2019-2023 the crime count is read from
         ``scrime_<year>``; for later years it is estimated with
         ``predict_crime_count``.
      5. Draw one marker per geocoded LGA and display the map, or print a
         message when nothing could be geocoded.

    Years before 2019, or years whose population/crime column is absent from
    the data, no longer raise ``KeyError``: the popup reports the missing
    value instead.

    Returns:
        None. The map is shown with ``display`` (available in Jupyter).
    """
    user_lga_names, user_year = get_user_input_location()
    year_number = int(user_year)
    merged_df = combine_pop_crime_map()

    # Restrict the table to the LGAs the user asked about.
    filtered_data = merged_df[merged_df['lga'].isin(user_lga_names)]

    pop_column = f'tpop_{user_year}'
    crime_column = f'scrime_{user_year}'
    # The requested year may have no population column at all.
    has_population = pop_column in filtered_data.columns

    geolocator = Nominatim(user_agent="city_locator")

    locations = {}
    population = {}
    crime_count = {}
    for lga_name in user_lga_names:
        location = geolocator.geocode(lga_name + ', Australia')
        if not location:
            # Geocoder found nothing for this name; it gets no marker.
            continue
        locations[lga_name] = (location.latitude, location.longitude)

        if not has_population:
            continue
        lga_data = filtered_data[(filtered_data['lga'] == lga_name) & (filtered_data[pop_column].notnull())]
        if lga_data.empty:
            continue

        first_row = lga_data.iloc[0]
        population[lga_name] = first_row[pop_column]
        if 2019 <= year_number <= 2023:
            # Observed range: read the recorded count when the column exists.
            if crime_column in lga_data.columns:
                crime_count[lga_name] = first_row[crime_column]
        elif year_number > 2023:
            # Beyond the observed range: estimate from the population.
            crime_count[lga_name] = predict_crime_count(lga_name, year_number, merged_df, lga_data)
        # Years before 2019 have no crime figure; the popup says so below.

    if not locations:
        print("No valid locations found.")
        return

    # Fixed map centre (lat -36, lon 145). This point lies in Victoria, not
    # at the centre of Australia as an earlier comment claimed.
    map_center = [-36, 145]
    m = folium.Map(location=map_center, zoom_start=6)

    for lga_name, location in locations.items():
        if lga_name in population:
            # .get avoids a KeyError when no crime figure could be determined.
            crime_text = crime_count.get(lga_name, 'not available')
            popup_text = f"{lga_name}<br>Population in {user_year}: {population[lga_name]}<br>Crime in {user_year}: {crime_text}"
        else:
            popup_text = f"No population data available for {user_year}"

        folium.Marker(
            location=location,
            popup=popup_text,
            tooltip=lga_name
        ).add_to(m)

    display(m)

In [10]:
# Run the interactive workflow: prompts for LGA name(s) and a year, then
# displays the resulting map (or a message if no location could be found).
print_pop_crime_map()

Enter the LGA name(s) separated by comma: Melbourne,Alpine
Enter the year: 2028
The input year is in the future
The crime count is predicted based on the estimated population
