# Spatial Data Analysis

## Configuration

In [None]:
# Check the memory available on the host (`free -m` reports values in MiB)
%system free -m

In [None]:
# Import the necessary libraries

# Basic python libraries
import os
import numpy as np

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Geospatial data visualization
import geopandas as gpd
import folium

# Google cloud libraries
from google.cloud import bigquery
from google.cloud import storage

# Pandas and BigQuery
import pandas_gbq as pdg
import pandas as pd

In [None]:
# Show the current working directory; the relative paths used in this
# notebook (e.g. "../data/...") are resolved against it
os.getcwd()

In [None]:
# Raw, interim and processed data directories (relative to the working directory)
raw_data, interim_data, processed_data = (
    "../data/raw/",
    "../data/interim/",
    "../data/processed/",
)

# Directory for generated figures and directory holding configuration files
figures, config = "../reports/figures/", "../config/"

## Data Loading

### neet_chd

In [None]:
# Build the path to neet_chd.csv inside the processed-data directory
csv_filename = "neet_chd.csv"
csv_filepath = os.path.join(processed_data, csv_filename)

# Read the CSV into a DataFrame and show it as the cell output
neet_chd_df = pd.read_csv(filepath_or_buffer=csv_filepath)
neet_chd_df

### other_covariates

In [None]:
# Build the path to other_covariates.csv inside the processed-data directory
csv_filename = "other_covariates.csv"
csv_filepath = os.path.join(processed_data, csv_filename)

# Read the CSV into a DataFrame and show it as the cell output
other_covariates_df = pd.read_csv(filepath_or_buffer=csv_filepath)
other_covariates_df

## Spatial Analysis by LSOA

### Data Filtering

In [None]:
# Keep only the rows flagged in 'home_lsoa_in_bradford'
# (assumes the column is a boolean mask without missing values; TODO confirm)
in_bradford = neet_chd_df['home_lsoa_in_bradford']
neet_chd_filtered = neet_chd_df[in_bradford]

### Count of individuals by LSOA in Bradford

In [None]:
# Index the filtered records by 'lsoa_code'
neet_chd_filtered = neet_chd_filtered.set_index('lsoa_code')

# For each 'lsoa_name': sum the two NEET indicators and count the 'person_id' values
records_by_lsoa = neet_chd_filtered.groupby('lsoa_name')
neet_count_df = records_by_lsoa.agg(
    ever_neet_status=('ever_neet_status', 'sum'),
    persistent_neet_status=('persistent_neet_status', 'sum'),
    total_persons=('person_id', 'count'),
).reset_index()

# Express each indicator as a percentage of 'total_persons', rounded to two decimals
for status in ('ever_neet_status', 'persistent_neet_status'):
    share = neet_count_df[status] / neet_count_df['total_persons'] * 100
    neet_count_df[f'{status}_percentage'] = share.round(2)

# Show the aggregated table as the cell output
neet_count_df

### DataFrames to GeoDataFrames

In [None]:
# Convert neet_chd_filtered to a GeoDataFrame by parsing the WKT strings in 'geometry'.
# The CRS is given as an authority string: the {'init': 'epsg:4326'} dict form is
# deprecated by pyproj and raises a FutureWarning.
neet_crs = "EPSG:4326"
neet_geometry = gpd.GeoDataFrame(
    neet_chd_filtered,
    geometry=gpd.GeoSeries.from_wkt(neet_chd_filtered['geometry']),
    crs=neet_crs,
)

In [None]:
# Keep only the location attributes and drop repeated rows, so each distinct
# combination of these columns appears once
geometry_columns = ['lsoa_name', 'latitude', 'longitude', 'ward_name', 'geometry']
unique_geometry = neet_geometry[geometry_columns].drop_duplicates()
neet_geometry = gpd.GeoDataFrame(unique_geometry)

# Show the de-duplicated GeoDataFrame as the cell output
neet_geometry

### Geometric Components

In [None]:
# NOTE: this repeats the statement of the preceding cell. Selecting the same
# columns and dropping duplicates again leaves neet_geometry unchanged when
# that cell has already run.
neet_geometry = gpd.GeoDataFrame(
    neet_geometry[
        ['lsoa_name', 'latitude', 'longitude', 'ward_name', 'geometry']
    ].drop_duplicates()
)

# Show the de-duplicated GeoDataFrame as the cell output
neet_geometry

### Merge Dataframes

In [None]:
# Attach the geometry attributes to the per-LSOA counts: left join of
# neet_count_df with neet_geometry on 'lsoa_name' (every count row is kept)
merged_counts = neet_count_df.merge(neet_geometry, on='lsoa_name', how='left')

# The merge result is converted back to a GeoDataFrame using the 'geometry' column
neet_geometry_count_gdf = gpd.GeoDataFrame(merged_counts, geometry='geometry')

# Show the merged GeoDataFrame as the cell output
neet_geometry_count_gdf

### Build Choropleth Map using Percentage

In [None]:
# Create a Folium map centred on the mean of the LSOA centroids
# (the centroids are computed once and reused for both coordinates)
lsoa_centroids = neet_geometry_count_gdf.geometry.centroid
m = folium.Map(location=[lsoa_centroids.y.mean(), lsoa_centroids.x.mean()], zoom_start=11)

# NEET indicators to map; each one is drawn from its '<name>_percentage' column
count_columns = ['ever_neet_status', 'persistent_neet_status']

# Largest percentage across all layers, so every layer shares one colour scale.
# The '<name>_percentage' columns must already exist for this lookup to work,
# so they are not recomputed inside the loop below.
max_value = neet_geometry_count_gdf[[f'{column}_percentage' for column in count_columns]].max().max()
shared_bins = np.linspace(0, max_value, 6)

# Add one choropleth layer per indicator
for count_column in count_columns:
    percentage_column = f'{count_column}_percentage'

    choropleth = folium.Choropleth(
        geo_data=neet_geometry_count_gdf,
        name=percentage_column,
        data=neet_geometry_count_gdf,
        columns=['lsoa_name', percentage_column],
        key_on='feature.properties.lsoa_name',
        fill_color='BuGn',
        fill_opacity=0.75,
        line_opacity=0.75,
        legend_name=f'{count_column} percentage',
        bins=shared_bins,  # Same colour scale range for all layers
    ).add_to(m)

    # Tooltip showing the LSOA name, ward name, raw count and percentage
    choropleth.geojson.add_child(
        folium.features.GeoJsonTooltip(
            ['lsoa_name', 'ward_name', count_column, percentage_column],
            aliases=['LSOA Name:', 'Ward Name:', 'Count:', 'Percent:'],
        )
    )

# Add Layer Control to the map so the layers can be toggled
folium.LayerControl(collapsed=False).add_to(m)

# Save the map into the configured figures directory
bradford_lsoa_map = os.path.join(figures, "bradford_lsoa_map.html")
m.save(bradford_lsoa_map)

# Display the map
display(m)
