Prepare environment and necessary libraries for data visualization

In [42]:
!pip install geopandas
!pip install ipympl
!pip install boto3
!pip install s3fs

Collecting s3fs
  Downloading s3fs-2024.10.0-py3-none-any.whl.metadata (1.7 kB)
Collecting fsspec==2024.10.0.* (from s3fs)
  Downloading fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)
Downloading s3fs-2024.10.0-py3-none-any.whl (29 kB)
Downloading fsspec-2024.10.0-py3-none-any.whl (179 kB)
Installing collected packages: fsspec, s3fs
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2023.6.0
    Uninstalling fsspec-2023.6.0:
      Successfully uninstalled fsspec-2023.6.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jupyter-scheduler 2.9.0 requires fsspec==2023.6.0, but you have fsspec 2024.10.0 which is incompatible.[0m[31m
[0mSuccessfully installed fsspec-2024.10.0 s3fs-2024.10.0


Overview of the distribution of land value, new project value, and property improvement value across local areas of the City of Vancouver

In [88]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.lines import Line2D
from matplotlib.ticker import MaxNLocator, ScalarFormatter
import ipywidgets as widgets
from IPython.display import display
import boto3
import s3fs

# Authenticated S3 filesystem handle (anon=False, so AWS credentials are used)
s3 = s3fs.S3FileSystem(anon=False)

# Where the input files live in S3
csv_s3_path = "s3://cmpt732-project-raw-data/analysis_output/vancouver_land_value_and_investment.csv"
shapefile_s3_path = "s3://cmpt732-project-raw-data/local-area-boundary/local-area-boundary.shp"

# Table of metrics keyed by local area and year, read directly from S3
df = pd.read_csv(csv_s3_path)

# Local-area boundary polygons, read with the pyogrio engine and reprojected
# to EPSG:26910 (a projected UTM CRS commonly used for Vancouver)
gdf = gpd.read_file(shapefile_s3_path, engine='pyogrio').to_crs(epsg=26910)


# Function to update the plot based on selected year
def update_value_plot(year):
    """Draw a 3D bar map of average land, project and improvement values for one year.

    Rows of the notebook-level ``df`` whose ``year`` equals ``year`` are joined
    to the local-area polygons in the notebook-level ``gdf`` (``name`` matched
    against ``local_area``). Each area's outline is drawn at z=0 and three
    bars are placed next to its centroid, one per metric.

    Args:
        year: Year to display; compared for equality against ``df['year']``.

    Returns:
        None. The figure is shown with ``plt.show()``; if no area has data for
        ``year`` a message is printed instead.
    """
    # (column in df, bar colour, legend label): one definition shared by the
    # bars and the legend so the two cannot drift apart.
    metric_specs = [
        ('average_land_value', 'red', 'Average Land Value'),
        ('average_project_value', 'green', 'Average New Project Value'),
        ('average_improvement_value', 'blue', 'Average Improvement Value'),
    ]

    # Bar footprint and label offset, in CRS units (metres for EPSG:26910)
    bar_width = 200
    bar_depth = 200
    label_offset = 200

    # Keep only the selected year, then attach the polygons (inner join, so
    # areas without data for this year are dropped)
    filtered_data = df[df['year'] == year]
    merged_data = gdf.merge(filtered_data, left_on='name', right_on='local_area')

    if merged_data.empty:
        print(f"No data available for year {year}")
        return

    # Compute the centroids once and reuse them for both coordinates
    centroids = merged_data.geometry.centroid
    merged_data['centroid_x'] = centroids.x
    merged_data['centroid_y'] = centroids.y

    # Set up 3D plot
    fig = plt.figure(figsize=(15, 12))
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(elev=20, azim=60)

    # Base map outline; a MultiPolygon has no single exterior, so draw the
    # exterior ring of each of its member polygons.
    for geom in merged_data.geometry:
        polygons = geom.geoms if geom.geom_type == 'MultiPolygon' else [geom]
        for polygon in polygons:
            outline_x, outline_y = polygon.exterior.xy
            ax.plot(outline_x, outline_y, 0, color='black', alpha=0.5)

    # One bar per metric, placed side by side along x starting at the centroid
    for _, row in merged_data.iterrows():
        for i, (column, color, _label) in enumerate(metric_specs):
            height = row[column]
            if pd.isna(height):
                continue  # nothing to draw for a missing value
            ax.bar3d(row['centroid_x'] + i * bar_width, row['centroid_y'], 0,
                     dx=bar_width, dy=bar_depth, dz=height, color=color, alpha=0.5)

    # Area names at ground level (z=0), shifted in x and y away from the bars
    for _, row in merged_data.iterrows():
        ax.text(row['centroid_x'] + label_offset, row['centroid_y'] + label_offset, 0,
                row['name'], color='black', fontsize=10, ha='center', va='center', zorder=5)

    # Grid off; the axes themselves stay visible
    ax.grid(False)
    ax.axis('on')

    # Flatten the z direction relative to x and y
    ax.set_box_aspect([1, 1, 0.5])

    ax.set_title(f"Vancouver Land Value and Property Investments ({year})", fontsize=16, pad=20)

    # gdf is reprojected to EPSG:26910, so x/y are UTM coordinates in metres,
    # not degrees of longitude/latitude.
    ax.set_xlabel("Easting (m)", fontsize=12)
    ax.set_ylabel("Northing (m)", fontsize=12)
    # NOTE(review): the unit "M CAD" is not verifiable from this notebook; confirm
    # against how the CSV columns were produced.
    ax.set_zlabel("Value (M CAD)", fontsize=12)

    # Up to 10 intervals on the z axis for a denser scale
    ax.zaxis.set_major_locator(MaxNLocator(nbins=10))

    # Proxy handles: bar3d collections do not produce legend entries themselves
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=label)
        for _column, color, label in metric_specs
    ]
    ax.legend(handles=legend_elements, loc='upper left', fontsize=10, title="Metrics")

    ax.autoscale_view()
    plt.tight_layout()
    plt.show()


# Year range available in the data, as plain Python ints
value_year_min = int(df['year'].min())
value_year_max = int(df['year'].max())

# Slider for year selection. The default is 2023 when the data covers it and
# the nearest available year otherwise. continuous_update is off so the 3D
# figure is rebuilt only when the slider is released, not on every
# intermediate value while dragging.
value_year_slider = widgets.IntSlider(
    value=min(max(2023, value_year_min), value_year_max),
    min=value_year_min,
    max=value_year_max,
    step=1,
    description='Year:',
    continuous_update=False,
    layout=widgets.Layout(width='40%')  # Keep the slider within the notebook cell width
)

# Bind the slider to update_value_plot and display the resulting widget
interactive_value_plot = widgets.interactive(update_value_plot, year=value_year_slider)
display(interactive_value_plot)


interactive(children=(IntSlider(value=2023, description='Year:', layout=Layout(width='40%'), max=2024, min=202…

Trend of land value, property investment, and property improvement for areas in the city of Vancouver

In [102]:
# Function to plot the data based on selected local area
def plot_value_trends(local_area):
    """Plot the yearly trend of the three average-value metrics for one local area.

    Reads the notebook-level ``df`` without modifying it, keeps the rows whose
    ``local_area`` equals ``local_area``, orders them by year and draws one
    line per metric.

    Args:
        local_area: Local-area name to match against ``df['local_area']``.

    Returns:
        None. The figure is shown with ``plt.show()``; if the area has no rows
        a message is printed instead.
    """
    # (column, legend label, colour, marker). Colours match the 3D value map
    # in this notebook: land = red, project = green, improvement = blue.
    series_specs = [
        ('average_land_value', 'Average Land Value', 'red', 'o'),
        ('average_project_value', 'Average Project Value', 'green', 's'),
        ('average_improvement_value', 'Average Improvement Value', 'blue', '^'),
    ]

    df_local = df[df['local_area'] == local_area]
    if df_local.empty:
        print(f"No data available for {local_area}")
        return

    # Work on a copy with integer years, sorted so the lines never run backwards;
    # the shared df is left untouched.
    df_local = df_local.assign(year=df_local['year'].astype(int)).sort_values(by='year')

    fig, ax = plt.subplots(figsize=(10, 6))
    for column, label, color, marker in series_specs:
        ax.plot(df_local['year'], df_local[column], label=label, color=color, marker=marker)

    # Years are whole numbers; avoid fractional ticks such as 2021.5
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    ax.set_xlabel('Year')
    # NOTE(review): the 3D value map labels the same columns "Value (M CAD)";
    # confirm which unit is correct.
    ax.set_ylabel('Value ($)')
    ax.set_title(f"Trends in Average Values for {local_area}")
    ax.legend()
    ax.grid(True)

    plt.show()

# Distinct local areas in df, in order of first appearance
trend_area_choices = df['local_area'].unique()

# Dropdown listing every area, with the first one preselected
local_area_dropdown = widgets.Dropdown(
    options=trend_area_choices,
    value=trend_area_choices[0],
    description='Local Area:',
)

# Re-run plot_value_trends whenever the selection changes, and show the widget
interactive_value_trend_plot = widgets.interactive(
    plot_value_trends,
    local_area=local_area_dropdown,
)
display(interactive_value_trend_plot)


interactive(children=(Dropdown(description='Local Area:', options=('Victoria-Fraserview', 'Strathcona', 'Dunba…

Overview of the distribution of zoning classifications across local areas of the City of Vancouver

In [111]:
def update_classification_plot(year):
    """Draw a 3D bar map of residential vs. non-residential zoning counts for one year.

    Rows of the notebook-level ``df`` whose ``year`` equals ``year`` are joined
    to the local-area polygons in the notebook-level ``gdf`` (``name`` matched
    against ``local_area``). Each area's outline is drawn at z=0 and two bars
    are placed at its centroid, one per zoning type.

    Args:
        year: Year to display; compared for equality against ``df['year']``.

    Returns:
        None. The figure is shown with ``plt.show()``; if no area has data for
        ``year`` a message is printed instead.
    """
    # Bar footprint and label offset, in CRS units (metres for EPSG:26910)
    bar_width = 200
    bar_depth = 200
    label_offset = 200

    # (column in df, bar colour, legend label, x offset of the bar from the centroid)
    bar_specs = [
        ('residential_zoning_count', 'blue', 'Residential Zoning', -bar_width / 2),
        ('non_residential_zoning_count', 'red', 'Non-Residential Zoning', bar_width / 2),
    ]

    # Keep only the selected year, then attach the polygons (inner join)
    filtered_data = df[df['year'] == year]
    merged_data = gdf.merge(filtered_data, left_on='name', right_on='local_area')

    if merged_data.empty:
        print(f"No data available for year {year}")
        return

    # Compute the centroids once and reuse them for both coordinates
    centroids = merged_data.geometry.centroid
    merged_data['centroid_x'] = centroids.x
    merged_data['centroid_y'] = centroids.y

    # Set up 3D plot
    fig = plt.figure(figsize=(15, 12))
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(elev=20, azim=60)

    # Base map outline; a MultiPolygon has no single exterior, so draw the
    # exterior ring of each of its member polygons.
    for geom in merged_data.geometry:
        polygons = geom.geoms if geom.geom_type == 'MultiPolygon' else [geom]
        for polygon in polygons:
            outline_x, outline_y = polygon.exterior.xy
            ax.plot(outline_x, outline_y, 0, color='black', alpha=0.5)

    # Two adjacent bars per area
    for _, row in merged_data.iterrows():
        for column, color, _label, x_shift in bar_specs:
            count = row[column]
            if pd.isna(count):
                continue  # nothing to draw for a missing value
            ax.bar3d(row['centroid_x'] + x_shift, row['centroid_y'], 0,
                     dx=bar_width, dy=bar_depth, dz=count, color=color, alpha=0.5)

    # Area names at ground level (z=0), shifted in y away from the bars
    for _, row in merged_data.iterrows():
        ax.text(row['centroid_x'], row['centroid_y'] + label_offset, 0,
                row['name'], color='black', fontsize=10, ha='center', va='center', zorder=5)

    # Grid off; the axes themselves stay visible
    ax.grid(False)
    ax.axis('on')

    # Flatten the z direction relative to x and y
    ax.set_box_aspect([1, 1, 0.5])

    ax.set_title(f"Vancouver Zoning by Classification ({year})", fontsize=16, pad=20)

    # gdf is reprojected to EPSG:26910, so x/y are UTM coordinates in metres,
    # not degrees of longitude/latitude.
    ax.set_xlabel("Easting (m)", fontsize=12)
    ax.set_ylabel("Northing (m)", fontsize=12)
    # NOTE(review): "(M)" suggests millions, which cannot be verified from this
    # notebook; confirm the unit of the count columns.
    ax.set_zlabel("Count (M)", fontsize=12)

    # Up to 10 intervals on the z axis for a denser scale
    ax.zaxis.set_major_locator(MaxNLocator(nbins=10))

    # Proxy handles: bar3d collections do not produce legend entries themselves
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=label)
        for _column, color, label, _x_shift in bar_specs
    ]
    ax.legend(handles=legend_elements, loc='upper left', fontsize=10, title="Zoning Type")

    ax.autoscale_view()
    plt.tight_layout()
    plt.show()


# Year range available in the data, as plain Python ints
classification_year_min = int(df['year'].min())
classification_year_max = int(df['year'].max())

# Slider for year selection. The default is 2023 when the data covers it and
# the nearest available year otherwise. continuous_update is off so the 3D
# figure is rebuilt only when the slider is released, not on every
# intermediate value while dragging.
classification_year_slider = widgets.IntSlider(
    value=min(max(2023, classification_year_min), classification_year_max),
    min=classification_year_min,
    max=classification_year_max,
    step=1,
    description='Year:',
    continuous_update=False,
    layout=widgets.Layout(width='40%')  # Keep the slider within the notebook cell width
)

# Bind the slider to update_classification_plot and display the resulting widget
interactive_classification_plot = widgets.interactive(update_classification_plot, year=classification_year_slider)
display(interactive_classification_plot)

interactive(children=(IntSlider(value=2023, description='Year:', layout=Layout(width='40%'), max=2024, min=202…

Pie chart of the distribution of zoning classifications in the City of Vancouver, by local area

In [110]:
# Function to plot the "3D" pie chart of zoning classifications based on selected local area and year
def plot_zoning_3d_pie(local_area, year):
    """Show the residential / non-residential zoning split for one area and year as a pie.

    Reads the notebook-level ``df`` without modifying it and uses the first
    row matching both ``local_area`` and ``year``. The pie is a regular 2D
    chart; the exploded first wedge and the shadow give it a "3D" look.

    Args:
        local_area: Local-area name to match against ``df['local_area']``.
        year: Year to match against ``df['year']`` (compared as integers).

    Returns:
        None. The figure is shown with ``plt.show()``; a message is printed
        instead when there is no matching row or no usable counts.
    """
    # Compare on an integer view of the year column instead of rewriting the
    # shared df on every call.
    df_local = df[(df['local_area'] == local_area) & (df['year'].astype(int) == year)]

    if df_local.empty:
        print(f"No data available for {local_area} in year {year}")
        return

    labels = ['Residential Zoning', 'Non-Residential Zoning']
    values = [
        df_local['residential_zoning_count'].values[0],
        df_local['non_residential_zoning_count'].values[0],
    ]

    # A pie needs non-negative values with a positive total; missing, negative
    # or all-zero counts cannot be normalised into wedges.
    if pd.isna(values).any() or min(values) < 0 or sum(values) <= 0:
        print(f"No zoning counts to plot for {local_area} in year {year}")
        return

    fig, ax = plt.subplots(figsize=(10, 6))

    # Explode the first wedge and add a shadow for the "3D" look
    ax.pie(values, labels=labels, autopct='%1.1f%%', startangle=90,
           explode=(0.1, 0), shadow=True)

    ax.set_title(f"Zoning Classifications for {local_area} in {year}")
    ax.axis('equal')  # Equal aspect ratio ensures the pie is drawn as a circle.

    plt.show()

# Distinct local areas in df, in order of first appearance
zoning_area_choices = df['local_area'].unique()

# Area selector, preselecting the first area
local_area_dropdown = widgets.Dropdown(
    options=zoning_area_choices,
    value=zoning_area_choices[0],
    description='Local Area:',
)

# Year selector spanning the years present in df, starting at the earliest;
# the chart is refreshed only when the slider is released
year_slider = widgets.IntSlider(
    value=df['year'].min(),
    min=df['year'].min(),
    max=df['year'].max(),
    description='Year:',
    continuous_update=False,
)

# Wire both controls to plot_zoning_3d_pie and show them together with the chart
interactive_pie_chart = widgets.interactive(
    plot_zoning_3d_pie,
    local_area=local_area_dropdown,
    year=year_slider,
)
display(interactive_pie_chart)

interactive(children=(Dropdown(description='Local Area:', options=('Victoria-Fraserview', 'Strathcona', 'Dunba…

Graph of the distribution of property development across local areas of the City of Vancouver

In [112]:
def update_development_plot(year):
    """Draw a 3D bar map of residential vs. non-residential property-use counts for one year.

    Rows of the notebook-level ``df`` whose ``year`` equals ``year`` are joined
    to the local-area polygons in the notebook-level ``gdf`` (``name`` matched
    against ``local_area``). Each area's outline is drawn at z=0 and two bars
    are placed at its centroid, one per property-use type.

    Args:
        year: Year to display; compared for equality against ``df['year']``.

    Returns:
        None. The figure is shown with ``plt.show()``; if no area has data for
        ``year`` a message is printed instead.
    """
    # Bar footprint and label offset, in CRS units (metres for EPSG:26910)
    bar_width = 200
    bar_depth = 200
    label_offset = 200

    # (column in df, bar colour, legend label, x offset of the bar from the centroid)
    bar_specs = [
        ('residential_property_use_count', 'green',
         'Residential Property Development', -bar_width / 2),
        ('non_residential_property_use_count', 'orange',
         'Non-Residential Property Development', bar_width / 2),
    ]

    # Keep only the selected year, then attach the polygons (inner join)
    filtered_data = df[df['year'] == year]
    merged_data = gdf.merge(filtered_data, left_on='name', right_on='local_area')

    if merged_data.empty:
        print(f"No data available for year {year}")
        return

    # Compute the centroids once and reuse them for both coordinates
    centroids = merged_data.geometry.centroid
    merged_data['centroid_x'] = centroids.x
    merged_data['centroid_y'] = centroids.y

    # Set up 3D plot
    fig = plt.figure(figsize=(15, 12))
    ax = fig.add_subplot(111, projection='3d')
    ax.view_init(elev=20, azim=60)

    # Base map outline; a MultiPolygon has no single exterior, so draw the
    # exterior ring of each of its member polygons.
    for geom in merged_data.geometry:
        polygons = geom.geoms if geom.geom_type == 'MultiPolygon' else [geom]
        for polygon in polygons:
            outline_x, outline_y = polygon.exterior.xy
            ax.plot(outline_x, outline_y, 0, color='black', alpha=0.5)

    # Two adjacent bars per area
    for _, row in merged_data.iterrows():
        for column, color, _label, x_shift in bar_specs:
            count = row[column]
            if pd.isna(count):
                continue  # nothing to draw for a missing value
            ax.bar3d(row['centroid_x'] + x_shift, row['centroid_y'], 0,
                     dx=bar_width, dy=bar_depth, dz=count, color=color, alpha=0.5)

    # Area names at ground level (z=0), shifted in y away from the bars
    for _, row in merged_data.iterrows():
        ax.text(row['centroid_x'], row['centroid_y'] + label_offset, 0,
                row['name'], color='black', fontsize=10, ha='center', va='center', zorder=5)

    # Grid off; the axes themselves stay visible
    ax.grid(False)
    ax.axis('on')

    # Flatten the z direction relative to x and y
    ax.set_box_aspect([1, 1, 0.5])

    ax.set_title(f"Vancouver Property Development by Usage ({year})", fontsize=16, pad=20)

    # gdf is reprojected to EPSG:26910, so x/y are UTM coordinates in metres,
    # not degrees of longitude/latitude.
    ax.set_xlabel("Easting (m)", fontsize=12)
    ax.set_ylabel("Northing (m)", fontsize=12)
    # NOTE(review): "(M)" suggests millions, which cannot be verified from this
    # notebook; confirm the unit of the count columns.
    ax.set_zlabel("Count (M)", fontsize=12)

    # Up to 10 intervals on the z axis for a denser scale
    ax.zaxis.set_major_locator(MaxNLocator(nbins=10))

    # Proxy handles: bar3d collections do not produce legend entries themselves
    legend_elements = [
        Line2D([0], [0], marker='o', color='w', markerfacecolor=color, markersize=10, label=label)
        for _column, color, label, _x_shift in bar_specs
    ]
    ax.legend(handles=legend_elements, loc='upper left', fontsize=10, title="Property Development Type")

    ax.autoscale_view()
    plt.tight_layout()
    plt.show()


# Year range available in the data, as plain Python ints
development_year_min = int(df['year'].min())
development_year_max = int(df['year'].max())

# Slider for year selection. The default is 2023 when the data covers it and
# the nearest available year otherwise. continuous_update is off so the 3D
# figure is rebuilt only when the slider is released, not on every
# intermediate value while dragging.
development_year_slider = widgets.IntSlider(
    value=min(max(2023, development_year_min), development_year_max),
    min=development_year_min,
    max=development_year_max,
    step=1,
    description='Year:',
    continuous_update=False,
    layout=widgets.Layout(width='40%')  # Keep the slider within the notebook cell width
)


# Bind the slider to update_development_plot and display the resulting widget
interactive_development_plot = widgets.interactive(update_development_plot, year=development_year_slider)
display(interactive_development_plot)

interactive(children=(IntSlider(value=2023, description='Year:', layout=Layout(width='40%'), max=2024, min=202…

Pie chart of the distribution of property usage types in the City of Vancouver, by local area

In [113]:
# Function to plot the "3D" pie chart of property use classifications based on selected local area and year
def plot_property_use_pie(local_area, year):
    """Show the residential / non-residential property-use split for one area and year as a pie.

    Reads the notebook-level ``df`` without modifying it and uses the first
    row matching both ``local_area`` and ``year``. The pie is a regular 2D
    chart; the exploded first wedge and the shadow give it a "3D" look.

    Args:
        local_area: Local-area name to match against ``df['local_area']``.
        year: Year to match against ``df['year']`` (compared as integers).

    Returns:
        None. The figure is shown with ``plt.show()``; a message is printed
        instead when there is no matching row or no usable counts.
    """
    # Compare on an integer view of the year column instead of rewriting the
    # shared df on every call.
    df_local = df[(df['local_area'] == local_area) & (df['year'].astype(int) == year)]

    if df_local.empty:
        print(f"No data available for {local_area} in year {year}")
        return

    labels = ['Residential Property Use', 'Non-Residential Property Use']
    values = [
        df_local['residential_property_use_count'].values[0],
        df_local['non_residential_property_use_count'].values[0],
    ]

    # A pie needs non-negative values with a positive total; missing, negative
    # or all-zero counts cannot be normalised into wedges.
    if pd.isna(values).any() or min(values) < 0 or sum(values) <= 0:
        print(f"No property use counts to plot for {local_area} in year {year}")
        return

    fig, ax = plt.subplots(figsize=(10, 6))

    # Explode the first wedge and add a shadow for the "3D" look
    ax.pie(values, labels=labels, autopct='%1.1f%%', startangle=90,
           explode=(0.1, 0), shadow=True)

    ax.set_title(f"Property Use Classifications for {local_area} in {year}")
    ax.axis('equal')  # Equal aspect ratio ensures the pie is drawn as a circle.

    plt.show()

# Distinct local areas in df, in order of first appearance
property_use_area_choices = df['local_area'].unique()

# Area selector, preselecting the first area
local_area_dropdown = widgets.Dropdown(
    options=property_use_area_choices,
    value=property_use_area_choices[0],
    description='Local Area:',
)

# Year selector spanning the years present in df, starting at the earliest;
# the chart is refreshed only when the slider is released
year_slider = widgets.IntSlider(
    value=df['year'].min(),
    min=df['year'].min(),
    max=df['year'].max(),
    description='Year:',
    continuous_update=False,
)

# Wire both controls to plot_property_use_pie
interactive_property_use_pie = widgets.interactive(
    plot_property_use_pie,
    local_area=local_area_dropdown,
    year=year_slider,
)

# As the last expression of the cell, the widget is rendered by the notebook
interactive_property_use_pie

interactive(children=(Dropdown(description='Local Area:', options=('Victoria-Fraserview', 'Strathcona', 'Dunba…