In [64]:
# import requests
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.colors
import os
from pathlib import Path

import numpy as np
import pandas as pd

from cities.utils.clean_gdp import clean_gdp
from cities.utils.cleaning_utils import standardize_and_scale, find_repo_root
from cities.utils.data_grabber import DataGrabber
from cities.queries.fips_query import FipsQuery


In [91]:
# input what to plot
fips = '36061'
n_kins = 5
variable = "gdp"
type_of_plot = "stacked_bar"

# run fips query
f  = FipsQuery(20003, outcome_var = "gdp",
               feature_groups_with_weights= {"gdp":0, "population":4},
               lag = 3, top =10, time_decay = 1.03)
f.find_euclidean_kins()
kins_df = f.euclidean_kins

# pull data
path = find_repo_root()
variable_df = pd.read_csv(os.path.join(path, "data/processed/" + variable + "_wide.csv"))

# restrict variable_df to GeoFIPS in kins_df
df_kins_variable = get_df_kins_variable(n_kins, kins_df, variable_df)

# high-level plotting functions
[fig, subplot_row_indices, subplot_col_indices] = grid_of_subplots(n_kins, type_of_plot=type_of_plot)
[what_to_plot, trace_row_indices, trace_col_indices] = traces_to_plot(df_kins_variable, subplot_row_indices, subplot_col_indices, type_of_plot=type_of_plot)
place_traces(fig, trace_row_indices, trace_col_indices, what_to_plot, type_of_plot=type_of_plot)

In [89]:
def get_df_kins_variable(n_kins, kins_df, variable_df):
    kins_df = kins_df[0:n_kins]
    df_kins_variable = variable_df[variable_df["GeoFIPS"].isin(kins_df["GeoFIPS"])]
    df_kins_variable = df_kins_variable.reset_index(drop=True)
    return df_kins_variable
    
def grid_of_subplots(n_kins, type_of_plot = "bar"):
    num_rows = int(np.ceil(np.sqrt(n_kins)))
    num_cols = int(np.ceil(n_kins / num_rows))
    match type_of_plot:
        case "bar":
            subplot_type = "bar"
        case "pie":
            subplot_type = "pie"
        case "stacked_bar":
            subplot_type = "bar"
    fig = make_subplots(rows=num_rows, cols=num_cols, specs=[[{"type": subplot_type} for j in range(num_cols)] for i in range(num_rows)])
    
    subplot_row_indices = [i for i in range(1, num_rows + 1) for j in range(1, num_cols + 1)][:n_kins]
    subplot_col_indices = [j for i in range(1, num_rows + 1) for j in range(1, num_cols + 1)][:n_kins]
    
    return fig, subplot_row_indices, subplot_col_indices

def traces_to_plot(df_kins_variable, subplot_row_indices, subplot_col_indices, type_of_plot = "bar"):
    match type_of_plot:
        case "bar":
            what_to_plot, trace_row_indices, trace_col_indices = traces_to_plot_bar(df_kins_variable, subplot_row_indices, subplot_col_indices)
        case "pie":
            trace = go.Pie(labels = [col], values = [row[col]], name = row["GeoName"],
                marker_colors=[color_sequence[color_index % len(color_sequence)]]
            )
        case "stacked_bar":
            what_to_plot, trace_row_indices, trace_col_indices = traces_to_plot_bar(df_kins_variable, subplot_row_indices, subplot_col_indices)

    return what_to_plot, trace_row_indices, trace_col_indices

def traces_to_plot_pie(df_kins_variable, subplot_row_indices, subplot_col_indices):
    what_to_plot = []
    trace_row_indices = []
    trace_col_indices = []
    
    # find float columns
    data_columns = df_kins_variable[df_kins_variable.select_dtypes(include=['float64']).columns]
    
    # Define your color sequence
    # color_sequence = plotly.colors.DEFAULT_PLOTLY_COLORS

    for i, row in df_kins_variable.iterrows():
        color_index = 0  # Reset color index for each subplot
        trace = go.Pie(labels = data_columns, values = row[data_columns], name = row["GeoName"])
        what_to_plot.append(trace)
        trace_row_indices.append(subplot_row_indices[i])
        trace_col_indices.append(subplot_col_indices[i])
            # color_index += 1  # Increment color index
    return what_to_plot, trace_row_indices, trace_col_indices

def traces_to_plot_bar(df_kins_variable, subplot_row_indices, subplot_col_indices, type_of_plot = "bar"):
    what_to_plot = []
    trace_row_indices = []
    trace_col_indices = []
    
    # find float columns
    data_columns = df_kins_variable[df_kins_variable.select_dtypes(include=['float64']).columns]
    
    # Define your color sequence
    color_sequence = plotly.colors.DEFAULT_PLOTLY_COLORS

    for i, row in df_kins_variable.iterrows():
        color_index = 0  # Reset color index for each subplot
        for col in data_columns:
            trace = go.Bar(x=[row["GeoName"]], y = [row[col]], name = col, 
                marker_color=color_sequence[color_index % len(color_sequence)]  # Use modulo to cycle through colors
            )
            what_to_plot.append(trace)
            trace_row_indices.append(subplot_row_indices[i])
            trace_col_indices.append(subplot_col_indices[i])
            color_index += 1  # Increment color index
    return what_to_plot, trace_row_indices, trace_col_indices

def place_traces(fig, trace_row_indices, trace_col_indices, what_to_plot, type_of_plot = "bar"):
    for i in range(len(what_to_plot)):
        fig.add_trace(what_to_plot[i], row=trace_row_indices[i], col=trace_col_indices[i])
    match type_of_plot:
        case "stacked_bar":
            fig.update_layout(barmode='stack')
    fig.show()

In [43]:
df_kins_variable

Unnamed: 0,GeoFIPS,GeoName,agri_forestry_mining,construction,manufacturing,wholesale_trade,retail_trade,transport_utilities,information,finance_real_estate,prof_sci_mgmt_admin,education_health,arts_entertainment,other_services,public_admin
168,5129,"Searcy, AR",0.057143,0.054733,0.172117,0.005508,0.140103,0.065404,0.001721,0.042341,0.050602,0.281928,0.055422,0.017212,0.055766
468,13181,"Lincoln, GA",0.005215,0.117014,0.14309,0.03781,0.079205,0.03781,0.006193,0.024446,0.104954,0.254563,0.07073,0.059322,0.059648
846,19135,"Monroe, IA",0.050723,0.065176,0.213253,0.014453,0.106899,0.045269,0.018816,0.065721,0.03327,0.247614,0.05754,0.046359,0.034906
879,20003,"Anderson, KS",0.083284,0.097264,0.093099,0.036288,0.114218,0.059488,0.017847,0.063355,0.019929,0.278703,0.02558,0.071981,0.038965
899,20043,"Doniphan, KS",0.075592,0.05696,0.180197,0.024754,0.077455,0.054831,0.00346,0.035667,0.048709,0.289327,0.055629,0.050572,0.046846


In [95]:
def plot_categorical(df):
    # Create a plotly figure
    fig = go.Figure()
    
    # Loop through each column and add as a trace (bar)
    for column in df.columns:
        hover_text = [f'<b>{column}</b><br> {round(value*100*10)/10}%' for value in df[column]]
        fig.add_trace(go.Bar(
            x=df.index, 
            y=df[column],
            name=column,
            hovertext=hover_text,
            hoverinfo='text'
        ))

    
    # Adjust layout for stacked bar
    fig.update_layout(
        barmode='stack',
    )
    # remove axes
    fig.update_xaxes(showticklabels=False)
    fig.update_yaxes(showticklabels=False)
    # Show figure
    return fig


def plot_timeseries(df):
    # Assuming your dataframe is named df
    years = df.columns
    values = df.values[0]

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=years, y=values, mode='lines+markers', name='Values'))

    fig.update_layout(
        title='Time Series Plot',
        xaxis_title='Year',
        yaxis_title='Value',
        xaxis=dict(tickangle=-45)  # Rotating x labels for better readability
    )
    return fig

def plot_kins_stack_hist(df):

    # Initialize subplots
    fig = go.Figure()

    # Loop through each column and add as a trace (bar)
    for column in df.columns:
        # if float valud 
        if df[column].dtype == "float64":
            hover_text = [f'<b>{column}</b><br> {round(value*100*10)/10}%' for value in df[column]]
            fig.add_trace(go.Bar(
                x = df.index,
                y=df[column].values,
                name=column,
                hovertext=hover_text,
                hoverinfo='text'
            ))

    # Adjust layout for stacked bar
    fig.update_layout(
        barmode='stack',
        # y axis and grid off
        yaxis=dict(showgrid=False, showticklabels=False),
    )
    
    return fig

def plot_kins_hist(df):
    """
    Create a grid of subplots based on the provided list of plots.

    :param plots: A list of go.Figure() objects.
    :return: A subplot figure with the provided plots.
    """
    
    # Calculate the grid size
    total_plots = len(df)
    num_rows = int(np.ceil(np.sqrt(total_plots)))
    num_cols = int(np.ceil(total_plots / num_rows))

    # Define color palette
    colors = px.colors.qualitative.Plotly

    # Initialize subplots
    fig = make_subplots(rows=num_rows, cols=num_cols, shared_xaxes=True)

    # Filter the dataframe to get only float64 type columns
    data = df[df.select_dtypes(include=['float64']).columns]

    # Add each plot to the subplots
    for i in range(1, total_plots + 1):
        row = (i-1) // num_cols + 1
        col = (i-1) % num_cols + 1
        # title 
        fig.update_xaxes(title_text=df.index[i-1], row=row, col=col)

        # Generate hover text and add bar plots for each column
        for j, column in enumerate(data.columns):
            hover_text = [f'<b>{column}</b><br> {round(value*100*10)/10}%' for value in df[column]]
            
            # Determine if legend should be shown
            show_legend = True if i == 1 else False

            # Add trace for the current row of data
            fig.add_trace(go.Bar(
                x=[column],
                y=[data.iloc[i-1][column]],
                hovertext=hover_text,
                hoverinfo='text',
                marker_color=colors[j % len(colors)],
            ), row=row, col=col)
        fig.update_xaxes(showticklabels=False, title_text='hi', row=row, col=col)


    return fig




In [96]:
kins_df = f.euclidean_kins
# set index to GeoName, if not already
kins_df.set_index("GeoName", inplace=True)
variable = "industry"

path = find_repo_root()
variable_df = pd.read_csv(os.path.join(path, "data/processed/" + variable + "_wide.csv"))
variable_df.set_index("GeoName", inplace=True)

n_kins = 10

df = variable_df[variable_df.index.isin(kins_df.index[0:n_kins])]


# Create a subplot figure
fig = plot_kins_hist(df)
fig.show()
fig = plot_kins_stack_hist(df)
fig.show()

In [97]:
# plotting function 
kins_df = f.euclidean_kins
kins_df.columns


Index(['GeoFIPS', '2001', '2002', '2003', '2004', '2005', '2006', '2007',
       '2008', '2009', '2010', '2011', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019', '2020', '2021', '1993_population', '1994_population',
       '1995_population', '1996_population', '1997_population',
       '1998_population', '1999_population', '2000_population',
       '2001_population', '2002_population', '2003_population',
       '2004_population', '2005_population', '2006_population',
       '2007_population', '2008_population', '2009_population',
       '2010_population', '2011_population', '2012_population',
       '2013_population', '2014_population', '2015_population',
       '2016_population', '2017_population', '2018_population',
       '2019_population', '2020_population', '2021_population',
       'distance to 20003', 'percentile'],
      dtype='object')

In [98]:
len(df)

10