In [29]:
import pandas as pd
import numpy as np

def load_data(path: str):
  """Load one worksheet per year (2005 through 2019) from an Excel workbook.

  Args:
    path: Location of the workbook. It must contain a sheet named after
      each year as a string ("2005", "2006", ..., "2019").

  Returns:
    A tuple ``(data, years, year2index)``:
      * ``data`` is a list of DataFrames, one per year, ordered like ``years``;
      * ``years`` is the list of ints 2005..2019;
      * ``year2index`` maps each year to its position in ``data``.
  """
  years = list(range(2005, 2020))
  year2index = {year: position for position, year in enumerate(years)}
  sheet_names = [str(year) for year in years]
  # Requesting all sheets in one call makes pandas open and parse the workbook
  # once instead of once per year; the result is a dict keyed by sheet name.
  sheets = pd.read_excel(path, sheet_name=sheet_names)
  data = [sheets[name] for name in sheet_names]
  return data, years, year2index

# Load the yearly sheets once when this cell runs. The query helpers in later
# cells read `data`, `years` and `year2index` as notebook-level globals, so
# this cell has to be executed before them.
data, years, year2index = load_data("../data/Brazilian Immigrants - modified.xlsx")

def load_state_dict(path: str):
  """Build a lookup from FIPS code to lower-cased state name and abbreviation.

  Reads the "State code" sheet of the workbook at ``path`` and uses its
  "FIPS Code", "State Name" and "State Abbreviation" columns.

  Returns:
    A dict of the form ``{code: {"name": ..., "abbrev": ...}}`` with one
    entry per row of the sheet; both strings are lower-cased.
  """
  sheet = pd.read_excel(path, sheet_name="State code")

  def entry_for(label):
    # Produce the (key, value) pair contributed by a single sheet row.
    short = sheet["State Abbreviation"][label].lower()
    full = sheet["State Name"][label].lower()
    return sheet["FIPS Code"][label], {"name": full, "abbrev": short}

  return dict(entry_for(label) for label in sheet.index)

# FIPS-code lookup used for chart titles and output file names. Note that it
# is read from the original workbook, not the "- modified" one loaded above.
state_dict = load_state_dict("../data/Brazilian Immigrants.xlsx")


In [30]:
import numpy as np

def query_by_attr_year(attr: str, year: int, t: bool = False):
  """Select the rows of one year's sheet whose label column equals ``attr``.

  Args:
    attr: Label to match exactly.
    year: Year to look up; must be a key of the global ``year2index``.
    t: When truthy, match against the sheet's first column; otherwise match
      against its second column.

  Returns:
    A DataFrame holding every matching row (empty when nothing matches).
  """
  frame = data[year2index[year]]
  label_column = frame.iloc[:, 0 if t else 1]
  return frame.loc[label_column == attr]

def query_by_attr_year_state(attr: str, year: int, state: int, t: bool = False):
  """Look up the value(s) of ``attr`` for one state in one year.

  Args:
    attr: Label passed through to ``query_by_attr_year``.
    year: Year to look up.
    state: Column label of the state in the yearly sheet.
    t: Forwarded to ``query_by_attr_year``. When truthy, every matching row
      is returned; otherwise only the first match is returned.

  Returns:
    A tuple ``(value, names)``. ``names`` lists the second-column entries of
    the matching rows. ``value`` is the sequence of per-row values when ``t``
    is truthy, otherwise the first row's value. Rows are filled with ``None``
    when the sheet has no column for ``state``; with ``t`` falsy, ``None`` is
    also returned when no row matches ``attr`` at all.
  """
  row = query_by_attr_year(attr, year, t)
  names = row.iloc[:, 1].to_list()

  if state in row.columns:
    values = row[state].to_numpy()
  else:
    # The state has no column in this year's sheet: use one placeholder per row.
    values = [None] * len(row)

  if t:
    return values, names
  # No matching row used to raise IndexError on values[0]; report "no data"
  # with None, the same placeholder used for a missing state column.
  return (values[0] if len(values) else None), names

def query_attr_over_years_by_state(attr: str, state: int, t: bool = False):
  """Collect the value(s) of ``attr`` for one state across every loaded year.

  Calls ``query_by_attr_year_state`` once per entry of the global ``years``.

  Returns:
    A tuple ``(values, names)``. ``values`` is the transposed array of the
    per-year results, so with ``t`` truthy each row is one series over the
    years. ``names`` is the name list reported for the last year queried.
  """
  per_year = []

  for year in years:
    yearly_value, names = query_by_attr_year_state(attr, year, state, t)
    per_year += [yearly_value]

  stacked = np.asarray(per_year)
  return stacked.T, names

In [31]:
from pyecharts.render import make_snapshot
from typing import List, Optional
import pyecharts.options as opts
from pyecharts.charts import Grid, Line
import os
from snapshot_selenium import snapshot as driver

def plot_attrs_over_years_of_states(
  attr: str,
  state: int,
  save_name: Optional[str] = None
):
  """Build a line chart of every series of ``attr`` over the years for a state.

  Args:
    attr: Label forwarded to ``query_attr_over_years_by_state`` (with
      ``t=True``); also used in the chart title.
    state: Key into the global ``state_dict`` and column label of the state
      in the yearly sheets.
    save_name: When given, the chart is written to
      ``./html/<save_name>.html`` and a snapshot of that page is saved as
      ``<save_name>.png`` in the working directory.

  Returns:
    The pyecharts ``Grid`` wrapping the line chart.
  """
  values_by_state, attr_names = query_attr_over_years_by_state(attr, state, t=True)
  # NOTE(review): the first two legend entries are relabelled by position with
  # names specific to "Median Personal Earnings". Other `attr` values would be
  # mislabelled, and fewer than two series raises IndexError - confirm intent.
  attr_names[0]='Median Personal Earnings'
  attr_names[1]='Median Personal Earnings (excl 0 and neg)'

  state_name = state_dict[state]["name"].title()

  line = (
    Line()
    .add_xaxis(xaxis_data=[str(y) for y in years])
    .set_global_opts(
      title_opts=opts.TitleOpts(
        title=f"{attr} for {state_name}"
      ),
      tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
      yaxis_opts=opts.AxisOpts(
        type_="value",
        axistick_opts=opts.AxisTickOpts(is_show=True),
        splitline_opts=opts.SplitLineOpts(is_show=True)
      ),
      xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
      datazoom_opts=opts.DataZoomOpts(
        range_start=0,
        range_end=100
      ),
      legend_opts=opts.LegendOpts(is_show=True, pos_top=40),
    )
  )

  for (i, values) in enumerate(values_by_state):
    line.add_yaxis(
      series_name=attr_names[i],
      # pyecharts serialises native Python types only, so convert the numpy
      # row into a plain list before handing it over.
      y_axis=values.tolist(),
      label_opts=opts.LabelOpts(is_show=True)
    )

  grid = (
    Grid(init_opts=opts.InitOpts(width="775px", height="500px"))
      .add(line, grid_opts=opts.GridOpts(pos_top=75))
  )

  if save_name is not None:
    html_path = os.path.join("./html", save_name + ".html")
    # Make sure the output folder exists so rendering works on a fresh checkout.
    os.makedirs(os.path.dirname(html_path), exist_ok=True)
    grid.render(html_path)
    # Snapshot the page just written instead of rendering a second time, which
    # also left a stray default render.html in the working directory.
    make_snapshot(driver, html_path, save_name+".png")

  return grid

In [None]:
# Single-state example: FIPS code 48 is saved under the name "texas" and the
# resulting chart is displayed inline in the notebook.
plot_attrs_over_years_of_states(
  "Median Personal Earnings",
  state=48,
  save_name="texas"
).render_notebook()

In [32]:
# Render and save the "Median Personal Earnings" chart for every state code in
# the lookup table, naming each output after the (lower-cased) state name.
for state in state_dict:
  plot_attrs_over_years_of_states("Median Personal Earnings", state=state, save_name=state_dict[state]["name"])