# Analyzing economic indicators in California after COVID-19

### Load Python tools

In [1]:
import pandas as pd
from urllib.request import urlopen 
import os
import glob
import requests
import matplotlib
import numpy as np
from altair import datum
import altair as alt
import altair_latimes as lat
from sodapy import Socrata
# Use the Los Angeles Times chart theme for every Altair figure in this notebook.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

# Raise pandas' display limits so wide and long frames print without truncation.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 34000)

## Worker Adjustment and Retraining Notification Act (WARN) notices in CA

### Read historic WARN filings pulled from PDF posted online

In [2]:
# Source page for the historic filings:
# https://edd.ca.gov/jobs_and_Training/Layoff_Services_WARN.htm
path = 'input/'
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the order in which the CSVs are read (and therefore the row order of the
# concatenated table) identical on every run and every machine.
files = sorted(glob.glob(os.path.join(path, "tabula*.csv")))

In [3]:
# Read every matched CSV and tag each frame with the name of the file it came from.
# This is a list rather than a generator: a generator is exhausted by the first
# pd.concat, so re-running the concatenation cell alone would fail with
# "No objects to concatenate".
file_df = [
    pd.read_csv(f, encoding="ISO-8859-1", low_memory=False).assign(date=os.path.basename(f))
    for f in files
]

### Concatenate the CSVs into one frame and clean up fields

In [4]:
# Stack the per-file frames under a fresh 0..n-1 index, then discard the
# source-file tag that was stored in `date` while reading.
warn_past = pd.concat(file_df, ignore_index=True)
warn_past = warn_past.drop(columns=['date'])

In [5]:
# Bring the historic table to the column layout used for the combined data:
# a shorter employee-count name, an empty `county` column, and a fixed column order.
warn_past = (
    warn_past
    .rename(columns={'no._of_employees': 'employees'})
    .assign(county='')
    .loc[:, ['notice_date', 'effective_date', 'received_date', 'company', 'city',
             'county', 'employees', 'layoff_closure']]
)
# Remove spaces from the effective_date strings (presumably PDF-extraction artifacts).
warn_past['effective_date'] = warn_past['effective_date'].str.replace(' ', '')

### Read current WARN filings from Excel sheet posted online and clean up fields

In [6]:
# Landing page that links to the report:
#https://edd.ca.gov/jobs_and_Training/Layoff_Services_WARN.htm
# Direct URL of the WARN report workbook that the next cell reads with pandas.
warnurl = 'https://edd.ca.gov/jobs_and_Training/warn/WARN_Report.xlsx'

In [7]:
# Download the workbook; the first three rows are skipped so the next row
# supplies the column names.
# `index=False` was removed: it is an argument of the DataFrame writers, not of
# `read_excel`, and current pandas rejects it with a TypeError.
warn_current = pd.read_excel(warnurl, skiprows=3)

In [8]:
# Discard the spreadsheet's header-less columns, which pandas labels "Unnamed: N".
warn_current = warn_current.drop(columns=list(warn_current.filter(regex='Unnamed:')))

In [9]:
# Drop the rows at positions 4255-4269.
# NOTE(review): presumably the summary footer below the notice listing; the
# positions are hard-coded for one download of the workbook and will shift as
# rows are added — confirm before re-running on newer data.
warn_current = warn_current.drop(warn_current.index[4255:4270])

# Normalize headers to snake_case. regex=False treats every pattern as literal
# text, so '(' , ')' and the '.' in 'no._of_' are never read as regex syntax,
# whatever the installed pandas version uses as its default.
warn_current.columns = (
    warn_current.columns.str.strip().str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('/', '_', regex=False)
    .str.replace('no._of_', '', regex=False)
)

### Concatenate old and new tables into single dataframe

In [10]:
# Stack the current spreadsheet rows on top of the historic rows. Each frame
# keeps its own index labels, so labels repeat in the combined table.
warnall = pd.concat((warn_current, warn_past), ignore_index=False)

### Fix dates and remove stray characters and rows

In [11]:
# Trim surrounding whitespace from the place-name columns.
warnall = warnall.assign(
    county=warnall['county'].str.strip(),
    city=warnall['city'].str.strip(),
)
# Remove the spreadsheet's non-data rows, identified by their notice_date label.
warnall = warnall[~warnall['notice_date'].isin(['Summary by Month', 'Total'])]
# Parse both date columns.
warnall = warnall.assign(
    notice_date=pd.to_datetime(warnall['notice_date']),
    effective_date=pd.to_datetime(warnall['effective_date']),
)
# Year of the effective date as text; the values come out float-formatted
# (e.g. '2020.0') and are tidied after grouping.
warnall = warnall.assign(effective_year=warnall['effective_date'].dt.year.astype(str))

### Export to CSV

In [12]:
# Write the combined notices table, including its index column, to output/warnall.csv.
warnall.to_csv('output/warnall.csv')

### Group WARN notices by year

In [13]:
# Total employees named in notices, per effective year.
# The rename mapping used to list 'effective_year' twice; in a dict literal only
# the last entry ('year') applies, so the dead 'notices' entry has been removed.
warngroup = (
    warnall.groupby('effective_year')
    .agg({'employees': 'sum'})
    .reset_index()
    .rename(columns={'effective_year': 'year', 'employees': 'employees_effected'})
)

In [14]:
warnall.head()

Unnamed: 0,notice_date,effective_date,received_date,company,city,county,employees,layoff_closure,effective_year
0,2020-03-11,2020-03-11,2020-05-18 00:00:00,Hilton San Francisco Airport Bayfront,Burlingame,San Mateo County,110.0,Layoff Unknown at this time,2020.0
1,2020-03-18,2020-03-18,2020-05-18 00:00:00,"Auburn Auto, Inc dba Auburn Toyota",Auburn,Placer County,73.0,Layoff Temporary,2020.0
2,2020-03-20,2020-03-13,2020-05-18 00:00:00,"Pinnacle Exhibits, Inc.",Irvine,Orange County,47.0,Layoff Temporary,2020.0
3,2020-03-20,2020-03-19,2020-05-18 00:00:00,"CBDM Redlands, LLC. dba The Library Gentlemen'...",Redlands,San Bernardino County,124.0,Closure Temporary,2020.0
4,2020-03-23,2020-03-19,2020-05-18 00:00:00,"Mama Management USA, LLC DBA Mama Shelter",Los Angeles,Los Angeles County,100.0,Layoff Temporary,2020.0


### Group WARN notices by day

In [15]:
# Total employees named in notices, per notice date.
# The rename no longer maps 'effective_year': that column does not exist in this
# grouped frame, so the entry was a no-op left over from the yearly grouping.
warngroupday = (
    warnall.groupby('notice_date')
    .agg({'employees': 'sum'})
    .reset_index()
    .rename(columns={'notice_date': 'date', 'employees': 'employees_effected'})
)

In [16]:
warngroupday.tail(10)

Unnamed: 0,date,employees_effected
1303,2020-04-28,3264.0
1304,2020-04-29,893.0
1305,2020-04-30,1823.0
1306,2020-05-01,7629.0
1307,2020-05-04,1772.0
1308,2020-05-05,12512.0
1309,2020-05-06,1868.0
1310,2020-05-07,6517.0
1311,2020-05-08,1094.0
1312,2020-05-15,60.0


### How has the number of employees affected by WARN notices changed?

In [17]:
# Drop the group at index label 7.
# NOTE(review): presumably the 'nan' bucket produced when rows without an
# effective date were cast to str — confirm against the data.
# errors='ignore' keeps this cell re-runnable: once the row is gone, running
# the cell again no longer raises KeyError.
warngroup = warngroup.drop(7, errors='ignore')
# Years arrive as float-formatted text such as '2020.0'; strip the literal '.0'.
warngroup.year = warngroup.year.str.replace('.0', '', regex=False)

In [18]:
warngroup

Unnamed: 0,year,employees_effected
0,2014,24012.0
1,2015,62042.0
2,2016,63356.0
3,2017,68665.0
4,2018,68843.0
5,2019,69841.0
6,2020,508823.0


In [132]:
# Bar chart: total employees named in WARN notices, one bar per effective year.
# `year` holds text such as '2014' and is encoded as a temporal field (:T).
# NOTE(review): the y domain is capped at 500,000 while the 2020 total in the
# table above is 508,823 — confirm the tallest bar is not drawn past the axis.
warn_chart = alt.Chart((warngroup))\
    .mark_bar(size=30)\
    .encode(
    x=alt.X('year:T', title=' ', axis=alt.Axis(grid=False, tickCount=6, format='%Y'),\
            scale=alt.Scale(domain=('2014-01-01', '2020-05-01'))),
    y=alt.Y('employees_effected:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=4, format=''),\
           scale=alt.Scale(domain=(1, 500000)))
).properties(width=300, height=300,
     title='Employees affected by mass layoffs'
 )

# Text layer: prints the value above the bar of the year with the largest total.
warn_chart_text_today = (
    alt.Chart((warngroup.query("employees_effected == employees_effected.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("year:T"), y=alt.Y("employees_effected:Q"), text=alt.Text("employees_effected:Q",format=','))
)

# Layer bars and label; hide the view border and configure legend placement.
(warn_chart + warn_chart_text_today).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

### How have WARN notices changed in recent days?

In [20]:
warngroupday.head()

Unnamed: 0,date,employees_effected
0,2014-01-15,11.0
1,2014-03-05,179.0
2,2014-06-30,109.0
3,2014-07-01,170.0
4,2014-07-02,89.0


In [21]:
# Bar chart: employees named in WARN notices per notice date, limited to notice
# dates after Jan. 1, 2020.
# NOTE(review): this rebinds `warn_chart` / `warn_chart_text_today` from the
# yearly chart cell, and the x domain stops at 2020-05-01 although the grouped
# data shown earlier runs through 2020-05-15 — confirm both are intended.
warn_chart = alt.Chart((warngroupday.query("date > '01/01/2020'")))\
    .mark_bar(size=5)\
    .encode(
    x=alt.X('date:T', title=' ', axis=alt.Axis(grid=False, tickCount=6, format='%b. %-d, %Y'),\
            scale=alt.Scale(domain=('2020-01-01', '2020-05-01'))),
    y=alt.Y('employees_effected:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=4, format=''),\
           scale=alt.Scale(domain=(0, 40000)))
).properties(width=700, height=300,
     title='Employees affected by mass layoffs'
 )

# Text layer: labels the day with the largest total. The maximum is taken over
# every date in warngroupday, not only the 2020 dates plotted above.
warn_chart_text_today = (
    alt.Chart((warngroupday.query("employees_effected == employees_effected.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("employees_effected:Q"), text=alt.Text("employees_effected:Q",format=','))
)

# Layer bars and label; hide the view border and configure legend placement.
(warn_chart + warn_chart_text_today).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

### Share of recent WARN notices by county

In [22]:
# Notices whose effective date falls strictly after March 1, 2020.
warn_covid = warnall.loc[warnall['effective_date'] > '2020/03/01']

In [23]:
# Ten counties with the largest share of recent notices, formatted as percentages.
# `normalize=True` is passed by keyword; the previous positional string
# 'normalize' only worked because any non-empty string is truthy.
((warn_covid['county'].value_counts(normalize=True) * 100).round(2).astype(str) + '%').head(10)

Los Angeles County       27.84%
Orange County            10.52%
San Diego County          9.29%
Santa Clara County        6.24%
San Francisco County      5.95%
Alameda County            4.85%
Riverside County          4.22%
San Bernardino County     3.81%
Sacramento County         3.29%
Contra Costa County       2.55%
Name: county, dtype: object

### Top companies issuing WARN notices statewide since March 1

In [24]:
# Employees named in recent notices, summed per company.
# Only `employees` is aggregated: summing every column relied on older pandas
# silently discarding the text and date columns, and raises on pandas >= 2.0.
warn_covid_companies = warn_covid.groupby(['company'])[['employees']].sum().reset_index()
# Ten companies with the most affected employees.
warn_covid_companies.sort_values(by='employees', ascending=False).head(10)

Unnamed: 0,company,employees
2360,"Ross Stores, Inc.",14651.0
2682,"Tesla, Inc",11083.0
2696,The Cheesecake Factory Incorporated,7439.0
211,Aramark,6650.0
1133,Fitness International LLC,5600.0
284,"BJ's Restaurants, Inc.",5127.0
1699,Levy Premium Foodservice Limited Partnership,4105.0
2962,"Western Dental Services, Inc.",3942.0
774,"CorePower Yoga, LLC dba CorePower Yoga",3741.0
1178,"Fox Sports Productions, LLC",3595.0


### Top companies issuing WARN notices in LA County since March 1

In [25]:
# Same per-company total, restricted to notices filed for Los Angeles County.
# Only `employees` is aggregated: summing every column relied on older pandas
# silently discarding the text and date columns, and raises on pandas >= 2.0.
warn_covid_companies_la = (
    warn_covid[warn_covid['county'] == 'Los Angeles County']
    .groupby(['company'])[['employees']]
    .sum()
    .reset_index()
)
# Ten companies with the most affected employees.
warn_covid_companies_la.sort_values(by='employees', ascending=False).head(10)

Unnamed: 0,company,employees
749,"Ross Stores, Inc.",3737.0
381,"Fox Sports Productions, LLC",3595.0
923,UNIVERSAL CITY STUDIOS DBA UNIVERSAL STUDIOS H...,3025.0
547,Levy Premium Foodservice Limited Partnership,2771.0
871,The Cheesecake Factory Incorporated,2391.0
969,YMCA of Metropolitan Los Angeles,2285.0
358,Fitness International LLC,2274.0
450,Hawaiian Gardens Casino,1900.0
548,Levy Premium Foodservice Limited Partnership a...,1783.0
973,YogaWorks,1514.0


In [26]:
warn_covid.head()

Unnamed: 0,notice_date,effective_date,received_date,company,city,county,employees,layoff_closure,effective_year
0,2020-03-11,2020-03-11,2020-05-18 00:00:00,Hilton San Francisco Airport Bayfront,Burlingame,San Mateo County,110.0,Layoff Unknown at this time,2020.0
1,2020-03-18,2020-03-18,2020-05-18 00:00:00,"Auburn Auto, Inc dba Auburn Toyota",Auburn,Placer County,73.0,Layoff Temporary,2020.0
2,2020-03-20,2020-03-13,2020-05-18 00:00:00,"Pinnacle Exhibits, Inc.",Irvine,Orange County,47.0,Layoff Temporary,2020.0
3,2020-03-20,2020-03-19,2020-05-18 00:00:00,"CBDM Redlands, LLC. dba The Library Gentlemen'...",Redlands,San Bernardino County,124.0,Closure Temporary,2020.0
4,2020-03-23,2020-03-19,2020-05-18 00:00:00,"Mama Management USA, LLC DBA Mama Shelter",Los Angeles,Los Angeles County,100.0,Layoff Temporary,2020.0


---

### County-by-county unemployment for use later

In [27]:
### Local Area Unemployment Statistics (LAUS)

In [28]:
# https://data.edd.ca.gov/Labor-Force-and-Unemployment-Rates/Local-Area-Unemployment-Statistics-LAUS-/e6gw-gvii

In [29]:
# Credentials come from the environment instead of being committed with the
# notebook; the values previously hard-coded here should be rotated.
# NOTE(review): each variable may be unset (None); sodapy is expected to fall
# back to unauthenticated, throttled access for public datasets — confirm.
laus_token = os.environ.get('EDD_SOCRATA_APP_TOKEN')

client = Socrata(
    'data.edd.ca.gov',
    laus_token,
    username=os.environ.get('EDD_SOCRATA_USERNAME'),
    password=os.environ.get('EDD_SOCRATA_PASSWORD'),
)

# Fetch up to 200,000 records of dataset e6gw-gvii and load them into a frame.
results = client.get("e6gw-gvii", limit=200000)
laus = pd.DataFrame.from_records(results)

In [30]:
# Normalize headers to snake_case. regex=False treats each pattern as literal
# text, so '(' and ')' are never read as regex syntax, whatever the installed
# pandas version uses as its default.
laus.columns = (
    laus.columns.str.strip().str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
    .str.replace('_&_', '_', regex=False)
)

In [31]:
# Convert the two fields used below to proper types: a datetime and a float.
laus = laus.assign(
    date=pd.to_datetime(laus['date']),
    unemployment_rate=laus['unemployment_rate'].astype(float),
)

In [32]:
# Seasonally adjusted rows at the statewide level only.
laus_state = laus.query("seasonally_adjusted_y_n == 'Y' and area_type == 'State'")

In [33]:
laus_state.head()

Unnamed: 0,area_type,area_name,date,year,month,seasonally_adjusted_y_n,status_preliminary_final,labor_force,employment,unemployment,unemployment_rate
119,State,California,2020-03-01,2020,March,Y,Preliminary,19264200,18244100,1020100,0.053
1173,State,California,2020-02-01,2020,February,Y,Final,19516000,18756700,759300,0.039
2226,State,California,2020-01-01,2020,January,Y,Final,19509600,18756400,753300,0.039
3279,State,California,2019-12-01,2019,December,Y,Final,19483800,18729100,754700,0.039
4331,State,California,2019-11-01,2019,November,Y,Final,19487600,18733900,753700,0.039


In [34]:
# Every seasonally adjusted row, whatever the area type.
laus_sa = laus.loc[laus['seasonally_adjusted_y_n'].eq('Y')]

In [35]:
laus_sa.head()

Unnamed: 0,area_type,area_name,date,year,month,seasonally_adjusted_y_n,status_preliminary_final,labor_force,employment,unemployment,unemployment_rate
18,County,Los Angeles County,2020-03-01,2020,March,Y,Preliminary,4995800,4680900,314800,0.063
59,Metro Division,"Anaheim-Santa Ana-Irvine, CA Met Div",2020-03-01,2020,March,Y,Prelim,1603600,1547357,56243,0.035
60,Metro Division,"Oakland-Hayward-Berkeley, CA Met Div",2020-03-01,2020,March,Y,Prelim,1391630,1339163,52467,0.038
61,Metro Division,San Francisco-Redwood City-South San Francisco...,2020-03-01,2020,March,Y,Prelim,1035351,1006661,28690,0.028
62,Metro Division,"San Rafael, CA Met Div",2020-03-01,2020,March,Y,Prelim,138626,134585,4041,0.029


---

### Pull historic unemployment rates by county from BLS

In [36]:
def formaturl(x):
    """Return the BLS county workbook URL for a two-digit year.

    Only the first two elements of ``x`` are used, e.g. '90' ->
    'https://www.bls.gov/lau/laucnty90.xlsx'.
    """
    return f'https://www.bls.gov/lau/laucnty{x[0]}{x[1]}.xlsx'

In [37]:
# One record per year from 1990 through 2019: the two-digit year and the URL of
# that year's workbook.
# The years come from a plain integer range instead of
# pd.date_range(..., freq='Y'): the 'Y' alias is deprecated in recent pandas,
# and the resulting records are identical.
metadata = [
    dict(year=f'{yr % 100:02d}', url=formaturl(f'{yr % 100:02d}'))
    for yr in range(1990, 2020)
]

In [38]:
# Download each yearly workbook (skipping its first four rows), keep the code
# and year fields as text, and tag every frame with its two-digit year.
df_list = [
    pd.read_excel(
        m['url'],
        skiprows=4,
        dtype={'Code': str, 'Code.1': str, 'Code.2': str, 'Year': str, 'Unnamed: 5': str},
    ).assign(date=m['year'])
    for m in metadata
]

# Combine all years into one frame, keeping the column order as encountered.
df = pd.concat(df_list, sort=False)

In [39]:
# Replace the workbook headers with short snake_case names.
df = df.rename(columns={
    "Code": "laus",
    "Code.1": "stfips",
    "Code.2": "ctyfips",
    "Year": "year",
    "(%)": "rate",
    "County Name/State Abbreviation": "county",
    "Force": "labor_force",
    "Employed": "employed",
    "Unemployed": "unemployed",
})

In [40]:
# Remove the unlabeled spreadsheet column and the temporary year tag.
df = df.drop(columns=['Unnamed: 5', 'date'])

In [41]:
# Remove every row carrying index label 0 (the label repeats once per yearly
# workbook because the frames were concatenated without re-indexing), then
# discard rows with any missing value.
df = df.drop(index=[0]).dropna()

In [42]:
# Wrap the cleaned nationwide table in a new DataFrame object under a descriptive name.
counties = pd.DataFrame(df)

In [43]:
# Keep only rows whose county label contains ', CA'; reset_index() moves the
# old row labels into an `index` column.
ca_cty_annual = counties.loc[counties['county'].str.contains(', CA')].reset_index()

In [44]:
# Shorten labels such as 'Yuba County, CA' to the bare county name.
ca_cty_annual['county'] = (
    ca_cty_annual['county']
    .str.replace(' County, CA', '')
    .str.replace(' County/city, CA', '')
)
# Convert the percentage to a fraction (9.6 -> 0.096).
# NOTE(review): re-running this cell divides by 100 again.
ca_cty_annual['rate'] = ca_cty_annual['rate'].div(100)

In [45]:
ca_cty_annual.tail()

Unnamed: 0,index,laus,stfips,ctyfips,county,year,labor_force,employed,unemployed,rate
1735,240,CN0610700000000,6,107,Tulare,2019,203980,184353,19627,0.096
1736,241,CN0610900000000,6,109,Tuolumne,2019,21317,20366,951,0.045
1737,242,CN0611100000000,6,111,Ventura,2019,423390,408169,15221,0.036
1738,243,CN0611300000000,6,113,Yolo,2019,108658,104220,4438,0.041
1739,244,CN0611500000000,6,115,Yuba,2019,29997,28182,1815,0.061


### Annual unemployment rates, by California county: 1990-2019

In [134]:
# Small multiples: one 80x70 bar chart per county, eight per row, showing the
# annual unemployment rate with the x domain fixed to 1990-2020 and the y
# domain fixed to 0-35%.
ca_cty_annual_chart = alt.Chart(ca_cty_annual)\
    .mark_bar(size=3)\
    .encode(
    x=alt.X('year:T', title=' ', axis=alt.Axis(grid=False, tickCount=2, format='%Y'),\
           scale=alt.Scale(domain=('1990-01-01', '2020-01-01'))),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
           scale=alt.Scale(domain=(0, .35)))
).properties(width=80, height=70,
    title='Annual unemployment rates, by California county: 1990-2019'
 ).facet(
    facet=alt.Facet('county:N'),
    columns=8,
    padding={"left": -15, "top": 0, "right": -0, "bottom": 0}
)

# Render the faceted chart with the view border hidden.
(ca_cty_annual_chart).configure_view(strokeOpacity=0)

---

### Time series data from BLS for state unemployment rate

In [47]:
# https://data.edd.ca.gov/Labor-Force-and-\
#     Unemployment-Rates/Civilian-Unemployment-Rate-for-US-and-California/x7g9-zu4h

In [48]:
# Credentials come from the environment instead of being committed with the
# notebook; the values previously hard-coded here should be rotated.
# NOTE(review): each variable may be unset (None); sodapy is expected to fall
# back to unauthenticated, throttled access for public datasets — confirm.
rate_token = os.environ.get('EDD_SOCRATA_APP_TOKEN')

client = Socrata(
    'data.edd.ca.gov',
    rate_token,
    username=os.environ.get('EDD_SOCRATA_USERNAME'),
    password=os.environ.get('EDD_SOCRATA_PASSWORD'),
)

# Fetch up to 2,000,000 records of dataset x7g9-zu4h and load them into a frame.
results = client.get("x7g9-zu4h", limit=2000000)
unemp_rate_state = pd.DataFrame.from_records(results)

In [49]:
# Normalize headers to snake_case. regex=False treats each pattern as literal
# text, so '(' and ')' are never read as regex syntax, whatever the installed
# pandas version uses as its default.
unemp_rate_state.columns = (
    unemp_rate_state.columns.str.strip().str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
    .str.replace('_&_', '_', regex=False)
)

In [50]:
# Parse the date column into datetimes.
unemp_rate_state['date'] = pd.to_datetime(unemp_rate_state['date'])

In [51]:
unemp_rate_state.head()

Unnamed: 0,area_type,geographic_area,date,year,month,seasonally_adjusted,not_seasonally_adjusted
0,National,U.S.,2020-03-01,2020,March,0.044,0.045
1,State,California,2020-03-01,2020,March,0.053,0.056
2,National,U.S.,2020-02-01,2020,February,0.035,0.038
3,State,California,2020-02-01,2020,February,0.039,0.043
4,National,U.S.,2020-01-01,2020,January,0.036,0.04


In [52]:
# Use short names: the area becomes `place`, the seasonally adjusted figure `rate`.
unemp_rate_state = unemp_rate_state.rename(
    columns={'geographic_area': 'place', 'seasonally_adjusted': 'rate'}
)

In [53]:
# Keep only the fields needed for the charts below.
unemp_rate_state = unemp_rate_state.drop(columns=['not_seasonally_adjusted', 'area_type'])

In [54]:
unemp_rate_state.head()

Unnamed: 0,place,date,year,month,rate
0,U.S.,2020-03-01,2020,March,0.044
1,California,2020-03-01,2020,March,0.053
2,U.S.,2020-02-01,2020,February,0.035
3,California,2020-02-01,2020,February,0.039
4,U.S.,2020-01-01,2020,January,0.036


In [55]:
# Line chart of the seasonally adjusted rate after Jan. 1, 1990, one line per
# `place` (U.S. and California in the sample shown above).
ca_rate_chart = alt.Chart((unemp_rate_state.query("date > '01/01/1990'")))\
    .mark_line(size=3)\
    .encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
           scale=alt.Scale(domain=(0, .15))),
    color=alt.Color('place', title=' ')
).properties(width=800, height=200,
     title='Unemployment rate in California, 1990 - March'
 )

# Label for the row where rate == .122 on 2010-02-01.
# NOTE(review): despite the `_today` suffix this targets the 2010 point, and the
# exact comparison only matches if `rate` is numeric — no cast of `rate` to
# float is visible in this section; confirm the label actually renders.
ca_rate_chart_text_today = (
    alt.Chart((unemp_rate_state.query("rate == .122 & date == '02/01/2010'")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("rate:Q"), text=alt.Text("rate:Q",format=',.1%'))
)

# Labels for the 2020-03-01 rows (one per place), despite the `_max` suffix.
ca_rate_chart_text_max = (
    alt.Chart((unemp_rate_state.query("date == '2020-03-01'")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("rate:Q"), text=alt.Text("rate:Q",format=',.1%'))
)

# Layer the lines and both label sets; hide the view border, legend on top.
(ca_rate_chart + ca_rate_chart_text_max + ca_rate_chart_text_today).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

In [56]:
# Close-up of the same rate series for dates after Jan. 1, 2019, with the y
# domain narrowed to 2.5%-5.5%. Rebinds `ca_rate_chart` from the previous cell.
ca_rate_chart = alt.Chart((unemp_rate_state.query("date > '01/01/2019'"))).mark_line(size=4)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%b. %-d, %Y')),
    y=alt.Y('rate:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format='%'),\
           scale=alt.Scale(domain=(.025, .055))),
    color=alt.Color('place', title=' ')
).properties(width=800, height=200,
     title='Unemployment rate in California'
)


# Labels for the rows on the most recent date in the data (one per place).
ca_rate_chart_text = (
    alt.Chart((unemp_rate_state.query("date == date.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("rate:Q"), text=alt.Text("rate:Q",format=',.1%'))
)

# Layer lines and labels; hide the view border, legend on top.
(ca_rate_chart + ca_rate_chart_text).configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

---

### Unemployment claims

In [57]:
# Weekly claims report (ar539) CSV published by the U.S. Department of Labor site.
url = 'https://oui.doleta.gov/unemploy/csv/ar539.csv'
# Handbook pages describing the report's fields:
#https://oui.doleta.gov/dmstree/handbooks/402/402_4/4024c6/4024c6ar539.pdf
#https://oui.doleta.gov/dmstree/handbooks/401/i_1.pdf
# low_memory=False reads the file in one pass so column dtypes are inferred consistently.
raw = pd.read_csv(url,low_memory=False)

In [58]:
# Give the report's coded columns (st, rptdate, c1..c23) readable names.
raw = raw.rename(columns={
    'st': 'state', 'rptdate': 'date',
    'c1': 'week', 'c2': 'week_ending',
    'c3': 'initial_claims', 'c4': 'fic', 'c5': 'xic', 'c6': 'wsic', 'c7': 'wseic',
    'c8': 'continued_claims', 'c9': 'fcw', 'c10': 'xcw', 'c11': 'wscw', 'c12': 'wsecw',
    'c13': 'ebt', 'c14': 'ebui', 'c15': 'abt', 'c16': 'abui', 'c17': 'at',
    'c18': 'covered_unemployment', 'c19': 'rate_insured_unemployment',
    'c20': 'ar', 'c21': 'p', 'c22': 'status', 'c23': 'status_change_date',
})

In [59]:
# Parse the report date into datetimes.
raw = raw.assign(date=pd.to_datetime(raw['date']))

In [60]:
# California rows only; reset_index() keeps the original row labels in an `index` column.
rawca = raw.loc[raw['state'].eq('CA')].reset_index()

### New unemployment claims

In [61]:
# Bar chart of weekly initial claims in California for report dates after Jan. 1, 1990.
ca_claims_chart = alt.Chart((rawca.query("date > '01/01/1990'"))).mark_bar(size=1)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('initial_claims:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format=''))
).properties(width=800, height=200,
     title='New unemployment claims in California, by week'
 )

# Text layer: labels the value for the most recent report date.
ca_claims_chart_text = (
    alt.Chart((rawca.query("date == date.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("initial_claims:Q"), text=alt.Text("initial_claims:Q",format=','))
)

# Layer bars and label with the view border hidden.
(ca_claims_chart + ca_claims_chart_text).configure_view(strokeOpacity=0)

In [62]:
# Close-up of weekly initial claims for report dates after Jan. 1, 2019, with
# wider bars and full-date axis labels. Rebinds the names used by the previous cell.
ca_claims_chart = alt.Chart((rawca.query("date > '01/01/2019'"))).mark_bar(size=10)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%b. %-d, %Y')),
    y=alt.Y('initial_claims:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=3, format=''))
).properties(width=800, height=200,
     title='New unemployment claims in California, by week'
 )

# Text layer: labels the week with the largest initial-claims value. The
# maximum is taken over all of rawca, not only the dates plotted above.
ca_claims_chart_text = (
    alt.Chart((rawca.query("initial_claims == initial_claims.max()")))\
    .mark_text(dy=-10, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("initial_claims:Q"), text=alt.Text("initial_claims:Q",format=','))
)

# Layer bars and label with the view border hidden.
(ca_claims_chart + ca_claims_chart_text).configure_view(strokeOpacity=0)

In [63]:
# Bar chart of weekly continued claims in California for report dates after Jan. 1, 1990.
continued_claims_chart = alt.Chart((rawca.query("date > '01/01/1990'"))).mark_bar(size=1)\
.encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
    y=alt.Y('continued_claims:Q', title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=3,tickSize=0,domainOpacity=0,tickCount=5, format=''))
).properties(width=800, height=400,
     title='Continued unemployment claims in California, by week'
 )

# Text layer: labels the week with the largest continued-claims value in rawca.
continued_claims_chart_text = (
    alt.Chart((rawca.query("continued_claims == continued_claims.max()")))\
    .mark_text(dy=-14, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(x=alt.X("date:T"), y=alt.Y("continued_claims:Q"), text=alt.Text("continued_claims:Q",format=','))
)

# Layer bars and label with the view border hidden.
(continued_claims_chart + continued_claims_chart_text).configure_view(strokeOpacity=0)

In [64]:
# Close-up of weekly continued claims for report dates after Jan. 1, 2019.
# The x axis uses the full-date label format of the recent initial-claims
# chart; the year-only '%Y' format (carried over from the 1990-onward chart)
# printed the same year on every tick of this short window.
continued_claims_chart = alt.Chart(rawca.query("date > '01/01/2019'")).mark_bar(size=10).encode(
    x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%b. %-d, %Y')),
    y=alt.Y(
        'continued_claims:Q',
        title=' ',
        axis=alt.Axis(gridWidth=.6, gridColor='#dddddd', offset=3, tickSize=0,
                      domainOpacity=0, tickCount=5, format=''),
    ),
).properties(
    width=800,
    height=400,
    title='Continued unemployment claims in California, by week',
)

# Text layer: labels the week with the largest continued-claims value in rawca.
continued_claims_chart_text = (
    alt.Chart(rawca.query("continued_claims == continued_claims.max()"))
    .mark_text(dy=-14, color="#000000", font='Benton Gothic', fontWeight='bolder', fontSize=12)
    .encode(
        x=alt.X("date:T"),
        y=alt.Y("continued_claims:Q"),
        text=alt.Text("continued_claims:Q", format=','),
    )
)

# Layer bars and label with the view border hidden.
(continued_claims_chart + continued_claims_chart_text).configure_view(strokeOpacity=0)

In [65]:
rawca.sort_values(by='date', ascending=False).head()

Unnamed: 0,index,state,date,week,week_ending,initial_claims,fic,xic,wsic,wseic,continued_claims,fcw,xcw,wscw,wsecw,ebt,ebui,abt,abui,at,covered_unemployment,rate_insured_unemployment,ar,p,status,status_change_date,curdate,priorwk_pub,priorwk
1782,8929,CA,2020-05-09,18,5/2/2020,211945,201,174,2929,722,2848011,2595,1813,8412,2193,0,0,3019,2914,1317118,17330010,7.6,2.17,350.23,B,5/10/2020,5/19/2020,5/14/2020,5/12/2020
1781,8928,CA,2020-05-02,17,4/25/2020,315893,197,174,1675,364,4805047,3966,3269,13224,3314,0,0,3810,3733,1125499,17330010,6.49,2.2,295.0,B,5/10/2020,5/19/2020,5/14/2020,5/12/2020
1780,8927,CA,2020-04-25,16,4/18/2020,325075,243,198,1290,268,1886140,2050,1060,5658,1435,0,0,2195,2108,785498,17330010,4.53,2.21,204.97,E,5/12/2012,5/19/2020,5/14/2020,5/12/2020
1779,8926,CA,2020-04-18,15,4/11/2020,527905,267,245,1906,455,2421030,2164,1164,5791,1428,0,0,2810,2720,666167,17330010,3.84,2.24,171.42,E,5/12/2012,5/19/2020,5/14/2020,5/12/2020
1778,8925,CA,2020-04-11,14,4/4/2020,654990,235,222,2227,482,1428541,2120,943,5611,1413,0,0,2377,2289,508820,17330010,2.93,2.23,131.39,E,5/12/2012,5/19/2020,5/14/2020,5/12/2020


---

### Characteristics of the unemployment insurance claimants

In [66]:
# Description of the claimant-characteristics report:
#https://oui.doleta.gov/unemploy/chariu.asp
# Direct link to the report's CSV (ar203), read into a frame below.
claimants_url = 'https://oui.doleta.gov/unemploy/csv/ar203.csv'
claimants = pd.read_csv(claimants_url)

In [67]:
# Give the report's coded columns readable names. Codes absent from the mapping
# keep their original labels.
# NOTE(review): 'other' (c48) and 'Other' (c67) differ only by case, so they
# become the same label once the headers are lower-cased.
claimants = claimants.rename(columns={
    'st': 'state',
    'rptdate': 'date',
    'c1': 'population',
    # sex
    'c2': 'male', 'c3': 'female', 'c4': 'mf_ina',
    # age brackets
    'c12': '<22', 'c13': '22-24', 'c14': '25-34', 'c15': '35-44',
    'c16': '45-54', 'c17': '55-59', 'c18': '60-64', 'c19': '>=65', 'c20': 'age_ina',
    # ethnicity and race
    'c40': 'hisp', 'c41': 'nothisp', 'c42': 'hisp_ina',
    'c43': 'ai_an', 'c44': 'asian', 'c45': 'black', 'c46': 'nh_pi',
    'c47': 'white', 'c48': 'other',
    # industry (presumably NAICS sectors, given 'naics_ina')
    'c49': 'Ag/Forestry/Fishing/Hunting',
    'c50': 'Mining',
    'c51': 'Utilities',
    'c52': 'Construction',
    'c53': 'Manufacturing',
    'c54': 'Wholesale Trade',
    'c55': 'Retail Trade',
    'c56': 'Transportation & Warehouse',
    'c57': 'Information',
    'c58': 'Real Estate Rental & Leasing',
    'c59': 'Professional/Scientific/ Tech.Services',
    'c60': 'Management of Companies & Enterprises',
    'c61': 'Admin.&Support/Waste Mgmt./Remedia. Serv.',
    'c62': 'Other Services',
    'c63': 'Educational Services',
    'c64': 'Health Care & Social Assistance',
    'c65': 'Arts, Entertainment & Recreation',
    'c66': 'Accommodation and Food Services',
    'c67': 'Other',
    'c68': 'Public Administration',
    'c69': 'naics_ina',
    # occupation groups (presumably)
    'c70': 'Management',
    'c71': 'Business & Financial Ops.',
    'c72': 'Computer & Math',
    'c73': 'Architecture & Engineering',
    'c74': 'Life, Physical & Social Sciences',
    'c75': 'Community & Social Services',
    'c76': 'Legal',
    'c77': 'Education, Training & Library',
    'c78': 'Arts, Design, Entertainment Sports & Media',
    'c79': 'Healthcare Practitioner & Technical',
    'c80': 'Healthcare Support',
    'c81': 'Protective Services ',
    'c82': 'Food Prep. & Serving Related',
    'c83': 'Build. & Grounds Cleaning & Maintenance',
    'c84': 'Personal Care & Services',
    'c85': 'Sales & Related',
    'c86': 'Office & Admin. Support',
    'c87': 'Farming, Fishing & Forestry',
    'c88': 'Construction & Extraction',
    'c89': 'Installation, Maintenance & Repair',
    'c90': 'Production',
    'c91': 'Transportation & Material Moving',
    'c92': 'Military Specific',
    'c93': 'sector_ina',
})

In [68]:
# Normalize headers: trim, lower-case, spaces and hyphens to underscores, and
# strip '<', '>=', '(' and ')' (so '<22' becomes '22' and '>=65' becomes '65').
# regex=False treats each pattern as literal text, so '(' and ')' are never read
# as regex syntax, whatever the installed pandas version uses as its default.
claimants.columns = (
    claimants.columns.str.strip().str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('<', '', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
    .str.replace('_&_', '_', regex=False)
    .str.replace('>=', '', regex=False)
)

In [69]:
# Total claimants per row: male + female + sex not available.
# Plain column addition replaces the row-by-row apply(); the result is the
# same without calling a Python function for every row.
claimants['allpop'] = claimants['male'] + claimants['female'] + claimants['mf_ina']

In [70]:
# Parse the report date into datetimes.
claimants = claimants.assign(date=pd.to_datetime(claimants['date']))

In [71]:
# Claimants younger than 35: the '22' (originally '<22'), '22_24' and '25_34' brackets.
# Plain column addition replaces the row-by-row apply(); the result is the
# same without calling a Python function for every row.
claimants['under35'] = claimants['22'] + claimants['22_24'] + claimants['25_34']

In [72]:
claimants.head()

Unnamed: 0,state,date,population,male,female,mf_ina,22,22_24,25_34,35_44,45_54,55_59,60_64,65,age_ina,hisp,nothisp,hisp_ina,ai_an,asian,black,nh_pi,white,other,ag/forestry/fishing/hunting,...,business_financial_ops.,computer_math,architecture_engineering,"life,_physical_social_sciences",community_social_services,legal,"education,_training_library","arts,_design,_entertainment_sports_media",healthcare_practitioner_technical,healthcare_support,protective_services,food_prep._serving_related,build._grounds_cleaning_maintenance,personal_care_services,sales_related,office_admin._support,"farming,_fishing_forestry",construction_extraction,"installation,_maintenance_repair",production,transportation_material_moving,military_specific,sector_ina,allpop,under35
0,AK,1994-08-31,P,4388,4554,0,343,527,2697,2907,1635,425,288,120,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8942,3567
1,AK,1994-09-30,P,4523,4187,0,325,541,2718,2796,1560,375,263,132,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8710,3584
2,AK,1994-10-31,P,5456,4388,0,309,636,3109,3201,1831,406,242,110,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9844,4054
3,AK,1994-11-30,P,7869,5094,0,451,834,4174,4196,2327,482,323,176,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12963,5459
4,AK,1994-12-31,P,8945,4602,0,502,838,4264,4382,2548,577,280,156,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13547,5604


In [73]:
# California rows only.
ca_claimants = claimants.loc[claimants['state'].eq('CA')]

In [74]:
ca_claimants.tail()

Unnamed: 0,state,date,population,male,female,mf_ina,22,22_24,25_34,35_44,45_54,55_59,60_64,65,age_ina,hisp,nothisp,hisp_ina,ai_an,asian,black,nh_pi,white,other,ag/forestry/fishing/hunting,...,business_financial_ops.,computer_math,architecture_engineering,"life,_physical_social_sciences",community_social_services,legal,"education,_training_library","arts,_design,_entertainment_sports_media",healthcare_practitioner_technical,healthcare_support,protective_services,food_prep._serving_related,build._grounds_cleaning_maintenance,personal_care_services,sales_related,office_admin._support,"farming,_fishing_forestry",construction_extraction,"installation,_maintenance_repair",production,transportation_material_moving,military_specific,sector_ina,allpop,under35
1609,CA,2019-11-30,S,159296,131523,94,6684,13463,68350,61572,59595,33893,25137,22219,0,123050,139713,28150,2071,28056,24855,0,84732,151199,45850,...,0,79742,0,0,0,0,0,0,0,0,0,32010,0,0,65620,0,34364,0,0,77859,0,0,1318,290913,88497
1610,CA,2019-12-31,S,182826,141524,373,7831,16036,72999,69550,67686,36733,29368,24520,0,154297,141058,29368,1492,26757,23587,0,89222,183665,67779,...,0,78221,0,0,0,0,0,0,0,0,0,34589,0,0,69364,0,49133,0,0,91924,0,0,1492,324723,96866
1611,CA,2020-01-31,S,185111,150451,97,5906,15103,75806,71837,69610,35241,33208,28948,0,158003,147934,29722,2517,27399,27011,0,91006,187726,72030,...,0,79776,0,0,0,0,0,0,0,0,0,35628,0,0,66803,0,48795,0,0,103495,0,0,1162,335659,96815
1612,CA,2020-02-29,S,195720,152760,0,7633,15755,78777,74863,70753,37872,33175,29652,0,167341,149824,31315,2740,27890,27401,0,91793,198656,77602,...,0,80930,0,0,0,0,0,0,0,0,0,35425,0,0,70655,0,55878,0,0,103928,0,0,1664,348480,102165
1613,CA,2020-03-31,S,233764,187254,0,9605,18503,100401,84932,87662,46005,39331,34579,0,193421,189883,37714,4247,38927,35085,0,111624,231135,82404,...,0,101918,0,0,0,0,0,0,0,0,0,45499,0,0,86044,0,59553,0,0,125780,0,0,2224,421018,128509


### What share of CA claimants are older?

In [75]:
# Reshape to long form: one row per (date, age bracket) holding that bracket's count.
ca_claimants_melt = ca_claimants.melt(
    id_vars=['date'],
    value_vars=['22', '22_24', '25_34', '35_44', '45_54', '55_59', '60_64', '65'],
    var_name='age',
    value_name='count',
)

In [76]:
# Rows falling in either of two windows: after 1999-12-31 and before 2001-01-31,
# or after 2018-12-31 and before 2020-01-31. The second window is parenthesized
# explicitly; `&` already binds tighter than `|`, so the selection is unchanged.
ca_claimants_melt_select = ca_claimants_melt[
    ((ca_claimants_melt['date'] > '1999-12-31') & (ca_claimants_melt['date'] < '2001-01-31'))
    | ((ca_claimants_melt['date'] > '2018-12-31') & (ca_claimants_melt['date'] < '2020-01-31'))
]

In [77]:
# Mean monthly count per age bracket and calendar year. The year level is named
# `date` (it is derived from the `date` column), so the result has the columns
# age, date, count.
# Only `count` is averaged: aggregating every column also touched the datetime
# `date` column, which older pandas silently discarded and which on current
# pandas collides with the `date` group key when the index is reset.
ca_claim_year_mean = (
    ca_claimants_melt
    .groupby(['age', ca_claimants_melt['date'].dt.year])['count']
    .mean()
    .reset_index()
)

In [78]:
# Horizontal stacked bars, normalized to 100% per report date: each bar shows
# how that date's claimants split across the age brackets. Limited to dates
# after Jan. 31, 2000; segments are stacked in ascending order of the age label.
alt.Chart(ca_claimants_melt.query("date > '1/31/2000'")).mark_bar(size=3).encode(
    x=alt.X('count:Q', title=' ', stack="normalize", axis=alt.Axis(tickCount=4,format='%',offset=1, grid=False)),
    y=alt.Y('date:T', title=' ', axis=alt.Axis(tickSize=0,domainOpacity=0,format='%Y',\
                                               tickCount=8,offset=3, gridWidth=.6, gridColor='#dddddd',)),
    color=alt.Color('age', title=' ', scale=alt.Scale(scheme='tableau20')),
    order=alt.Order(
      'age',
      sort='ascending'
    )
).configure_view(strokeOpacity=0).properties(width=700, height=600,
    title='Share of unemployment benefits recipients in California, by age'
).configure_legend(
    orient='top',
    symbolType='square'
).configure_axis(
    labelFontSize=12)

In [79]:
# Preview the first rows only: with display.max_rows raised to 34000, a bare
# `ca_claimants` would render every California row in the notebook output.
ca_claimants.head(10)

Unnamed: 0,state,date,population,male,female,mf_ina,22,22_24,25_34,35_44,45_54,55_59,60_64,65,age_ina,hisp,nothisp,hisp_ina,ai_an,asian,black,nh_pi,white,other,ag/forestry/fishing/hunting,...,business_financial_ops.,computer_math,architecture_engineering,"life,_physical_social_sciences",community_social_services,legal,"education,_training_library","arts,_design,_entertainment_sports_media",healthcare_practitioner_technical,healthcare_support,protective_services,food_prep._serving_related,build._grounds_cleaning_maintenance,personal_care_services,sales_related,office_admin._support,"farming,_fishing_forestry",construction_extraction,"installation,_maintenance_repair",production,transportation_material_moving,military_specific,sector_ina,allpop,under35
1291,CA,1993-05-31,S,280632,180338,0,13362,31590,151182,128915,77966,28206,18289,11401,59,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,460970,196134
1292,CA,1993-06-30,S,286026,189492,60,13697,32872,158109,127321,85040,27334,19056,12089,60,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,475578,204678
1293,CA,1993-07-31,S,267473,195934,112,14139,29682,149644,129388,82817,27999,19021,10829,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,463519,193465
1294,CA,1993-08-31,S,251658,183935,108,14062,27369,141103,121061,78229,26669,17402,9806,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,435701,182534
1295,CA,1993-09-30,S,245031,171008,54,13700,27022,135647,115232,73645,24855,16353,9639,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,416093,176369
1296,CA,1993-10-31,S,257353,174875,113,13718,28513,141884,121137,76242,24942,16722,9183,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,432341,184115
1297,CA,1993-11-30,S,277557,185294,118,14449,31978,155271,129866,78760,25819,17114,9712,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,462969,201698
1298,CA,1993-12-31,S,299061,192666,123,17045,34275,161468,135746,84918,28552,18153,11693,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,491850,212788
1299,CA,1994-01-31,S,289412,188055,180,14659,27071,158736,135713,82201,28150,20415,10702,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,477647,200466
1300,CA,1994-02-28,S,378236,245462,90,17655,35761,207179,175832,109984,36121,26303,14953,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,623788,260595


In [80]:
# Store the year as text so the chart below can treat it as an ordinal label.
ca_claim_year_mean = ca_claim_year_mean.assign(date=ca_claim_year_mean['date'].astype(str))

In [81]:
# Yearly version of the age-share chart: one normalized stacked bar per year
# (ordinal y axis), built from the per-year mean counts.
alt.Chart(ca_claim_year_mean).mark_bar(size=20).encode(
    x=alt.X('count:Q', title=' ', stack="normalize", axis=alt.Axis(tickCount=4,format='%',offset=1, grid=False)),
    y=alt.Y('date:O', title=' ', axis=alt.Axis(tickSize=0,domainOpacity=0,format='',\
                                               tickCount=8,offset=3, gridWidth=.6, gridColor='#dddddd',)),
    color=alt.Color('age', title=' ', scale=alt.Scale(scheme='tableau20')),
    order=alt.Order(
      'age',
      sort='ascending'
    )
).configure_view(strokeOpacity=0).properties(width=700, height=500,
    title='Share of unemployment benefits recipients in California, by age'
).configure_legend(
    orient='top',
    symbolType='square'
).configure_axis(
    labelFontSize=10)

---

### Imports data from U.S. Trade Online

#### All state-level imports

In [82]:
# Source report on U.S. Trade Online:
# https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132784
# The first four lines of the export are skipped so the next line is read as the
# header (presumably report preamble -- TODO confirm against the raw file).
caimports = pd.read_csv('input/trade/CA Imports by HS Commodities.csv', skiprows=4)

In [83]:
# Drop the 'Unnamed: 3' column (presumably an empty trailing column in the
# export -- TODO confirm) and switch to short lowercase field names.
caimports = (
    caimports
    .drop(columns=['Unnamed: 3'])
    .rename(columns={
        'Time': 'month',
        'State': 'state',
        'Total Value ($US)': 'value',
    })
)

In [84]:
# Preview up to the first 30 rows of the state-level imports table.
caimports.head(30)

Unnamed: 0,month,state,value
0,January 2011,California,25667149475
1,February 2011,California,24359474381
2,March 2011,California,28836004931
3,January 2012,California,29453665046
4,February 2012,California,26667617334
5,March 2012,California,30811140326
6,January 2013,California,30459565168
7,February 2013,California,27747178454
8,March 2013,California,28097123007
9,January 2014,California,31587862450


#### Imports to CA ports

In [85]:
# Source report on U.S. Trade Online:
# https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132782
# The first four lines of the export are skipped so the next line is read as the
# header (presumably report preamble -- TODO confirm against the raw file).
ca_port_imports = pd.read_csv('input/trade/CA port imports.csv', skiprows=4)

In [86]:
# Drop the 'Unnamed: 3' column (presumably an empty trailing column in the
# export -- TODO confirm) and switch to short lowercase field names.
ca_port_imports = (
    ca_port_imports
    .drop(columns=['Unnamed: 3'])
    .rename(columns={
        'Time': 'month',
        'Port': 'port',
        'Customs Value (Gen) ($US)': 'value',
    })
)

In [87]:
# Remove thousands separators so the values can be converted to integers later.
ca_port_imports['value'] = ca_port_imports['value'].str.replace(',', '', regex=False)

In [88]:
# Trim the literal ', CA (District)' text from port names; regex=False keeps the
# parentheses from being interpreted as a regex group.
ca_port_imports['port'] = ca_port_imports['port'].str.replace(', CA (District)', '', regex=False)

In [89]:
# Parse the month labels into timestamps (the pivot output further down shows
# them as first-of-month dates).
ca_port_imports['date'] = pd.to_datetime(ca_port_imports['month'])

In [90]:
# Calendar year of each record, used as the grouping key for the yearly totals.
ca_port_imports['year'] = ca_port_imports['date'].dt.year

In [91]:
# Convert the comma-stripped strings to integers. An explicit 64-bit dtype is
# used because individual monthly values exceed 2**31 - 1, and a bare `int`
# maps to a 32-bit integer on some platforms (e.g. Windows with NumPy < 2).
ca_port_imports['value'] = ca_port_imports['value'].astype('int64')

In [92]:
# Yearly import totals across all ports. Only `value` is aggregated: summing the
# whole frame relied on pandas silently dropping the string/datetime columns,
# which newer pandas versions no longer do (strings get concatenated and
# datetime columns raise).
ca_port_imports_group = ca_port_imports.groupby('year')[['value']].sum().reset_index()

In [93]:
# Reshape to one row per date with one column per port; reset_index() turns the
# date index back into an ordinary column.
ca_port_imports_pivot = ca_port_imports.pivot(index='date', columns='port', values='value').reset_index()

In [94]:
# Inspect up to the last 20 rows of the per-port pivot.
ca_port_imports_pivot.tail(20)

port,date,Los Angeles,San Diego,San Francisco
10,2014-02-01,27257671379,2860378342,5135882138
11,2014-03-01,28839576714,3524051276,5668527093
12,2015-01-01,27354092714,3363794786,5388586089
13,2015-02-01,21529054255,3286970753,4577347548
14,2015-03-01,31424405179,3731386588,6468443492
15,2016-01-01,29027438065,3438880472,5318870446
16,2016-02-01,28746495748,3225048891,5315432867
17,2016-03-01,25057305409,3877395810,5305552360
18,2017-01-01,30660351083,3346263822,6169493360
19,2017-02-01,25887868165,3165715102,5165631618


In [95]:
# Combined imports across the three port districts, computed with vectorized
# column addition instead of a row-by-row apply (same result, no Python-level
# loop over rows).
ca_port_imports_pivot['total'] = (
    ca_port_imports_pivot['Los Angeles']
    + ca_port_imports_pivot['San Diego']
    + ca_port_imports_pivot['San Francisco']
)

In [96]:
# Display the pivot, now including the `total` column.
ca_port_imports_pivot

port,date,Los Angeles,San Diego,San Francisco,total
0,2011-01-01,26713795670,2444933062,4840951355,33999680087
1,2011-02-01,24470133978,2517011835,4950936997,31938082810
2,2011-03-01,25579118794,2965029871,5905219385,34449368050
3,2012-01-01,30061801564,2683473929,5672918287,38418193780
4,2012-02-01,25003202105,2779438506,5107267945,32889908556
5,2012-03-01,28438369029,3370824218,5690614021,37499807268
6,2013-01-01,31751506998,2906706912,5597304075,40255517985
7,2013-02-01,30057521470,2840882908,4974905041,37873309419
8,2013-03-01,25712497378,3301184150,5513749952,34527431480
9,2014-01-01,33064186794,3115055792,6241291704,42420534290


In [97]:
# Display the yearly import totals.
ca_port_imports_group

Unnamed: 0,year,value
0,2011,100387130947
1,2012,108807909604
2,2013,112656258884
3,2014,115706621232
4,2015,107124081404
5,2016,109312420068
6,2017,114159741246
7,2018,125653737380
8,2019,123897741135
9,2020,107758951437


In [98]:
# Bar chart of the yearly import totals, one bar per year.
(
    alt.Chart(ca_port_imports_group)
    .mark_bar()
    .encode(
        x=alt.X('year:N', title=' ', axis=alt.Axis(format='')),
        y=alt.Y(
            'value:Q',
            title=' ',
            axis=alt.Axis(
                tickSize=0,
                domainOpacity=0,
                format='',
                tickCount=5,
                offset=3,
                gridWidth=.6,
                gridColor='#dddddd',
            ),
        ),
    )
    .configure_view(strokeOpacity=0)
    .properties(
        width=320,
        height=400,
        title='Imports to California ports, January-March, by year',
    )
    .configure_legend(orient='top', symbolType='square')
    .configure_axis(labelFontSize=12)
)

### Exports data from U.S. Trade Online

#### All state-level exports

In [99]:
# Source report on U.S. Trade Online:
# https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132783
# The first four lines of the export are skipped so the next line is read as the
# header (presumably report preamble -- TODO confirm against the raw file).
caexports = pd.read_csv('input/trade/CA Exports by HS Commodities.csv', skiprows=4)

In [100]:
# Drop the 'Unnamed: 3' column (presumably an empty trailing column in the
# export -- TODO confirm) and switch to short lowercase field names.
caexports = (
    caexports
    .drop(columns=['Unnamed: 3'])
    .rename(columns={
        'Time': 'month',
        'State': 'state',
        'Total Value ($US)': 'value',
    })
)

In [101]:
# Preview up to the first 30 rows of the state-level exports table.
caexports.head(30)

Unnamed: 0,month,state,value
0,January 2011,California,11728768343
1,February 2011,California,11755473968
2,March 2011,California,14022599774
3,January 2012,California,12423824932
4,February 2012,California,12846110612
5,March 2012,California,14618630845
6,January 2013,California,12580628578
7,February 2013,California,12721150808
8,March 2013,California,14104684167
9,January 2014,California,12901822435


#### Exports from CA ports

In [102]:
# Source report on U.S. Trade Online:
# https://usatrade.census.gov/data/Perspective60/View/dispview.aspx?ReportId=132781
# The first four lines of the export are skipped so the next line is read as the
# header (presumably report preamble -- TODO confirm against the raw file).
ca_port_exports = pd.read_csv('input/trade/CA port exports.csv', skiprows=4)

In [103]:
# Drop the 'Unnamed: 3' column (presumably an empty trailing column in the
# export -- TODO confirm) and switch to short lowercase field names.
ca_port_exports = (
    ca_port_exports
    .drop(columns=['Unnamed: 3'])
    .rename(columns={
        'Time': 'month',
        'Port': 'port',
        'Total Exports Value ($US)': 'value',
    })
)

In [104]:
# Remove thousands separators from the export values.
# NOTE(review): unlike the imports section, no integer cast follows in the
# visible cells, so `value` appears to stay a string column -- confirm intended.
ca_port_exports['value'] = ca_port_exports['value'].str.replace(',', '', regex=False)

In [105]:
# Trim the literal ', CA (District)' text from port names; regex=False keeps the
# parentheses from being interpreted as a regex group.
ca_port_exports['port'] = ca_port_exports['port'].str.replace(', CA (District)', '', regex=False)

In [106]:
# Parse the 'Month YYYY' labels into timestamps (e.g. 'March 2018' becomes
# 2018-03-01 in the output below).
ca_port_exports['date'] = pd.to_datetime(ca_port_exports['month'])

In [107]:
# Reshape to one row per date with one column per port; reset_index() turns the
# date index back into an ordinary column.
ca_port_exports_pivot = ca_port_exports.pivot(index='date', columns='port', values='value').reset_index()

In [108]:
# Inspect up to the last 20 rows of the per-port exports pivot.
ca_port_exports_pivot.tail(20)

port,date,Los Angeles,San Diego,San Francisco
10,2014-02-01,10128898280,1602641854,4352381143
11,2014-03-01,11857469377,1782227993,4971938009
12,2015-01-01,8260846641,1824281642,3866568383
13,2015-02-01,8289533073,1673589894,3665519223
14,2015-03-01,9805409019,1988832124,4116328890
15,2016-01-01,9082323156,1709662867,3243072254
16,2016-02-01,8852150715,1764117601,3682507694
17,2016-03-01,9610354590,1919391048,4296582733
18,2017-01-01,9844596688,1813464337,4071654835
19,2017-02-01,10345648294,1730781646,4081765957


In [109]:
# Inspect up to the last 20 rows of the long-format port exports table.
ca_port_exports.tail(20)

Unnamed: 0,month,port,value,date
70,March 2018,San Diego,2072755942,2018-03-01
71,March 2018,San Francisco,5200570607,2018-03-01
72,January 2019,Los Angeles,9202445758,2019-01-01
73,January 2019,San Diego,2227330595,2019-01-01
74,January 2019,San Francisco,5028625146,2019-01-01
75,February 2019,Los Angeles,9840087521,2019-02-01
76,February 2019,San Diego,2001415717,2019-02-01
77,February 2019,San Francisco,5436924705,2019-02-01
78,March 2019,Los Angeles,11094078933,2019-03-01
79,March 2019,San Diego,2182784004,2019-03-01


In [110]:
# NOTE(review): disabled exploratory bar chart of port exports by month and port.
# Restore it or remove this cell before publishing.
# alt.Chart(ca_port_exports).mark_bar().encode(
#     x=alt.X('month:N'),
#     y=alt.Y('value:Q'),
#     color='port:N'
# )

---

### Labor force participation rate

In [111]:
# https://data.edd.ca.gov/Labor-Force-and-Unemployment-Rates/Labor-Force-Participation-Rate-US-and-California/ww59-3giz

In [112]:
# Fetch the labor force participation dataset as JSON from the EDD open-data
# portal. No query parameters are passed, so the endpoint's default row limit
# applies -- NOTE(review): confirm the full series is returned.
labor_part_rate = pd.read_json('https://data.edd.ca.gov/resource/ww59-3giz.json')

In [113]:
# Normalize column names to snake_case. Every replacement is a literal
# substring swap, so regex=False is passed explicitly (as the trade cells
# already do) instead of relying on the version-dependent default, under which
# '(' and ')' are regex metacharacters.
labor_part_rate.columns = (
    labor_part_rate.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
    .str.replace('_&_', '_', regex=False)
)

In [114]:
# Give the two rate columns display-ready names; they become the legend labels
# once the frame is melted. Reassignment is used instead of inplace=True so the
# cell reads as a plain transformation of its input.
labor_part_rate = labor_part_rate.rename(columns={
    'california_labor_force_participation': 'California',
    'us_labor_force_participation': 'United States',
})

In [115]:
# Long format: one row per (date, place) with the participation rate in `rate`.
labor_part_rate_melt = labor_part_rate.melt(
    id_vars=['date'],
    value_vars=['California', 'United States'],
    var_name='place',
    value_name='rate',
)

In [116]:
# Labor force participation for dates after 01/01/1976, California vs. the
# United States, on a 60%-70% y-axis.
# NOTE(review): the `ca_claims_chart` name looks carried over from another
# section; it is kept so any other reference to it still resolves.
ca_claims_chart = (
    alt.Chart(labor_part_rate_melt.query("date > '01/01/1976'"))
    .mark_line(size=2)
    .encode(
        x=alt.X('date:T', title='', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
        y=alt.Y(
            'rate:Q',
            title=' ',
            axis=alt.Axis(
                gridWidth=.6,
                gridColor='#dddddd',
                offset=3,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                format='%',
            ),
            scale=alt.Scale(domain=(.6, .7)),
        ),
        color=alt.Color(
            'place',
            title=' ',
            scale=alt.Scale(
                domain=['California', 'United States'],
                range=['#B32F2E', '#ccc'],
            ),
        ),
    )
    .properties(
        width=800,
        height=200,
        title='Labor force participation rate, by month',
    )
)

# configure_view was previously applied twice with identical arguments; once is enough.
ca_claims_chart.configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
)

In [117]:
# Labor force participation for dates after 01/01/2010, California vs. the
# United States, on a tighter 60%-65% y-axis.
ca_claims_chart_zoom = (
    alt.Chart(labor_part_rate_melt.query("date > '01/01/2010'"))
    .mark_line(size=3)
    .encode(
        x=alt.X('date:T', title=' ', axis=alt.Axis(grid=False, tickCount=6, format='%Y')),
        y=alt.Y(
            'rate:Q',
            title=' ',
            axis=alt.Axis(
                gridWidth=.6,
                gridColor='#dddddd',
                offset=3,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                format='%',
            ),
            scale=alt.Scale(domain=(.6, .65)),
        ),
        color=alt.Color(
            'place',
            title=' ',
            scale=alt.Scale(
                domain=['California', 'United States'],
                range=['#B32F2E', '#ccc'],
            ),
        ),
    )
    .properties(
        width=800,
        height=200,
        title='Labor force participation rate, by month',
    )
)

# Render with the view border hidden and a line-style legend on top.
ca_claims_chart_zoom.configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke',
)

In [118]:
### Current employment statistics

In [119]:
#https://data.edd.ca.gov/Industry-Information-/Current-Employment-Statistics-CES-/r4zm-kdcg

In [120]:
# Credentials are read from the environment rather than being committed to the
# notebook. Any token/password that was previously hardcoded in this cell
# should be treated as exposed and rotated.
#   EDD_SOCRATA_APP_TOKEN  - Socrata application token
#   EDD_SOCRATA_USERNAME / EDD_SOCRATA_PASSWORD - optional account credentials;
#   when unset, None is passed for them.
ces_token = os.environ.get('EDD_SOCRATA_APP_TOKEN')

client = Socrata(
    'data.edd.ca.gov',
    ces_token,
    username=os.environ.get('EDD_SOCRATA_USERNAME'),
    password=os.environ.get('EDD_SOCRATA_PASSWORD'),
)

# Pull the Current Employment Statistics dataset in a single request; the limit
# is set high so the response is not cut off at a smaller page size.
results = client.get("r4zm-kdcg", limit=1600000)
ces = pd.DataFrame.from_records(results)

In [121]:
# Normalize column names to snake_case. Every replacement is a literal
# substring swap, so regex=False is passed explicitly (as the trade cells
# already do) instead of relying on the version-dependent default, under which
# '(' and ')' are regex metacharacters.
ces.columns = (
    ces.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
    .str.replace('-', '_', regex=False)
    .str.replace('_&_', '_', regex=False)
)

In [122]:
# Convert the API's raw values to proper dtypes: timestamps for `date`,
# integers for `current_employment`.
ces = ces.assign(
    date=pd.to_datetime(ces['date']),
    current_employment=ces['current_employment'].astype(int),
)

In [123]:
# Seasonally adjusted rows whose area_type is 'State'.
ces_state = ces.loc[
    ces['seasonally_adjusted'].eq('Y')
    & ces['area_type'].eq('State')
]

In [124]:
# All seasonally adjusted rows, regardless of area or industry.
ces_sa = ces.loc[ces['seasonally_adjusted'].eq('Y')]

In [125]:
# Quick look at the first rows of the state-level, seasonally adjusted subset.
ces_state.head()

Unnamed: 0,area_type,area_name,year,month,date,series_code,industry_title,seasonally_adjusted,current_employment
0,State,California,2020,March,2020-03-01,0,Total Nonfarm,Y,17505000
5,State,California,2020,March,2020-03-01,6000000,Goods Producing,Y,2218700
6,State,California,2020,March,2020-03-01,7000000,Service Providing,Y,15286300
10,State,California,2020,March,2020-03-01,10000000,Mining and Logging,Y,23000
16,State,California,2020,March,2020-03-01,11000000,Total Farm,Y,432200


In [126]:
# Seasonally adjusted 'Retail Trade' rows across all areas.
ces_retail = pd.DataFrame(
    ces.loc[
        ces['industry_title'].eq('Retail Trade')
        & ces['seasonally_adjusted'].eq('Y')
    ]
)

In [127]:
# Seasonally adjusted 'Total Nonfarm' rows for the
# 'Los Angeles-Long Beach-Glendale MD' area.
ces_totalnonfarm_la_metro = pd.DataFrame(
    ces.loc[
        ces['industry_title'].eq('Total Nonfarm')
        & ces['seasonally_adjusted'].eq('Y')
        & ces['area_name'].eq('Los Angeles-Long Beach-Glendale MD')
    ]
)

In [128]:
# Total nonfarm employment in the LA metro subset for dates after 2013-01-01.
# NOTE(review): this rebinds `ca_claims_chart_zoom`, which the labor force
# participation cell also assigns -- consider a dedicated name.
ca_claims_chart_zoom = (
    alt.Chart(ces_totalnonfarm_la_metro.query("date > '2013-01-01'"))
    .mark_line(size=4)
    .encode(
        x=alt.X(
            'date:T',
            title=' ',
            axis=alt.Axis(offset=20, grid=False, tickCount=8, format='%b. %-d, %Y'),
        ),
        y=alt.Y(
            'current_employment:Q',
            title=' ',
            axis=alt.Axis(
                gridWidth=.6,
                gridColor='#dddddd',
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                format='',
            ),
            scale=alt.Scale(domain=(4000000, 4700000)),
        ),
    )
    .properties(
        width=800,
        height=200,
        title='Total employment in California, Los Angeles metro area',
    )
)

# Render with the view border hidden and a line-style legend on top.
ca_claims_chart_zoom.configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke',
)

In [129]:
# Areas with the most rows in the CES extract.
ces['area_name'].value_counts().head()

California                            114708
Los Angeles County                     65340
Los Angeles-Long Beach-Glendale MD     65340
Orange County                          36663
Anaheim-Santa Ana-Irvine MD            36663
Name: area_name, dtype: int64

In [130]:
# Industry titles with the most rows in the CES extract.
ces['industry_title'].value_counts().head()

Total Nonfarm         50925
Service Providing     32049
Government            32049
Federal Government    32049
State Government      32025
Name: industry_title, dtype: int64