In [None]:
import pandas as pd
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels\n",
    "import pickle\n",
    "import plotly.express as px\n",
    "import plotly.offline as py\n",
    "import plotly.graph_objs as go\n",
    "\n",""
    "# clean_national_data = pd.read_pickle(\".\\\\Cleaned\\\\clean_national_data.pkl\")\n",
    "# clean_state_data = pd.read_pickle(\".\\\\Cleaned\\\\clean_state_data.pkl\")\n",
    "# work_stop = pd.read_pickle(\".\\\\PrelimEDA\\\\work_stop.pkl\")\n",
    "# minwagestate = pd.read_pickle(\".\\\\PrelimEDA\\\\minwagestate.pkl\")\n",
    "# finalfull = pd.read_pickle(\".\\\\PrelimEDA\\\\finalfull.pkl\")\n",
    "# final_earnings = pd.read_pickle(\".\\\\PrelimEDA\\\\final_earnings.pkl\")\n",
    "\n",
    "# otherStates = {k:'Other' for k in ['East Coast States', 'Nationwide', 'Interstate']}\n",
    "# industryCounts = work_stop['iTitle'].value_counts().reset_index().rename({'index': 'Industry', 'iTitle': 'Counts'}, axis = 1)\n",
    "# stateCounts = pd.Series(np.concatenate(work_stop['States'])).str.strip().replace(otherStates)\n",
    "# stateCounts = pd.Series(np.where(stateCounts == \"\", None, stateCounts)).value_counts().reset_index().rename({'index': 'State', 0: 'Counts'}, axis = 1)\n",
    "#  #Need to look and see what they mean by nationwide, east coast states, and interstate\n",
    "\n",
    "# earnInd = final_earnings.groupby(['industry_name'])['AvgWeeklyEarnings'].mean().sort_values(ascending = False)\n",
    "# earnInd = earnInd.reset_index()\n",
    "\n",
    "# earnState = final_earnings.groupby(['StateCode'])['AvgWeeklyEarnings'].mean().sort_values(ascending = False)\n",
    "# earnState = earnState.reset_index()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 1 | Data Cleaning and Transformation:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generalized Data Cleaning for Ease of Use:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#This option is to prevent pandas from truncating columns that are strings.\n",
    "#Old versions of pandas may need -1 instead of None\n",
    "pd.set_option('display.max_colwidth', None)\n",
    "\n",
    "\n",
    "#This specifies how many days of employement data we require before a work stoppage. \n",
    "#Right now it is set to six months, meaning we are only working\n",
    "#with work stoppages where we have at least six months of data before the work\n",
    "#stoppage began and six months of data after the work stoppage ended.\n",
    "time_window = pd.Timedelta(180,\"days\")\n",
    "\n",
    "\n",
    "#This specifies to print out messages while processing the data.\n",
    "be_verbose = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f5c90e26",
   "metadata": {},
   "outputs": [],
   "source": [
    "#This cell loads in all the data.\n",
    "#The cleaned data is stored later, so this doesn't need to be rerun unless we're improving the data.\n",
    "\n",
    "\n",
    "#This excel file contains data on each work stoppage.\n",
    "#Industry is by 2017 NAICS code. \n",
    "#This data is from 1988 to 2020.\n",
    "#We changed the xlsx file to a xls file because of compatibility issues with pandas reading a xlsx file with xlrd.\n",
    "#This file is originally from https://www.bls.gov/web/wkstp/monthly-listing.xlsx\n",
    "work_stoppage_df = pd.read_excel(\".//WorkStoppage//work_stop_monthly.xls\", \n",
    "    header=1, skipfooter=6, dtype={\"Industry code[1]\":int}   )\n",
    "#There is an entry or two with the states list empty, we replace the NaN value with an empty string.\n",
    "work_stoppage_df.fillna(\"\", inplace=True)\n",
    "\n",
    "#This text file contains a table with info about each industry type.\n",
    "#We use it to convert the NAICS industry code of work_stoppage_df to the industry code used \n",
    "#in the Current Employment Statistics files. This doesn't give a perfect match up,\n",
    "#so we have to match many of the entries by hand.\n",
    "#This file is originally from https://download.bls.gov/pub/time.series/ce/ce.industry\n",
    "industry_lookup_df = pd.read_csv(\".//CurrentEmploymentStats//ce.industry\", sep=\"\\t\")\n",
    "\n",
    "#This text file contains info about each series_id.\n",
    "#We use it to turn a BLS industry code into a Current Employment Statistic series_id.\n",
    "#This file is orginally from https://download.bls.gov/pub/time.series/ce/ce.series\n",
    "current_employment_series_df = pd.read_csv(\".//CurrentEmploymentStats//ce.series.txt\", sep=\"\\t\", header=0,\n",
    "    names=['series_id', 'supersector_code', 'industry_code',\n",
    "       'data_type_code', 'seasonal', 'series_title', 'footnote_codes',\n",
    "       'begin_year', 'begin_period', 'end_year', 'end_period'],\n",
    "     converters={'series_id':str.strip} )\n",
    "#The header=0 and names is to fix some white space issues with the column names.\n",
    "#The converter is to fix white space issues with the series_id values.\n",
    "#We restrict this data set to only the rows for average weekly earnings of all employees, \n",
    "#which is data_type_code 11, and we use the seasonally adjusted data (seasonally adjusted \n",
    "#is good for comparing monthly data, whereas unadjusted is good for comparing yearly data).\n",
    "#Depending on what we do, we might want to switch to seasonable data or even use both.\n",
    "#Non-adjusted is 'U' instead of 'S'\n",
    "current_employment_series_df = current_employment_series_df[\n",
    "    (current_employment_series_df[\"data_type_code\"]==11)\n",
    "    &(current_employment_series_df[\"seasonal\"]=='S')]\n",
    "\n",
    "#This text file contains the value for the each Current Employment Statistic.\n",
    "#This data set is from 1939 to 2021, but not for all series. It is very spotty.\n",
    "#This file is originally from https://download.bls.gov/pub/time.series/ce/ce.data.0.AllCESSeries\n",
<<<<<<< HEAD
    "current_employment_statistic_df = pd.read_csv(\".//CurrentEmploymentStats//ce.data.0.AllCESSeries.txt\", \n",
=======
    "current_employment_statistic_df = pd.read_csv(\".\\CurrentEmploymentStats\\ce.data.0.AllCESSeries.txt\", \n",
>>>>>>> 06568a1154fed0d7d54efa850de6c3142bfef1df
    "    sep=\"\\t\", header=0, \n",
    "    names=['series_id', 'year', 'period', 'value','footnote_codes'],\n",
    "    converters={'series_id':str.strip} )\n",
    "#The header=0 and names is to fix some white space issues with the column names.\n",
    "#The converter is to fix white space issues with the series_id values.\n",
    "\n",
    "\n",
    "\n",
    "#The datasets oe.data.0.Current and oe.data.1.AllData are only for 2020, so we can't use them for much.\n",
    "#occupation_employment_df = pd.read_csv(\".\\OccEmployment\\oe.data.0.Current\", sep=\"\\s+\")\n",
    "#occupation_employment_df1 = pd.read_csv(\".\\OccEmployment\\oe.data.1.AllData\", sep=\"\\s+\")\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "#Below are some data frames for state level data.\n",
    "\n",
    "#This text file is for states_metro_employment_series. It has information about the\n",
    "#series in the entries of sa.data.0.Current. Unfortunately, the industry data is all \n",
    "#over the place with this data set. Using this might require a lot of data matching done by hand,\n",
    "#it doesn't even look like we can easily pull average wage data for an entire state.\n",
    "state_series_df = pd.read_csv(\".//sa.series\", delim_whitespace=True,\n",
    "    names= ['series_id', 'state_code', 'area_code', 'industry_code', 'detail_code',\n",
    "       'data_type_code', 'seasonal', 'benchmark_year', 'begin_year',\n",
    "       'begin_period', 'end_year', 'end_period'],\n",
    "      header=None, skiprows=1, index_col=False )              \n",
    "#We restrict to data_type_code 4, which  is Average Weekly Earnings In Dollars    \n",
    "state_series_df = state_series_df[ (state_series_df[\"data_type_code\"]==4) ]\n",
    "\n",
    "\n",
    "#This text file contains the actual data for a given series.\n",
<<<<<<< HEAD
    "states_metro_employment_stats = pd.read_csv(\".//StateMetroEmployment//sa.data.0.Current.csv\", sep=\"\\s+\")\n",
=======
    "states_metro_employment_stats = pd.read_csv(\".\\StateMetroEmployment\\sa.data.0.Current.csv\", sep=\"\\s+\")\n",
>>>>>>> 06568a1154fed0d7d54efa850de6c3142bfef1df
    "#This uses SIC code for industry, or so they say. It doesn't look to match the actual SIC codes.\n",
    "#This isn't currently in use, because of matching the data with the work stoppage data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ad8e7eea",
   "metadata": {},
   "outputs": [],
   "source": [
    "#This is a bunch of hand matched codes based on the cell below.\n",
    "#This was matched based on the values in 2-6 digit_2017_Codes.xlsx\n",
    "#and ce.industry.\n",
    "naics_to_ce_industry = {\n",
    "92:90922920,\n",
    "923:60541612,\n",
    "3152:32315280,\n",
    "21221:10212200,\n",
    "22121:44221200,\n",
    "22131:None,\n",
    "23731:20237300,\n",
    "31212:32329140,\n",
    "31523:32315280,\n",
    "32721:31327200,\n",
    "33341:31333400,\n",
    "33421:None,\n",
    "33441:31334400,\n",
    "33451:None,\n",
    "33612:None,\n",
    "33621:31336200,\n",
    "33641:31336400,\n",
    "33651:31336900,\n",
    "48521:43485500,\n",
    "48831:43488390,\n",
    "48849:43488400,\n",
    "49211:43492100,\n",
    "51711:50517000,\n",
    "61111:65611100,\n",
    "61121:65611200,\n",
    "61131:65611300,\n",
    "62111:65621100,\n",
    "62210:65622100,\n",
    "62211:65622100,\n",
    "62311:65623100,\n",
    "71111:70711190,\n",
    "92211:None,\n",
    "92214:None,\n",
    "92313:None,\n",
    "211111:10211000,\n",
    "212112:10212113,\n",
    "212230:10212200,\n",
    "212231:10212200,\n",
    "212234:10212200,\n",
    "221110:44221110,\n",
    "221210:44221200,\n",
    "236000:20236000,\n",
    "236200:20236200,\n",
    "236220:20236220,\n",
    "237310:20237300,\n",
    "237990:20237000,\n",
    "238140:20238140,\n",
    "238160:20238160,\n",
    "238210:20238210,\n",
    "238220:20238220,\n",
    "238320:20238320,\n",
    "238350:20238350,\n",
    "238910:20238910,\n",
    "311313:None,\n",
    "311320:None,\n",
    "311812:32311813,\n",
    "313312:32313000,\n",
    "315299:32315280,\n",
    "325180:32325180,\n",
    "325221:32325211,\n",
    "325222:32325211,\n",
    "326199:32326190,\n",
    "326210:32326210,\n",
    "326211:32326210,\n",
    "331110:31331100,\n",
    "331111:31331100,\n",
    "331310:31331300,\n",
    "331312:31331300,\n",
    "331513:31331510,\n",
    "332112:31331400,\n",
    "332913:32326120,\n",
    "332992:31332994,\n",
    "333111:None,\n",
    "333611:31333600,\n",
    "333618:None,\n",
    "333921:None,\n",
    "333996:None,\n",
    "334290:31334200,\n",
    "334612:None,\n",
    "335222:31335200,\n",
    "335224:31335200,\n",
    "335931:31335930,\n",
    "336120:31336100,\n",
    "336212:31336214,\n",
    "336300:31336300,\n",
    "336321:31336320,\n",
    "336322:31336320,\n",
    "336330:31336330,\n",
    "336350:31336350,\n",
    "336360:31336360,\n",
    "336410:31336400,\n",
    "336414:31336419,\n",
    "336510:31336900,\n",
    "336900:None,\n",
    "336992:None,\n",
    "424410:41424410,\n",
    "441110:42441110,\n",
    "445110:42445110,\n",
    "481111:43481100,\n",
    "482111:None,\n",
    "484210:43484210,\n",
    "485110:43485500,\n",
    "485111:43485500,\n",
    "485112:43482000,\n",
    "485113:43485500,\n",
    "485310:43485310,\n",
    "485991:43485900,\n",
    "488190:43488100,\n",
    "488310:43488390,\n",
    "488320:43488320,\n",
    "488330:43488390,\n",
    "488490:43488400,\n",
    "512110:50512110,\n",
    "517110:50517000,\n",
    "524114:55524110,\n",
    "561612:60561613,\n",
    "561720:60561720,\n",
    "561920:60561920,\n",
    "562111:60562100,\n",
    "562219:60562219,\n",
    "611110:65611100,\n",
    "611111:65611100,\n",
    "611210:65611200,\n",
    "611310:65611300,\n",
    "621610:65611610,\n",
    "622110:65622100,\n",
    "622210:65622200,\n",
    "624110:65624110,\n",
    "624410:65624400,\n",
    "721110:70721110,\n",
    "721120:70721120,\n",
    "722510:70722500,\n",
    "921100:None,\n",
    "921110:None,\n",
    "921111:None,\n",
    "921190:None}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "181ae001",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:22131\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:333111\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:482111\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:333111\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:482111\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:336992\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:482111\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:482111\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:333996\n",
      "Here's a NAICS code we should try to match:92313\n",
      "No series data available for this industry code.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:33451\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:33612\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:333618\n",
      "Here's a NAICS code we should try to match:33421\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:482111\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:92214\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:311320\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:92211\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:334612\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:921190\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:333921\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:921190\n",
      "No series data available for this industry code.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we should try to match:921100\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:921100\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:921100\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:336900\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we should try to match:311313\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:921110\n",
      "We have some data to use!\n",
      "Here's a NAICS code we should try to match:921110\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:921110\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "Here's a NAICS code we should try to match:921110\n",
      "Here's a NAICS code we should try to match:921111\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we should try to match:921100\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we should try to match:921100\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we should try to match:921100\n",
      "Here's a NAICS code we should try to match:921100\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we should try to match:921100\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "No series data available for this industry code.\n",
      "We have some data to use!\n",
      "We have some data to use!\n"
     ]
    }
   ],
   "source": [
    "#This cell goes through the work stoppage data frame and tries to match each stoppage to a CES series.\n",
    "#The result is written to a pickle file, so this does not need to be rerun unless we're\n",
    "#improving the data.\n",
    "#\n",
    "#Names used from earlier cells: work_stoppage_df, industry_lookup_df, naics_to_ce_industry,\n",
    "#    current_employment_series_df, current_employment_statistic_df, time_window, be_verbose.\n",
    "#\n",
    "#For each work stoppage:\n",
    "#    Get the BLS industry code from the work stoppage NAICS code, first from industry_lookup_df\n",
    "#        and otherwise from the hand-built dictionary naics_to_ce_industry.\n",
    "#        NAICS codes with no match (a None entry, or no entry at all) are collected in\n",
    "#        naics_codes_to_match so they can be matched by hand before the next run.\n",
    "#    Get the relevant CES series id from the BLS industry code.\n",
    "#        This fails some of the time, but I don't think there's anything to be done\n",
    "#        about it. The data just isn't there.\n",
    "#    If the CES series starts at least time_window before the work stoppage began, then\n",
    "#        we record the series id. This we can use to look up whatever data we want.\n",
    "#        Since this data is at the national level, we don't bother separating by state.\n",
    "#\n",
    "#Output: clean_national_data, one row per usable work stoppage, also saved as a pickle.\n",
    "\n",
    "\n",
    "rows_to_add = []\n",
    "naics_codes_to_match = []\n",
    "for index, row in work_stoppage_df.iterrows():\n",
    "    naics_code = row[\"Industry code[1]\"]\n",
    "    start_date = row[\"Work stoppage beginning date\"]\n",
    "    end_date = row[\"Work stoppage ending date\"]\n",
    "\n",
    "    industry_code = industry_lookup_df[ str(naics_code)==industry_lookup_df[\"naics_code\"] ][\"industry_code\"]\n",
    "    if len(industry_code)!=0:#Did we get an industry code for free?\n",
    "        industry_code = industry_code.iloc[0]\n",
    "    else:#Do we have a match done by hand?\n",
    "        #.get() rather than [] so that a NAICS code which is not in the dictionary yet is\n",
    "        #recorded below instead of stopping the whole run with a KeyError.\n",
    "        industry_code = naics_to_ce_industry.get(naics_code)\n",
    "\n",
    "    if industry_code is None:\n",
    "        #Remember these so we can try to match them by hand.\n",
    "        if be_verbose:\n",
    "            print(f\"Here's a NAICS code we should try to match:{naics_code}\")\n",
    "        naics_codes_to_match.append(naics_code)\n",
    "        continue\n",
    "\n",
    "    series_id = current_employment_series_df[\n",
    "        current_employment_series_df[\"industry_code\"]==industry_code][\"series_id\"]\n",
    "    if len(series_id)==0:\n",
    "        if be_verbose:\n",
    "            print(\"No series data available for this industry code.\")\n",
    "        continue\n",
    "    if len(series_id)>1:\n",
    "        if be_verbose:\n",
    "            print(\"Multiple series data available for this industry code. Weird.\")\n",
    "        continue\n",
    "\n",
    "    series_id = series_id.iloc[0]\n",
    "    wage_data = current_employment_statistic_df[\n",
    "        current_employment_statistic_df[\"series_id\"]==series_id]\n",
    "    if len(wage_data)==0:\n",
    "        #min() below raises ValueError on an empty selection, so skip the series here.\n",
    "        if be_verbose:\n",
    "            print(\"There is no data available for this series.\")\n",
    "        continue\n",
    "\n",
    "    #Is there sufficient data from before the work stoppage began?\n",
    "    #This is controlled by the time_window variable.\n",
    "    #ce_date is the first day of the earliest month in the series. The first character\n",
    "    #of the period value is dropped before the rest is read as a month number.\n",
    "    first_year = min(wage_data[\"year\"])\n",
    "    ce_year = int(first_year)\n",
    "    #Filter with the uncast value so the comparison also works if the column is not numeric.\n",
    "    ce_month = int(min(wage_data[wage_data[\"year\"]==first_year][\"period\"])[1:])\n",
    "    ce_date = pd.Timestamp(year=ce_year,month=ce_month,day=1)\n",
    "    earlier = start_date-time_window\n",
    "\n",
    "    #do we got data?\n",
    "    if earlier >= ce_date:\n",
    "        if be_verbose:\n",
    "            print(\"We have some data to use!\")\n",
    "        organization = row['Organizations involved']\n",
    "        areas = row['Areas']\n",
    "        ownership = row['Ownership']\n",
    "        states = row[\"States\"].split(\",\")\n",
    "        rows_to_add.append([organization, states,  areas, ownership, naics_code,\n",
    "            start_date, end_date, series_id] )\n",
    "    else:\n",
    "        if be_verbose:\n",
    "            print(\"No data is available before the work stoppage.\")\n",
    "\n",
    "clean_national_data = pd.DataFrame( data=rows_to_add,\n",
    "    columns=[\"organization\", \"states\", \"areas\", \"ownership\",\n",
    "        \"naics industry code\", \"start date\", \"end date\", \"series_id\"] )\n",
    "\n",
    "clean_national_data.to_pickle(\".\\\\Cleaned\\\\clean_national_data.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "f733f2a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "sa_state_code_to_abbr = {\n",
    "1:\"AL\",\n",
    "2:\"AK\",\n",
    "4:\"AZ\",\n",
    "5:\"AR\",\n",
    "6:\"CA\",\n",
    "8:\"CO\",\n",
    "9:\"CT\",\n",
    "10:\"DE\",\n",
    "11:\"DC\",\n",
    "12:\"FL\",\n",
    "13:\"GA\",\n",
    "15:\"HI\",\n",
    "16:\"ID\",\n",
    "17:\"IL\",\n",
    "18:\"IN\",\n",
    "19:\"IA\",\n",
    "20:\"KS\",\n",
    "21:\"KY\",\n",
    "22:\"LA\",\n",
    "23:\"ME\",\n",
    "24:\"MD\",\n",
    "25:\"MA\",\n",
    "26:\"MI\",\n",
    "27:\"MN\",\n",
    "28:\"MS\",\n",
    "29:\"MO\",\n",
    "30:\"MT\",\n",
    "31:\"NE\",\n",
    "32:\"NV\",\n",
    "33:\"NH\",\n",
    "34:\"NJ\",\n",
    "35:\"NM\",\n",
    "36:\"NY\",\n",
    "37:\"NC\",\n",
    "38:\"ND\",\n",
    "39:\"OH\",\n",
    "40:\"OK\",\n",
    "41:\"OR\",\n",
    "42:\"PA\",\n",
    "43:\"PR\",\n",
    "44:\"RI\",\n",
    "45:\"SC\",\n",
    "46:\"SD\",\n",
    "47:\"TN\",\n",
    "48:\"TX\",\n",
    "49:\"UT\",\n",
    "50:\"VT\",\n",
    "51:\"VA\",\n",
    "52:\"VI\",\n",
    "53:\"WA\",\n",
    "54:\"WV\",\n",
    "55:\"WI\",\n",
    "56:\"WY\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e2c86951",
   "metadata": {},
   "outputs": [],
   "source": [
    "#This dictionary turns a NAICS code to the industry code of the sa data.\n",
    "#These were all done by hand.\n",
    "naics_to_sa_industry = {\n",
    "62:None,\n",
    "92:None,\n",
    "236:200001,\n",
    "237:215026,\n",
    "322:426002,\n",
    "336:337002,\n",
    "517:548103,\n",
    "622:880603,\n",
    "623:880556,\n",
    "923:None,\n",
    "2211:None,\n",
    "2362:215403,\n",
    "3118:420503,\n",
    "3141:422002,\n",
    "3152:423002,\n",
    "3221:426156,\n",
    "3315:333203,\n",
    "3324:334103,\n",
    "3331:335303,\n",
    "3361:337114,\n",
    "4243:653026,\n",
    "4244:651403,\n",
    "4841:542103,\n",
    "5311:765002,\n",
    "21221:110002,\n",
    "22112:None,\n",
    "22121:549203,\n",
    "22131:None,\n",
    "23731:216103,\n",
    "23811:217703,\n",
    "23812:217703,\n",
    "23814:217403,\n",
    "23822:217103,\n",
    "23829:215009,\n",
    "23831:None,\n",
    "23832:217203,\n",
    "23835:217503,\n",
    "31212:None,\n",
    "31523:423002,\n",
    "32721:332002,\n",
    "33341:650703,\n",
    "33421:548136,\n",
    "33422:336536,\n",
    "33441:336744,\n",
    "33451:338136,\n",
    "33593:336403,\n",
    "33612:337144,\n",
    "33621:337103,\n",
    "33632:337103,\n",
    "33641:337203,\n",
    "33651:337403,\n",
    "42482:651803,\n",
    "44111:655103,\n",
    "44511:654103,\n",
    "44812:656203,\n",
    "48412:542103,\n",
    "48521:541002,\n",
    "48831:544002,\n",
    "48832:None,\n",
    "48849:None,\n",
    "49211:542103,\n",
    "51111:427103,\n",
    "51512:548303,\n",
    "51711:548103,\n",
    "53112:None,\n",
    "54111:None,\n",
    "54181:873103,\n",
    "56172:None,\n",
    "61111:882103,\n",
    "61121:938224,\n",
    "61131:882203,\n",
    "62111:880103,\n",
    "62210:880603,\n",
    "62211:880603,\n",
    "62311:880503,\n",
    "71111:None,\n",
    "72111:870103,\n",
    "72112:870128,\n",
    "92211:None,\n",
    "92214:None,\n",
    "92313:None,\n",
    "211111:113002,\n",
    "212112:112203,\n",
    "212230:110002,\n",
    "212231:110002,\n",
    "212234:110002,\n",
    "221110:None,\n",
    "221112:None,\n",
    "221122:549103,\n",
    "221210:549203,\n",
    "236000:215002,\n",
    "236200:215403,\n",
    "236220:215403,\n",
    "237310:216103,\n",
    "237990:215026,\n",
    "238140:217403,\n",
    "238160:217603,\n",
    "238210:217303,\n",
    "238220:217103,\n",
    "238320:217203,\n",
    "238350:217503,\n",
    "238910:None,\n",
    "311313:None,\n",
    "311320:None,\n",
    "311611:420114,\n",
    "311615:420114,\n",
    "311812:420503,\n",
    "312111:420803,\n",
    "313312:422002,\n",
    "315299:423002,\n",
    "321911:None,\n",
    "325180:428103,\n",
    "325211:428203,\n",
    "325221:428203,\n",
    "325222:428203,\n",
    "326199:430803,\n",
    "326210:430103,\n",
    "326211:430103,\n",
    "331110:333124,\n",
    "331111:333124,\n",
    "331310:None,\n",
    "331312:333254,\n",
    "331513:333254,\n",
    "332112:334609,\n",
    "332913:650703,\n",
    "332992:None,\n",
    "333111:335203,\n",
    "333415:217103,\n",
    "333611:335103,\n",
    "333618:335103,\n",
    "333921:None,\n",
    "333996:None,\n",
    "334290:336603,\n",
    "334612:None,\n",
    "335222:336303,\n",
    "335224:336303,\n",
    "335931:336434,\n",
    "336111:337103,\n",
    "336120:337109,\n",
    "336212:337109,\n",
    "336300:337144,\n",
    "336321:337144,\n",
    "336322:337144,\n",
    "336330:337144,\n",
    "336350:337144,\n",
    "336360:337144,\n",
    "336410:337203,\n",
    "336411:337203,\n",
    "336412:337203,\n",
    "336414:337236,\n",
    "336510:337403,\n",
    "336611:337303,\n",
    "336900:337009,\n",
    "336992:None,\n",
    "424410:651414,\n",
    "441110:655303,\n",
    "445110:654103,\n",
    "481111:545103,\n",
    "482111:540002,\n",
    "484121:None,\n",
    "484122:None,\n",
    "484210:None,\n",
    "485110:541002,\n",
    "485111:541002,\n",
    "485112:540002,\n",
    "485113:541002,\n",
    "485310:541002,\n",
    "485991:None,\n",
    "488190:545002,\n",
    "488310:544002,\n",
    "488320:None,\n",
    "488330:None,\n",
    "488490:None,\n",
    "512110:None,\n",
    "517110:548002,\n",
    "517311:548002,\n",
    "524114:763203,\n",
    "561612:None,\n",
    "561720:None,\n",
    "561920:None,\n",
    "562111:None,\n",
    "562219:None,\n",
    "611110:882103,\n",
    "611111:882103,\n",
    "611210:938224,\n",
    "611310:882203,\n",
    "621111:880103,\n",
    "621491:763203,\n",
    "621610:880556,\n",
    "622110:None,\n",
    "622210:None,\n",
    "624110:883503,\n",
    "624410:883503,\n",
    "711211:None,\n",
    "721110:870103,\n",
    "721120:870128,\n",
    "722510:658002,\n",
    "921100:939133,\n",
    "921110:939133,\n",
    "921111:939133,\n",
    "921190:949009\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a10afcd3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Here's a NAICS code we could try to match:488330\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we could try to match:2211\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:562111\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:22131\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:48832\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:92\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "Here's a NAICS code we could try to match:336992\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "No data is available before the work stoppage.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:321911\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:92\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we could try to match:54111\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we could try to match:23831\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:92\n",
      "No series data available for this industry code in the relevant states.\n",
      "No data is available before the work stoppage.\n",
      "No data is available before the work stoppage.\n",
      "Here's a NAICS code we could try to match:48849\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:48832\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:53112\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:56172\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:923\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:333996\n",
      "Here's a NAICS code we could try to match:92313\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:31212\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:484122\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:484122\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:56172\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:56172\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:92214\n",
      "Here's a NAICS code we could try to match:23831\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:92\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:332992\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:23831\n",
      "Here's a NAICS code we could try to match:22112\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:92\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:311320\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:92211\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:2211\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:488320\n",
      "Here's a NAICS code we could try to match:56172\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:334612\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:71111\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:562219\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:561920\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:331310\n",
      "Here's a NAICS code we could try to match:488490\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:333921\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:221110\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:484210\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "Here's a NAICS code we could try to match:561720\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:561612\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "There is no data available for this series.\n",
      "Here's a NAICS code we could try to match:512110\n",
      "Here's a NAICS code we could try to match:485991\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:561720\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:484121\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:238910\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:221112\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:711211\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:311313\n",
      "No series data available for this industry code in the relevant states.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "We have some data to use!\n",
      "There is no data available for this series.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:561612\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:561720\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622210\n",
      "Here's a NAICS code we could try to match:622210\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "There is no data available for this series.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "No series data available for this industry code in the relevant states.\n",
      "We have some data to use!\n",
      "We have some data to use!\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:622110\n",
      "There is no data available for this series.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "No series data available for this industry code in the relevant states.\n",
      "Here's a NAICS code we could try to match:622110\n",
      "Here's a NAICS code we could try to match:62\n"
     ]
    }
   ],
   "source": [
    "#This cell goes through the work stoppage data frame and tries to match it up with the SA data\n",
    "#The data is written to a pickle file, so this does not need to be rerun, unless we're \n",
    "#improving the data.\n",
    "\n",
    "#For each work stoppage:  \n",
    "#    Get the SA industry code from the work stoppage NAICS code.\n",
    "#        These are stored in the dictionary naics_to_sa_industry.\n",
    "#    Get the relevant SA series ids from the SA industry code. Since this is state level data,\n",
    "#    we also require that the SA series is for a state appearing in the list of states \n",
    "#    for the work stoppage. \n",
    "#        This fails some of the time, but I don't think there's anything to be done\n",
    "#        about it. The data just isn't there.\n",
    "#        If a work stoppage occurred in multiple states and there is data for multiple states,\n",
    "#        we record each different state data in a separate row.\n",
    "#        The translation of an SA state code to a work stoppage state abbreviation is done via\n",
    "#        the dictionary sa_state_code_to_abbr.\n",
    "#    If there is data for the SA series that is from before the work stoppage (at least time_window days), then\n",
    "#        we record the series id. This we can use to look up whatever data we want.\n",
    "\n",
    "\n",
    "#Collect one output row per (work stoppage, matching SA state series).\n",
    "rows_to_add = []\n",
    "for index, row in work_stoppage_df.iterrows():\n",
    "    naics_code = row[\"Industry code[1]\"]\n",
    "    states = row[\"States\"]\n",
    "    start_date = row[\"Work stoppage beginning date\"]\n",
    "    end_date = row[\"Work stoppage ending date\"]\n",
    "\n",
    "    industry_code = naics_to_sa_industry[naics_code]\n",
    "    if industry_code is None:\n",
    "        #No SA industry code is recorded for this NAICS code.\n",
    "        if be_verbose:\n",
    "            print(f\"Here's a NAICS code we could try to match:{naics_code}\")\n",
    "        continue\n",
    "\n",
    "    #Keep the SA series for this industry whose state is one of the work stoppage states.\n",
    "    series_ids = state_series_df[ state_series_df.apply(\n",
    "        lambda x: x[\"industry_code\"]==industry_code and sa_state_code_to_abbr[x[\"state_code\"]] in states,\n",
    "        axis=1)\n",
    "    ][\"series_id\"]\n",
    "\n",
    "    if len(series_ids)==0:\n",
    "        if be_verbose:\n",
    "            print(\"No series data available for this industry code in the relevant states.\")\n",
    "        continue\n",
    "\n",
    "    for series_id in series_ids:\n",
    "        wage_data = states_metro_employment_stats[\n",
    "            states_metro_employment_stats[\"series_id\"]==series_id]\n",
    "\n",
    "        #Sometimes a valid series_id does not have any data.\n",
    "        if len(wage_data)==0:\n",
    "            if be_verbose:\n",
    "                print(\"There is no data available for this series.\")\n",
    "            continue\n",
    "\n",
    "        #Is there sufficient data from before the work stoppage began?\n",
    "        #This is controlled by the time_window variable.\n",
    "        #ce_date is the first day of the earliest (year, period) in the series;\n",
    "        #the leading letter of the period string is dropped to get the month number.\n",
    "        ce_year = int(min(wage_data[\"year\"]))\n",
    "        ce_month = int(min(wage_data[wage_data[\"year\"]==ce_year][\"period\"])[1:])\n",
    "        ce_date = pd.Timestamp(year=ce_year,month=ce_month,day=1)\n",
    "        earlier = start_date-time_window\n",
    "\n",
    "        #Do we have data?\n",
    "        if earlier >= ce_date:\n",
    "            if be_verbose:\n",
    "                print(\"We have some data to use!\")\n",
    "            organization = row['Organizations involved']\n",
    "            areas = row['Areas']\n",
    "            ownership = row['Ownership']\n",
    "            #Record the abbreviation of the state this particular series belongs to.\n",
    "            state = state_series_df[state_series_df.series_id==series_id][\"state_code\"]\n",
    "            state = sa_state_code_to_abbr[state.iloc[0]]\n",
    "            rows_to_add.append([organization, state, areas, ownership, naics_code,\n",
    "                start_date, end_date, series_id] )\n",
    "        else:\n",
    "            #Only print when be_verbose is set, like every other message in this cell.\n",
    "            if be_verbose:\n",
    "                print(\"No data is available before the work stoppage.\")\n",
    "\n",
    "clean_state_data = pd.DataFrame( data=rows_to_add,\n",
    "    columns=[\"organization\", \"state\", \"areas\", \"ownership\",\n",
    "        \"naics industry code\", \"start date\", \"end date\", \"series_id\"] )\n",
    "\n",
    "clean_state_data.to_pickle(\".\\\\Cleaned\\\\clean_state_data.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "a8ca1390",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Run this if we're just loading the data instead of loading and cleaning from scratch.\n",
    "clean_national_data = pd.read_pickle(\".\\\\Cleaned\\\\clean_national_data.pkl\")\n",
    "\n",
    "#Run this if we're just loading the data instead of loading and cleaning from scratch.\n",
    "clean_state_data = pd.read_pickle(\".\\\\Cleaned\\\\clean_state_data.pkl\")\n",
    "\n",
    "#Work Stop to pickle:\n",
    "work_stoppage_df.to_pickle(\".\\\\PrelimEDA\\\\work_stop.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "1df8a4de",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>organization</th>\n",
       "      <th>states</th>\n",
       "      <th>areas</th>\n",
       "      <th>ownership</th>\n",
       "      <th>naics industry code</th>\n",
       "      <th>start date</th>\n",
       "      <th>end date</th>\n",
       "      <th>series_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Bombardier Learjet</td>\n",
       "      <td>[KS]</td>\n",
       "      <td>Wichita</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>336411</td>\n",
       "      <td>2006-10-02</td>\n",
       "      <td>2006-10-23 00:00:00</td>\n",
       "      <td>CES3133641111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ABM Janitorial Services, GCA Services Group, Sanitors Services of Texas, Pritchard Industries Southwest</td>\n",
       "      <td>[TX]</td>\n",
       "      <td>Houston</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>561720</td>\n",
       "      <td>2006-10-23</td>\n",
       "      <td>2006-11-20 00:00:00</td>\n",
       "      <td>CES6056172011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>National Fire Sprinkler Association</td>\n",
       "      <td>[Nationwide]</td>\n",
       "      <td></td>\n",
       "      <td>Private industry</td>\n",
       "      <td>238220</td>\n",
       "      <td>2007-04-01</td>\n",
       "      <td>2007-04-16 00:00:00</td>\n",
       "      <td>CES2023822011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Associated Wall and Ceiling Contractors of Oregon and Southwest Washington, Inc.</td>\n",
       "      <td>[OR,  WA]</td>\n",
       "      <td>Multiple states</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>236220</td>\n",
       "      <td>2007-06-01</td>\n",
       "      <td>2007-06-19 00:00:00</td>\n",
       "      <td>CES2023622011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>National Electrical Contractors of America</td>\n",
       "      <td>[WA]</td>\n",
       "      <td>Puget Sound</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>238210</td>\n",
       "      <td>2007-06-01</td>\n",
       "      <td>2007-06-01 00:00:00</td>\n",
       "      <td>CES2023821011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                                              organization  \\\n",
       "0                                                                                       Bombardier Learjet   \n",
       "1  ABM Janitorial Services, GCA Services Group, Sanitors Services of Texas, Pritchard Industries Southwest   \n",
       "2                                                                      National Fire Sprinkler Association   \n",
       "3                         Associated Wall and Ceiling Contractors of Oregon and Southwest Washington, Inc.   \n",
       "4                                                               National Electrical Contractors of America   \n",
       "\n",
       "         states            areas         ownership  naics industry code  \\\n",
       "0          [KS]          Wichita  Private industry               336411   \n",
       "1          [TX]          Houston  Private industry               561720   \n",
       "2  [Nationwide]                   Private industry               238220   \n",
       "3     [OR,  WA]  Multiple states  Private industry               236220   \n",
       "4          [WA]      Puget Sound  Private industry               238210   \n",
       "\n",
       "  start date             end date      series_id  \n",
       "0 2006-10-02  2006-10-23 00:00:00  CES3133641111  \n",
       "1 2006-10-23  2006-11-20 00:00:00  CES6056172011  \n",
       "2 2007-04-01  2007-04-16 00:00:00  CES2023822011  \n",
       "3 2007-06-01  2007-06-19 00:00:00  CES2023622011  \n",
       "4 2007-06-01  2007-06-01 00:00:00  CES2023821011  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 111 rows to consider. Let's see the first 5\n",
      "\n",
      "\n",
      "The work stoppage at Bombardier Learjet started on 2006-10-02 00:00:00 and ended on 2006-10-23 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>890215</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M05</td>\n",
       "      <td>1292.05</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890216</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M06</td>\n",
       "      <td>1284.65</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890217</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M07</td>\n",
       "      <td>1330.34</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890218</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M08</td>\n",
       "      <td>1350.22</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890219</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M09</td>\n",
       "      <td>1323.73</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890220</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M10</td>\n",
       "      <td>1337.55</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890221</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M11</td>\n",
       "      <td>1375.03</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890222</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2006</td>\n",
       "      <td>M12</td>\n",
       "      <td>1394.96</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890223</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2007</td>\n",
       "      <td>M01</td>\n",
       "      <td>1422.10</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890224</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2007</td>\n",
       "      <td>M02</td>\n",
       "      <td>1428.85</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890225</th>\n",
       "      <td>CES3133641111</td>\n",
       "      <td>2007</td>\n",
       "      <td>M03</td>\n",
       "      <td>1429.74</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            series_id  year period    value footnote_codes\n",
       "890215  CES3133641111  2006    M05  1292.05            NaN\n",
       "890216  CES3133641111  2006    M06  1284.65            NaN\n",
       "890217  CES3133641111  2006    M07  1330.34            NaN\n",
       "890218  CES3133641111  2006    M08  1350.22            NaN\n",
       "890219  CES3133641111  2006    M09  1323.73            NaN\n",
       "890220  CES3133641111  2006    M10  1337.55            NaN\n",
       "890221  CES3133641111  2006    M11  1375.03            NaN\n",
       "890222  CES3133641111  2006    M12  1394.96            NaN\n",
       "890223  CES3133641111  2007    M01  1422.10            NaN\n",
       "890224  CES3133641111  2007    M02  1428.85            NaN\n",
       "890225  CES3133641111  2007    M03  1429.74            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at ABM Janitorial Services, GCA Services Group, Sanitors Services of Texas, Pritchard Industries Southwest started on 2006-10-23 00:00:00 and ended on 2006-11-20 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2960996</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M05</td>\n",
       "      <td>341.89</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2960997</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M06</td>\n",
       "      <td>337.27</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2960998</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M07</td>\n",
       "      <td>337.84</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2960999</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M08</td>\n",
       "      <td>339.30</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961000</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M09</td>\n",
       "      <td>342.81</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961001</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M10</td>\n",
       "      <td>344.56</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961002</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M11</td>\n",
       "      <td>343.04</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961003</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M12</td>\n",
       "      <td>353.80</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961004</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M01</td>\n",
       "      <td>346.51</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961005</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M02</td>\n",
       "      <td>348.53</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961006</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M03</td>\n",
       "      <td>349.74</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2961007</th>\n",
       "      <td>CES6056172011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M04</td>\n",
       "      <td>348.87</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             series_id  year period   value footnote_codes\n",
       "2960996  CES6056172011  2006    M05  341.89            NaN\n",
       "2960997  CES6056172011  2006    M06  337.27            NaN\n",
       "2960998  CES6056172011  2006    M07  337.84            NaN\n",
       "2960999  CES6056172011  2006    M08  339.30            NaN\n",
       "2961000  CES6056172011  2006    M09  342.81            NaN\n",
       "2961001  CES6056172011  2006    M10  344.56            NaN\n",
       "2961002  CES6056172011  2006    M11  343.04            NaN\n",
       "2961003  CES6056172011  2006    M12  353.80            NaN\n",
       "2961004  CES6056172011  2007    M01  346.51            NaN\n",
       "2961005  CES6056172011  2007    M02  348.53            NaN\n",
       "2961006  CES6056172011  2007    M03  349.74            NaN\n",
       "2961007  CES6056172011  2007    M04  348.87            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at National Fire Sprinkler Association started on 2007-04-01 00:00:00 and ended on 2007-04-16 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>303796</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M11</td>\n",
       "      <td>836.26</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303797</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2006</td>\n",
       "      <td>M12</td>\n",
       "      <td>838.50</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303798</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M01</td>\n",
       "      <td>844.49</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303799</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M02</td>\n",
       "      <td>847.48</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303800</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M03</td>\n",
       "      <td>850.88</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303801</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M04</td>\n",
       "      <td>856.09</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303802</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M05</td>\n",
       "      <td>864.75</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303803</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M06</td>\n",
       "      <td>871.50</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303804</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M07</td>\n",
       "      <td>871.13</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303805</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M08</td>\n",
       "      <td>879.92</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303806</th>\n",
       "      <td>CES2023822011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M09</td>\n",
       "      <td>884.52</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            series_id  year period   value footnote_codes\n",
       "303796  CES2023822011  2006    M11  836.26            NaN\n",
       "303797  CES2023822011  2006    M12  838.50            NaN\n",
       "303798  CES2023822011  2007    M01  844.49            NaN\n",
       "303799  CES2023822011  2007    M02  847.48            NaN\n",
       "303800  CES2023822011  2007    M03  850.88            NaN\n",
       "303801  CES2023822011  2007    M04  856.09            NaN\n",
       "303802  CES2023822011  2007    M05  864.75            NaN\n",
       "303803  CES2023822011  2007    M06  871.50            NaN\n",
       "303804  CES2023822011  2007    M07  871.13            NaN\n",
       "303805  CES2023822011  2007    M08  879.92            NaN\n",
       "303806  CES2023822011  2007    M09  884.52            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at Associated Wall and Ceiling Contractors of Oregon and Southwest Washington, Inc. started on 2007-06-01 00:00:00 and ended on 2007-06-19 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>179732</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M01</td>\n",
       "      <td>1026.91</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179733</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M02</td>\n",
       "      <td>1024.78</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179734</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M03</td>\n",
       "      <td>1019.46</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179735</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M04</td>\n",
       "      <td>1040.66</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179736</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M05</td>\n",
       "      <td>1041.94</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179737</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M06</td>\n",
       "      <td>1042.80</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179738</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M07</td>\n",
       "      <td>1042.08</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179739</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M08</td>\n",
       "      <td>1034.45</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179740</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M09</td>\n",
       "      <td>1037.62</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179741</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M10</td>\n",
       "      <td>1039.35</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179742</th>\n",
       "      <td>CES2023622011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M11</td>\n",
       "      <td>1045.67</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            series_id  year period    value footnote_codes\n",
       "179732  CES2023622011  2007    M01  1026.91            NaN\n",
       "179733  CES2023622011  2007    M02  1024.78            NaN\n",
       "179734  CES2023622011  2007    M03  1019.46            NaN\n",
       "179735  CES2023622011  2007    M04  1040.66            NaN\n",
       "179736  CES2023622011  2007    M05  1041.94            NaN\n",
       "179737  CES2023622011  2007    M06  1042.80            NaN\n",
       "179738  CES2023622011  2007    M07  1042.08            NaN\n",
       "179739  CES2023622011  2007    M08  1034.45            NaN\n",
       "179740  CES2023622011  2007    M09  1037.62            NaN\n",
       "179741  CES2023622011  2007    M10  1039.35            NaN\n",
       "179742  CES2023622011  2007    M11  1045.67            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at National Electrical Contractors of America started on 2007-06-01 00:00:00 and ended on 2007-06-01 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>297648</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M01</td>\n",
       "      <td>917.67</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297649</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M02</td>\n",
       "      <td>915.92</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297650</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M03</td>\n",
       "      <td>923.93</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297651</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M04</td>\n",
       "      <td>923.93</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297652</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M05</td>\n",
       "      <td>935.04</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297653</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M06</td>\n",
       "      <td>934.14</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297654</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M07</td>\n",
       "      <td>933.07</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297655</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M08</td>\n",
       "      <td>939.55</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297656</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M09</td>\n",
       "      <td>944.12</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297657</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M10</td>\n",
       "      <td>953.67</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>297658</th>\n",
       "      <td>CES2023821011</td>\n",
       "      <td>2007</td>\n",
       "      <td>M11</td>\n",
       "      <td>955.20</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            series_id  year period   value footnote_codes\n",
       "297648  CES2023821011  2007    M01  917.67            NaN\n",
       "297649  CES2023821011  2007    M02  915.92            NaN\n",
       "297650  CES2023821011  2007    M03  923.93            NaN\n",
       "297651  CES2023821011  2007    M04  923.93            NaN\n",
       "297652  CES2023821011  2007    M05  935.04            NaN\n",
       "297653  CES2023821011  2007    M06  934.14            NaN\n",
       "297654  CES2023821011  2007    M07  933.07            NaN\n",
       "297655  CES2023821011  2007    M08  939.55            NaN\n",
       "297656  CES2023821011  2007    M09  944.12            NaN\n",
       "297657  CES2023821011  2007    M10  953.67            NaN\n",
       "297658  CES2023821011  2007    M11  955.20            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "#Let's see what we can do with this data now:\n",
    "display(clean_national_data.head())\n",
    "print(f\"There are {len(clean_national_data)} rows to consider. Let's see the first 5\\n\\n\")\n",
    "for j in range(0,5):\n",
    "    row = clean_national_data.iloc[j]\n",
    "    start_date = row['start date']\n",
    "    series_id = row[\"series_id\"]\n",
    "    data = current_employment_statistic_df[\n",
    "        (current_employment_statistic_df[\"series_id\"]==series_id)\n",
    "        &(current_employment_statistic_df[\"period\"]!=\"M13\")]\n",
    "    #M13 is for the annual average\n",
    "\n",
    "    earlier = start_date-time_window\n",
    "    later = start_date+time_window \n",
    "    annoying = lambda row : pd.Timestamp(year=int(row[\"year\"]), month=int(row[\"period\"][1:]),day=1)\n",
    "    data = data[ (data.apply(annoying,axis=1)>=earlier)\n",
    "               & (data.apply(annoying,axis=1)<=later)]\n",
    "\n",
    "    print(f\"The work stoppage at {row['organization']} started on {row['start date']} \"\n",
    "        + f\"and ended on {row['end date']}. The associated wage data is as follows.\")          \n",
    "    display(data)\n",
    "    print(f\"\\n\\n\")"
   ]
  },
  {
<<<<<<< HEAD
=======
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>organization</th>\n",
       "      <th>state</th>\n",
       "      <th>areas</th>\n",
       "      <th>ownership</th>\n",
       "      <th>naics industry code</th>\n",
       "      <th>start date</th>\n",
       "      <th>end date</th>\n",
       "      <th>series_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Detroit Newspapers</td>\n",
       "      <td>MI</td>\n",
       "      <td>Detroit</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>51111</td>\n",
       "      <td>1995-07-13</td>\n",
       "      <td>1997-02-19</td>\n",
       "      <td>SAU2600004271034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Detroit Newspapers</td>\n",
       "      <td>MI</td>\n",
       "      <td>Detroit</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>51111</td>\n",
       "      <td>1995-07-13</td>\n",
       "      <td>1997-02-19</td>\n",
       "      <td>SAU2621604271034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Boeing Company</td>\n",
       "      <td>KS</td>\n",
       "      <td>Multiple states</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>336411</td>\n",
       "      <td>1995-10-06</td>\n",
       "      <td>1995-12-14</td>\n",
       "      <td>SAU2000003372034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>General Electric Corp.</td>\n",
       "      <td>PA</td>\n",
       "      <td>Erie</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>336510</td>\n",
       "      <td>1995-10-26</td>\n",
       "      <td>1995-10-30</td>\n",
       "      <td>SAU4200003374034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Chrysler Corp., McGraw Glass Division</td>\n",
       "      <td>MI</td>\n",
       "      <td>Detroit</td>\n",
       "      <td>Private industry</td>\n",
       "      <td>3361</td>\n",
       "      <td>1995-11-01</td>\n",
       "      <td>1995-11-04</td>\n",
       "      <td>SAU2600003371144</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            organization state            areas  \\\n",
       "0                     Detroit Newspapers    MI          Detroit   \n",
       "1                     Detroit Newspapers    MI          Detroit   \n",
       "2                         Boeing Company    KS  Multiple states   \n",
       "3                 General Electric Corp.    PA             Erie   \n",
       "4  Chrysler Corp., McGraw Glass Division    MI          Detroit   \n",
       "\n",
       "          ownership  naics industry code start date   end date  \\\n",
       "0  Private industry                51111 1995-07-13 1997-02-19   \n",
       "1  Private industry                51111 1995-07-13 1997-02-19   \n",
       "2  Private industry               336411 1995-10-06 1995-12-14   \n",
       "3  Private industry               336510 1995-10-26 1995-10-30   \n",
       "4  Private industry                 3361 1995-11-01 1995-11-04   \n",
       "\n",
       "          series_id  \n",
       "0  SAU2600004271034  \n",
       "1  SAU2621604271034  \n",
       "2  SAU2000003372034  \n",
       "3  SAU4200003374034  \n",
       "4  SAU2600003371144  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 113 rows to consider. Let's see the first 5\n",
      "\n",
      "\n",
      "The work stoppage at Detroit Newspapers started on 1995-07-13 00:00:00 and ended on 1997-02-19 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1217384</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M02</td>\n",
       "      <td>396.14</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217385</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M03</td>\n",
       "      <td>399.67</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217386</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M04</td>\n",
       "      <td>403.51</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217387</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M05</td>\n",
       "      <td>413.45</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217388</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M06</td>\n",
       "      <td>409.90</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217389</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M07</td>\n",
       "      <td>406.56</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217390</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M08</td>\n",
       "      <td>392.47</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217391</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M09</td>\n",
       "      <td>387.64</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217392</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M10</td>\n",
       "      <td>386.97</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217393</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M11</td>\n",
       "      <td>402.12</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217394</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M12</td>\n",
       "      <td>413.79</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1217396</th>\n",
       "      <td>SAU2600004271034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M01</td>\n",
       "      <td>371.76</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                series_id  year period   value footnote_codes\n",
       "1217384  SAU2600004271034  1995    M02  396.14            NaN\n",
       "1217385  SAU2600004271034  1995    M03  399.67            NaN\n",
       "1217386  SAU2600004271034  1995    M04  403.51            NaN\n",
       "1217387  SAU2600004271034  1995    M05  413.45            NaN\n",
       "1217388  SAU2600004271034  1995    M06  409.90            NaN\n",
       "1217389  SAU2600004271034  1995    M07  406.56            NaN\n",
       "1217390  SAU2600004271034  1995    M08  392.47            NaN\n",
       "1217391  SAU2600004271034  1995    M09  387.64            NaN\n",
       "1217392  SAU2600004271034  1995    M10  386.97            NaN\n",
       "1217393  SAU2600004271034  1995    M11  402.12            NaN\n",
       "1217394  SAU2600004271034  1995    M12  413.79            NaN\n",
       "1217396  SAU2600004271034  1996    M01  371.76            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at Detroit Newspapers started on 1995-07-13 00:00:00 and ended on 1997-02-19 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1277512</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M02</td>\n",
       "      <td>353.38</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277513</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M03</td>\n",
       "      <td>348.42</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277514</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M04</td>\n",
       "      <td>358.75</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277515</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M05</td>\n",
       "      <td>351.65</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277516</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M06</td>\n",
       "      <td>359.67</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277517</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M07</td>\n",
       "      <td>358.87</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277518</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M08</td>\n",
       "      <td>297.60</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277519</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M09</td>\n",
       "      <td>293.68</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277520</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M10</td>\n",
       "      <td>305.97</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277521</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M11</td>\n",
       "      <td>316.87</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277522</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M12</td>\n",
       "      <td>335.59</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1277524</th>\n",
       "      <td>SAU2621604271034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M01</td>\n",
       "      <td>330.73</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                series_id  year period   value footnote_codes\n",
       "1277512  SAU2621604271034  1995    M02  353.38            NaN\n",
       "1277513  SAU2621604271034  1995    M03  348.42            NaN\n",
       "1277514  SAU2621604271034  1995    M04  358.75            NaN\n",
       "1277515  SAU2621604271034  1995    M05  351.65            NaN\n",
       "1277516  SAU2621604271034  1995    M06  359.67            NaN\n",
       "1277517  SAU2621604271034  1995    M07  358.87            NaN\n",
       "1277518  SAU2621604271034  1995    M08  297.60            NaN\n",
       "1277519  SAU2621604271034  1995    M09  293.68            NaN\n",
       "1277520  SAU2621604271034  1995    M10  305.97            NaN\n",
       "1277521  SAU2621604271034  1995    M11  316.87            NaN\n",
       "1277522  SAU2621604271034  1995    M12  335.59            NaN\n",
       "1277524  SAU2621604271034  1996    M01  330.73            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at Boeing Company started on 1995-10-06 00:00:00 and ended on 1995-12-14 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>980645</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M05</td>\n",
       "      <td>752.60</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980646</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M06</td>\n",
       "      <td>748.78</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980647</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M07</td>\n",
       "      <td>712.80</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980648</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M08</td>\n",
       "      <td>703.34</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980649</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M09</td>\n",
       "      <td>726.97</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980650</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M10</td>\n",
       "      <td>460.46</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980651</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M11</td>\n",
       "      <td>628.85</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980652</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M12</td>\n",
       "      <td>564.20</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980654</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M01</td>\n",
       "      <td>808.75</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980655</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M02</td>\n",
       "      <td>822.74</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980656</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M03</td>\n",
       "      <td>827.89</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980657</th>\n",
       "      <td>SAU2000003372034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M04</td>\n",
       "      <td>816.82</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               series_id  year period   value footnote_codes\n",
       "980645  SAU2000003372034  1995    M05  752.60            NaN\n",
       "980646  SAU2000003372034  1995    M06  748.78            NaN\n",
       "980647  SAU2000003372034  1995    M07  712.80            NaN\n",
       "980648  SAU2000003372034  1995    M08  703.34            NaN\n",
       "980649  SAU2000003372034  1995    M09  726.97            NaN\n",
       "980650  SAU2000003372034  1995    M10  460.46            NaN\n",
       "980651  SAU2000003372034  1995    M11  628.85            NaN\n",
       "980652  SAU2000003372034  1995    M12  564.20            NaN\n",
       "980654  SAU2000003372034  1996    M01  808.75            NaN\n",
       "980655  SAU2000003372034  1996    M02  822.74            NaN\n",
       "980656  SAU2000003372034  1996    M03  827.89            NaN\n",
       "980657  SAU2000003372034  1996    M04  816.82            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at General Electric Corp. started on 1995-10-26 00:00:00 and ended on 1995-10-30 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2101097</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M05</td>\n",
       "      <td>681.36</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101098</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M06</td>\n",
       "      <td>677.16</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101099</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M07</td>\n",
       "      <td>683.76</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101100</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M08</td>\n",
       "      <td>685.03</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101101</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M09</td>\n",
       "      <td>683.35</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101102</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M10</td>\n",
       "      <td>682.49</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101103</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M11</td>\n",
       "      <td>688.80</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101104</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1995</td>\n",
       "      <td>M12</td>\n",
       "      <td>691.56</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101106</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M01</td>\n",
       "      <td>668.05</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101107</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M02</td>\n",
       "      <td>693.31</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101108</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M03</td>\n",
       "      <td>689.52</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2101109</th>\n",
       "      <td>SAU4200003374034</td>\n",
       "      <td>1996</td>\n",
       "      <td>M04</td>\n",
       "      <td>714.27</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                series_id  year period   value footnote_codes\n",
       "2101097  SAU4200003374034  1995    M05  681.36            NaN\n",
       "2101098  SAU4200003374034  1995    M06  677.16            NaN\n",
       "2101099  SAU4200003374034  1995    M07  683.76            NaN\n",
       "2101100  SAU4200003374034  1995    M08  685.03            NaN\n",
       "2101101  SAU4200003374034  1995    M09  683.35            NaN\n",
       "2101102  SAU4200003374034  1995    M10  682.49            NaN\n",
       "2101103  SAU4200003374034  1995    M11  688.80            NaN\n",
       "2101104  SAU4200003374034  1995    M12  691.56            NaN\n",
       "2101106  SAU4200003374034  1996    M01  668.05            NaN\n",
       "2101107  SAU4200003374034  1996    M02  693.31            NaN\n",
       "2101108  SAU4200003374034  1996    M03  689.52            NaN\n",
       "2101109  SAU4200003374034  1996    M04  714.27            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n",
      "The work stoppage at Chrysler Corp., McGraw Glass Division started on 1995-11-01 00:00:00 and ended on 1995-11-04 00:00:00. The associated wage data is as follows.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>series_id</th>\n",
       "      <th>year</th>\n",
       "      <th>period</th>\n",
       "      <th>value</th>\n",
       "      <th>footnote_codes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1207392</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1995</td>\n",
       "      <td>M06</td>\n",
       "      <td>1029.13</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207393</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1995</td>\n",
       "      <td>M07</td>\n",
       "      <td>1016.29</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207394</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1995</td>\n",
       "      <td>M08</td>\n",
       "      <td>998.20</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207395</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1995</td>\n",
       "      <td>M09</td>\n",
       "      <td>1055.26</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207396</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1995</td>\n",
       "      <td>M10</td>\n",
       "      <td>1041.30</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207397</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1995</td>\n",
       "      <td>M11</td>\n",
       "      <td>1048.32</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207398</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1995</td>\n",
       "      <td>M12</td>\n",
       "      <td>1051.18</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207400</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1996</td>\n",
       "      <td>M01</td>\n",
       "      <td>967.15</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207401</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1996</td>\n",
       "      <td>M02</td>\n",
       "      <td>1004.39</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207402</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1996</td>\n",
       "      <td>M03</td>\n",
       "      <td>853.16</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1207403</th>\n",
       "      <td>SAU2600003371144</td>\n",
       "      <td>1996</td>\n",
       "      <td>M04</td>\n",
       "      <td>1032.41</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                series_id  year period    value footnote_codes\n",
       "1207392  SAU2600003371144  1995    M06  1029.13            NaN\n",
       "1207393  SAU2600003371144  1995    M07  1016.29            NaN\n",
       "1207394  SAU2600003371144  1995    M08   998.20            NaN\n",
       "1207395  SAU2600003371144  1995    M09  1055.26            NaN\n",
       "1207396  SAU2600003371144  1995    M10  1041.30            NaN\n",
       "1207397  SAU2600003371144  1995    M11  1048.32            NaN\n",
       "1207398  SAU2600003371144  1995    M12  1051.18            NaN\n",
       "1207400  SAU2600003371144  1996    M01   967.15            NaN\n",
       "1207401  SAU2600003371144  1996    M02  1004.39            NaN\n",
       "1207402  SAU2600003371144  1996    M03   853.16            NaN\n",
       "1207403  SAU2600003371144  1996    M04  1032.41            NaN"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Preview the monthly wage series surrounding the first few work stoppages.\n",
    "display(clean_state_data.head())\n",
    "print(f\"There are {len(clean_state_data)} rows to consider. Let's see the first 5\\n\\n\")\n",
    "\n",
    "# Maps one observation to the first day of its month; 'period' looks like 'M05', so [1:] is the\n",
    "# month number. Defined once here instead of being rebuilt on every pass of the loop.\n",
    "annoying = lambda row : pd.Timestamp(year=int(row[\"year\"]), month=int(row[\"period\"][1:]),day=1)\n",
    "\n",
    "# Cap the preview at the rows that exist so a frame shorter than 5 rows cannot raise IndexError.\n",
    "for j in range(min(5, len(clean_state_data))):\n",
    "    row = clean_state_data.iloc[j]\n",
    "    start_date = row['start date']\n",
    "    series_id = row[\"series_id\"]\n",
    "    #M13 is for the annual average, so only the monthly observations of this series are kept\n",
    "    data = states_metro_employment_stats[ (states_metro_employment_stats[\"series_id\"]==series_id)\n",
    "        &(states_metro_employment_stats[\"period\"]!=\"M13\")]\n",
    "\n",
    "    # Keep the observations whose month lies within time_window on either side of the start date.\n",
    "    earlier = start_date-time_window\n",
    "    later = start_date+time_window\n",
    "    # Build the month timestamps a single time and reuse them for both bounds.\n",
    "    month_start = data.apply(annoying,axis=1)\n",
    "    data = data[(month_start>=earlier) & (month_start<=later)]\n",
    "\n",
    "    print(f\"The work stoppage at {row['organization']} started on {row['start date']} \"\n",
    "        + f\"and ended on {row['end date']}. The associated wage data is as follows.\")\n",
    "    display(data)\n",
    "    print(\"\\n\\n\")"
   ]
  },
  {
>>>>>>> 06568a1154fed0d7d54efa850de6c3142bfef1df
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data Cleaning and Transformations for Visualizations:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/nyssacornelius/opt/anaconda3/envs/jupenv2/lib/python3.7/site-packages/IPython/core/interactiveshell.py:3166: DtypeWarning:\n",
      "\n",
      "Columns (3) have mixed types.Specify dtype option on import or set low_memory=False.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels\n",
    "import pickle\n",
    "import plotly.express as px\n",
    "from scipy import stats as sts\n",
    "from scipy.stats import pearsonr\n",
    "from statsmodels.stats.multicomp import pairwise_tukeyhsd\n",
    "\n",
    "# Relative paths use forward slashes, which Python resolves on Windows, macOS and Linux;\n",
    "# the earlier backslash-separated form only resolved on Windows.\n",
    "# NOTE(review): unpickling can execute arbitrary code - only load pickles produced by this project.\n",
    "clean_national_data = pd.read_pickle(\"./Cleaned/clean_national_data.pkl\")\n",
    "clean_state_data = pd.read_pickle(\"./Cleaned/clean_state_data.pkl\")\n",
    "work_stop = pd.read_pickle(\"./PrelimEDA/work_stop.pkl\")\n",
    "# minwagestate = pd.read_pickle(\"./PrelimEDA/minwagestate.pkl\")\n",
    "\n",
    "minwagestate = pd.read_csv(\"./min_wage_state.csv\", usecols=[\"Year\", \"State\", \"Federal.Minimum.Wage\", \"Effective.Minimum.Wage\",\"Effective.Minimum.Wage.2020.Dollars\",\"CPI.Average\"])\n",
    "\n",
    "\n",
    "# Drop the listed jurisdictions in a single membership test. The .copy() yields an independent\n",
    "# frame, so the column assignments that follow do not raise SettingWithCopyWarning.\n",
    "excluded_jurisdictions = ['District of Columbia', 'U.S. Virgin Islands', 'Country Of Mexico', 'Puerto Rico', 'Guam']\n",
    "minwagestate = minwagestate[~minwagestate['State'].isin(excluded_jurisdictions)].copy()\n",
    "\n",
    "\n",
    "# CLEAN UP DATA:\n",
    "#Minimum wage:\n",
    "minwagestate['State'] = minwagestate['State'].astype('string')\n",
    "\n",
    "us_state_abbrev = {'Alabama': 'AL', 'Alaska': 'AK',\n",
    "'Arizona': 'AZ','Arkansas': 'AR',\n",
    "'California': 'CA',\n",
    "    'Colorado': 'CO',\n",
    "    'Connecticut': 'CT',\n",
    "    'Delaware': 'DE',\n",
    "    'Florida': 'FL',\n",
    "    'Georgia': 'GA',\n",
    "    'Hawaii': 'HI',\n",
    "    'Idaho': 'ID',\n",
    "    'Illinois': 'IL',\n",
    "    'Indiana': 'IN',\n",
    "    'Iowa': 'IA',\n",
    "    'Kansas': 'KS',\n",
    "    'Kentucky': 'KY',\n",
    "    'Louisiana': 'LA',\n",
    "    'Maine': 'ME',\n",
    "    'Maryland': 'MD',\n",
    "    'Massachusetts': 'MA',\n",
    "    'Michigan': 'MI',\n",
    "    'Minnesota': 'MN',\n",
    "    'Mississippi': 'MS',\n",
    "    'Missouri': 'MO',\n",
    "    'Montana': 'MT',\n",
    "    'Nebraska': 'NE',\n",
    "    'Nevada': 'NV',\n",
    "    'New Hampshire': 'NH',\n",
    "    'New Jersey': 'NJ',\n",
    "    'New Mexico': 'NM',\n",
    "    'New York': 'NY',\n",
    "    'North Carolina': 'NC',\n",
    "    'North Dakota': 'ND',\n",
    "    'Ohio': 'OH',\n",
    "    'Oklahoma': 'OK',\n",
    "    'Oregon': 'OR',\n",
    "    'Pennsylvania': 'PA',\n",
    "    'Rhode Island': 'RI',\n",
    "    'South Carolina': 'SC',\n",
    "    'South Dakota': 'SD',\n",
    "    'Tennessee': 'TN',\n",
    "    'Texas': 'TX',\n",
    "    'Utah': 'UT',\n",
    "    'Vermont': 'VT',\n",
    "    'Virginia': 'VA',\n",
    "    'Washington': 'WA',\n",
    "    'West Virginia': 'WV',\n",
    "    'Wisconsin': 'WI',\n",
    "    'Wyoming': 'WY'\n",
    "}                                                           \n",
    "\n",
    "minwagestate['StateCode'] = minwagestate['State'].map(us_state_abbrev)                                                             \n",
    "\n",
    "#Work stoppage:\n",
    "work_stop = work_stop.rename(columns={'Days idle, cumulative for this work stoppage[3]': 'TotalDaysIdle', 'Industry code[1]': 'IndustryCode', 'Work stoppage beginning date': 'StartDate', 'Work stoppage ending date': 'EndDate'})\n",
    "\n",
    "# #Remove the weird [4] and make the column an integer data type:\n",
    "work_stop['TotalDaysIdle'] = pd.to_numeric(work_stop['TotalDaysIdle'], errors='coerce', downcast='integer')\n",
    "work_stop['TotalDaysIdle'] = work_stop['TotalDaysIdle'].astype('Int64')\n",
    "\n",
    "# #Fix workstop end date:\n",
    "work_stop['EndDate'] = pd.to_datetime(work_stop['EndDate'], errors='coerce', format = '%Y-%m-%d')\n",
    "\n",
    "# Duration of each work stoppage in days, counting both the start day and the end day\n",
    "# (a stoppage that starts and ends on the same date lasts 1 day). The inclusive +1 is applied\n",
    "# exactly once; the previous second increment overstated every duration by one day.\n",
    "work_stop['WSDuration'] = (work_stop['EndDate'] - work_stop['StartDate'])/np.timedelta64(1,'D')+1\n",
    "\n",
    "#Change states from string to list of strings:\n",
    "work_stop['States'] = work_stop['States'].str.split(\",\")\n",
    "\n",
    "#Minimum wage state data:\n",
    "# minwagestate.hist(column='Effective.Minimum.Wage')\n",
    "# minwagestate.boxplot(column='Effective.Minimum.Wage', by = [\"State\"], rot = 75)\n",
    "\n",
    "\n",
    "#Work Stoppage state data:\n",
    "    #Need to quantify data by state using list comprehension as states are in lists:\n",
    "#Are these two essentially the same? Neither are truly normal, left skewed.\n",
    "# work_stop.hist(column='WSDuration')\n",
    "# work_stop.hist(column = \"TotalDaysIdle\")\n",
    "    \n",
    "    \n",
    "#INDUSTRY DATA INFORMATION:\n",
    "    #Willing to go down to 3-digit NAICS code, if not fruitful then up to only 2-digit NAICS\n",
    "    #Best file will likely be 2-6 digit NAICS code xlsx file\n",
    "\n",
    "# NOTE(review): absolute, user-specific path - this read only succeeds on the original author's machine.\n",
    "# Point it at a repo-relative copy of the CSV once its location in the project is confirmed.\n",
    "iCodes = pd.read_csv('/Users/nyssacornelius/Desktop/COMP4477/FProj/FProj/2017NAICS_Codes2_6digit.csv', header=0, usecols=[0,1,2], skiprows=[1])\n",
    "iCodes = iCodes.rename(columns={'2017 NAICS US   Code': 'NAICS_Code2017', '2017 NAICS US Title': 'IndustryTitle'})\n",
    "\n",
    "#Remove the Unicode replacement character (U+FFFD); regex=False treats it as a literal:\n",
    "iCodes['IndustryTitle'] = iCodes['IndustryTitle'].str.replace('\\ufffd', '', regex=False)\n",
    "\n",
    "#Extract only codes made of exactly 2 digits. Raw strings keep \\d a valid regex escape, and\n",
    "#na=False keeps the mask strictly boolean if a code is missing:\n",
    "codes2digit = iCodes.loc[iCodes['NAICS_Code2017'].str.contains(r'^\\d{2}$', na=False), ['NAICS_Code2017']].values\n",
    "\n",
    "#Extract only codes made of exactly 3 digits:\n",
    "codes3digit = iCodes.loc[iCodes['NAICS_Code2017'].str.contains(r'^\\d{3}$', na=False), ['NAICS_Code2017']].values\n",
    "\n",
    "#Store the first run of 3 digits found in each code (NaN where a code has none):\n",
    "# iCodes['2digit'] = iCodes['NAICS_Code2017'].str.extract(r'(\\d{2})')\n",
    "iCodes['3digit'] = iCodes['NAICS_Code2017'].str.extract(r'(\\d{3})')\n",
    "\n",
    "#Industry code abbreviated for work stop:\n",
    "work_stop['iCodeAb'] = work_stop['IndustryCode'].astype(str).str.extract(r'(\\d{3})')\n",
    "\n",
    "# #fill in nans:\n",
    "work_stop.loc[work_stop['iCodeAb'].isnull(), ['iCodeAb']] = work_stop.loc[work_stop['iCodeAb'].isnull(), ['IndustryCode']].values\n",
    "\n",
    "# #Copy column:\n",
    "work_stop['iTitle'] = work_stop['iCodeAb'].astype(str)\n",
    "\n",
    "# #Get name from industry code:\n",
    "work_stop['iTitle'] = work_stop['iTitle'].replace({k:v for k, v in zip(iCodes['NAICS_Code2017'], iCodes['IndustryTitle'])})\n",
    "\n",
    "\n",
    "#Industry strikes aggregated by count:\n",
    "industryCounts = work_stop['iTitle'].value_counts().reset_index().rename({'index': 'Industry', 'iTitle': 'Counts'}, axis = 1)\n",
    "\n",
    "#Frequency of strikes by state:\n",
    "otherStates = {k:'Other' for k in ['East Coast States', 'Nationwide', 'Interstate']}\n",
    "stateCounts = pd.Series(np.concatenate(work_stop['States'])).str.strip().replace(otherStates)\n",
    "stateCounts = pd.Series(np.where(stateCounts == \"\", None, stateCounts)).value_counts().reset_index().rename({'index': 'State', 0: 'Counts'}, axis = 1)\n",
    "\n",
    "\n",
    "#Begin State-Metro Employment and Wage Data work:\n",
    "smdata = pd.read_csv('https://download.bls.gov/pub/time.series/sm/sm.data.1.AllData', sep = '\\t')\n",
    "\n",
    "smdata.shape\n",
    "smdata.columns\n",
    "\n",
    "#Remove whitespace:\n",
    "smdata.columns = smdata.columns.str.strip()\n",
    "smdata['series_id'] = smdata['series_id'].str.strip()\n",
    "smdata['value'] = smdata.value.astype(str).str.strip()\n",
    "\n",
    "#Convert value to float\n",
    "\n",
    "smdata['state_code'] = smdata['series_id'].str.extract(r'(\\d{2})')\n",
    "smdata = smdata.loc[~smdata['state_code'].isin(['00','11','72','78','99'])]\n",
    "\n",
    "#Create columns for data types in series id:\n",
    "smdata['data_type'] = smdata['series_id'].str.extract(r'(\\d{2}$)')\n",
    "smdata.head(20)\n",
    "\n",
    "data_types = {'01': 'Employees',\n",
    "              '11': 'AvgWeeklyEarnings'\n",
    "              }\n",
    "\n",
    "smdata = smdata.loc[smdata['data_type'].isin(data_types.keys())]\n",
    "smdata['data_type'] = smdata['data_type'].replace(data_types)\n",
    "smdata['value'] = np.where(smdata['value'] == '-', np.nan, smdata['value'])\n",
    "smdata['value'] = smdata['value'].astype(float)\n",
    "\n",
    "smdata['industry_code'] = smdata['series_id'].str.extract(r'\\d{7}(\\d{5})')+'000'\n",
    "\n",
    "\n",
    "indCode = pd.read_csv('https://download.bls.gov/pub/time.series/sm/sm.industry', sep = '\\t', dtype = {'industry_code': str})\n",
    "\n",
    "# Attach the BLS industry names; codes missing from sm.industry get a null industry_name.\n",
    "mergedat = pd.merge(smdata, indCode, on = 'industry_code', how = 'left')\n",
    "\n",
    "# Hand-written names for the codes the lookup did not match.\n",
    "# NOTE(review): names are paired with codes by position, i.e. by the order in which the unmatched\n",
    "# codes first appear in mergedat - confirm the pairing whenever the BLS files change.\n",
    "manInd = ['Utilities', 'Transportation and Warehousing', 'Professional, Scientific, and Technical Services',\n",
    "          'State Government', 'Indian Tribes', 'Logging', 'Educational Services', 'Federal Government',\n",
    "          'Federal Government', 'Federal Government', 'Local Government']\n",
    "unmatched_codes = mergedat.loc[mergedat['industry_name'].isnull(), 'industry_code'].unique()\n",
    "manIndreplace = dict(zip(unmatched_codes, manInd))\n",
    "\n",
    "# Assign the result back instead of calling replace(inplace=True) on the attribute-accessed column:\n",
    "# under pandas Copy-on-Write that form leaves mergedat itself unchanged.\n",
    "mergedat['industry_code'] = mergedat['industry_code'].replace(manIndreplace)\n",
    "# Rows without a name take the (now human-readable) value held in industry_code.\n",
    "mergedat['industry_name'] = mergedat['industry_name'].fillna(mergedat['industry_code'])\n",
    "\n",
    "stateCodes = pd.read_csv('https://download.bls.gov/pub/time.series/sm/sm.state', sep = '\\t', dtype = {'state_code': str})\n",
    "\n",
    "final_data = pd.merge(mergedat, stateCodes, on = 'state_code', how = 'left')\n",
    "\n",
    "final_data.isnull().sum()\n",
    "final_data.state_name.value_counts()\n",
    "\n",
    "final_data.drop(['series_id', 'period', 'footnote_codes', 'state_code', 'industry_code'], axis = 1, inplace = True)\n",
    "\n",
    "#Make pivot table\n",
    "finalfull = final_data.pivot_table(index = ['state_name','industry_name','year'], columns = 'data_type', values = 'value', aggfunc = 'mean').reset_index()\n",
    "finalfull.columns = finalfull.columns.str.strip()\n",
    "final_earnings = finalfull.copy()\n",
    "final_earnings = final_earnings.dropna().reset_index()\n",
    "final_earnings = final_earnings.drop(labels = 'index', axis = 1)\n",
    "\n",
    "final_earnings['StateCode'] = final_earnings['state_name'].map(us_state_abbrev)\n",
    "finalfull['StateCode'] = finalfull['state_name'].map(us_state_abbrev)\n",
    "\n",
    "\n",
    "#Need to group by industry and state, respectively and take the average over all years since 2007\n",
    "earnInd = final_earnings.groupby(['industry_name'])['AvgWeeklyEarnings'].mean().sort_values(ascending = False)\n",
    "earnInd = earnInd.reset_index()\n",
    "\n",
    "earnState = final_earnings.groupby(['StateCode'])['AvgWeeklyEarnings'].mean().sort_values(ascending = False)\n",
    "earnState = earnState.reset_index()\n",
    "\n",
    "#Education and Health services have two of the highest rates of strike - in mid-low range of average weekly earnings\n",
    "#However, this could be skewed by professionals in health industry that make a great deal more\n",
    "#Can't know from this data\n",
    "#Leisure is lowest average weekly earnings\n",
    "\n",
    "\n",
    "# Spearman rank correlation across the numeric columns of minwagestate.\n",
    "# Text columns such as State / StateCode are filtered out explicitly: recent pandas releases\n",
    "# raise on non-numeric columns in DataFrame.corr instead of silently ignoring them.\n",
    "cormin = minwagestate.select_dtypes(include = 'number').corr(method = 'spearman')\n",
    "# Observations recorded by the author from cormin:\n",
    "#  - as years increase, the effective minimum wage in 2020 $ decreases; CPI increases almost 1:1 with year\n",
    "#  - minimum wage in 2020 $ decreases as years increase\n",
    "#  - minimum wage in 2020 $ decreases as the CPI average increases\n",
    "\n",
    "\n",
    "# work_stop.to_pickle(\".\\\\PrelimEDA\\\\work_stop.pkl\")\n",
    "# finalfull.to_pickle(\".\\\\PrelimEDA\\\\finalfull.pkl\")\n",
    "# final_earnings.to_pickle(\".\\\\PrelimEDA\\\\final_earnings.pkl\")\n",
    "# minwagestate.to_pickle(\".\\\\PrelimEDA\\\\minwagestate.pkl\")\n",
    "\n",
    "# final_earnings.industry_name.value_counts()\n",
    "\n",
    "# finalfull.to_csv('finalfull.csv', index = False)\n",
    "# final_earnings.to_csv('final_earnings.csv', index = False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 2 | Data Visualizations:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Minimum Wage by State:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Animated choropleth of the effective minimum wage per state, one frame per year.\n",
    "df = minwagestate\n",
    "\n",
    "# To pin one colour scale across all years, uncomment the next line\n",
    "# and pass range_color = range_color to px.choropleth below.\n",
    "#range_color = (df['Effective.Minimum.Wage'].min(), df['Effective.Minimum.Wage'].max())\n",
    "\n",
    "# Author's note: without range_color, the scale is set on a year-by-year basis.\n",
    "fig1 = px.choropleth(\n",
    "    data_frame = df,\n",
    "    locations = df['StateCode'],\n",
    "    locationmode = 'USA-states',\n",
    "    scope = 'usa',\n",
    "    color = 'Effective.Minimum.Wage',\n",
    "    color_continuous_scale = 'speed',\n",
    "    animation_frame = 'Year',\n",
    "    hover_name = 'State',\n",
    "    # False hides a field from the hover box, True shows it.\n",
    "    hover_data = {\n",
    "        'StateCode': False,\n",
    "        'State': False,\n",
    "        'Effective.Minimum.Wage.2020.Dollars': True,\n",
    "        'CPI.Average': True,\n",
    "        'Effective.Minimum.Wage': True,\n",
    "        'Year': False,\n",
    "    },\n",
    "    # Human-readable names for the raw column names.\n",
    "    labels = {\n",
    "        'Effective.Minimum.Wage': 'Effective Minimum Wage',\n",
    "        'Effective.Minimum.Wage.2020.Dollars': '2020 Dollars Equivalent',\n",
    "        'CPI.Average': 'Average Consumer Price Index',\n",
    "    },\n",
    ")\n",
    "\n",
    "fig1.update_layout(title = 'Minimum Wage by State Since 1968')\n",
    "fig1.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Average Weekly Earnings by State:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choropleth of average weekly earnings per state, using the earnState table.\n",
    "df = earnState\n",
    "fig1 = px.choropleth(\n",
    "    data_frame = df,\n",
    "    locations = df['StateCode'],\n",
    "    locationmode = 'USA-states',\n",
    "    scope = 'usa',\n",
    "    color = 'AvgWeeklyEarnings',\n",
    "    color_continuous_scale = 'speed',\n",
    "    hover_name = 'StateCode',\n",
    "    # The state code is already the hover title, so hide the duplicate field.\n",
    "    hover_data = {'StateCode': False},\n",
    "    labels = {'AvgWeeklyEarnings': 'Average Weekly Earnings'},\n",
    ")\n",
    "\n",
    "fig1.update_layout(title = 'Average Weekly Earnings by State Since 2007')\n",
    "fig1.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Strikes by State:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Choropleth of the number of strikes per state, using the stateCounts table.\n",
    "df = stateCounts\n",
    "fig1 = px.choropleth(\n",
    "    data_frame = df,\n",
    "    locations = df['State'],\n",
    "    locationmode = 'USA-states',\n",
    "    scope = 'usa',\n",
    "    color = 'Counts',\n",
    "    color_continuous_scale = 'speed',\n",
    "    hover_name = 'State',\n",
    "    # The state is already the hover title, so hide the duplicate field.\n",
    "    hover_data = {'State': False},\n",
    "    labels = {'Counts': 'Number of Strikes'},\n",
    ")\n",
    "\n",
    "fig1.update_layout(title = 'Strikes by State Since 1988')\n",
    "fig1.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bar chart: number of strikes per state since 1988, coloured by the same count.\n",
    "fig = px.bar(\n",
    "    data_frame = stateCounts,\n",
    "    x = 'State',\n",
    "    y = 'Counts',\n",
    "    color = 'Counts',\n",
    "    color_continuous_scale = 'speed',\n",
    "    hover_data = ['Counts', 'State'],\n",
    "    labels = {'Counts': 'Number of Strikes'},\n",
    "    title = 'Number of Strikes Since 1988 by State',\n",
    "    height = 400,\n",
    "    width = 1030,\n",
    ")\n",
    "# Tilt the state labels so they do not overlap.\n",
    "fig.update_xaxes(tickangle = 75)\n",
    "fig.update_layout(plot_bgcolor = 'white')\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Strikes by Industry:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bar chart: number of strikes per industry since 1988, coloured by the same count.\n",
    "fig = px.bar(\n",
    "    data_frame = industryCounts,\n",
    "    x = 'Industry',\n",
    "    y = 'Counts',\n",
    "    orientation = 'v',\n",
    "    color = 'Counts',\n",
    "    color_continuous_scale = 'solar_r',\n",
    "    hover_data = ['Counts', 'Industry'],\n",
    "    labels = {'Counts': 'Number of Strikes'},\n",
    "    title = 'Number of Strikes Since 1988 by Industry',\n",
    "    height = 1030,\n",
    "    width = 1050,\n",
    ")\n",
    "# Tilt the industry labels so they do not overlap.\n",
    "fig.update_xaxes(tickangle = 65)\n",
    "fig.update_layout(plot_bgcolor = 'black')\n",
    "fig.show()\n",
    "# Open question: do states with a lower minimum wage tend to have more strikes?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Average Weekly Earnings by Industry Since 2007:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO (from the author): possibly add income-by-industry data here.\n",
    "# Bar chart: average weekly earnings per industry, coloured by the same value.\n",
    "fig = px.bar(\n",
    "    data_frame = earnInd,\n",
    "    x = 'industry_name',\n",
    "    y = 'AvgWeeklyEarnings',\n",
    "    color = 'AvgWeeklyEarnings',\n",
    "    color_continuous_scale = 'speed',\n",
    "    hover_data = ['AvgWeeklyEarnings', 'industry_name'],\n",
    "    labels = {'AvgWeeklyEarnings': 'Average Weekly Earnings', 'industry_name': 'Industry'},\n",
    "    title = 'Average Weekly Earnings by Industry since 2007',\n",
    "    height = 750,\n",
    "    width = 1000,\n",
    ")\n",
    "# Tilt the industry labels so they do not overlap.\n",
    "fig.update_xaxes(tickangle = 75)\n",
    "fig.update_layout(plot_bgcolor = 'white')\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Part 3 | Statistical Tests:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Runs one-way ANOVAs (sts.f_oneway) followed by Tukey HSD pairwise comparisons.\n",
    "# NOTE(review): `sts` and `pairwise_tukeyhsd` are not imported in this cell - confirm an earlier cell defines them.\n",
    "\n",
    "def _samples_by_group(frame, group_col, value_col):\n",
    "    \"\"\"Return one list of `value_col` values for each distinct value of `group_col`.\n",
    "\n",
    "    Groups are ordered by first appearance in `frame` (the order of `Series.unique()`).\n",
    "    \"\"\"\n",
    "    return [list(frame.loc[frame[group_col] == key, value_col])\n",
    "            for key in frame[group_col].unique()]\n",
    "\n",
    "\n",
    "def _save_summary(tukey_result, path):\n",
    "    \"\"\"Write `tukey_result.summary()` to the text file at `path`, overwriting it.\"\"\"\n",
    "    with open(path, 'w') as f:\n",
    "        print(tukey_result.summary(), file=f)\n",
    "\n",
    "\n",
    "# One-way ANOVA on state minimum wage (2020 dollars):\n",
    "stateminwage = _samples_by_group(minwagestate, 'State', 'Effective.Minimum.Wage.2020.Dollars')\n",
    "\n",
    "fvalue, pvalue = sts.f_oneway(*stateminwage)\n",
    "\n",
    "m_comp = pairwise_tukeyhsd(endog=minwagestate['Effective.Minimum.Wage.2020.Dollars'], groups=minwagestate['State'],\n",
    "                           alpha=0.05)\n",
    "print(m_comp.summary())\n",
    "\n",
    "# Too many groups to read inline, so the summary is also saved to a text file:\n",
    "_save_summary(m_comp, 'mcomp_stateminwage.txt')\n",
    "\n",
    "\n",
    "# One-way ANOVA on average weekly earnings by state:\n",
    "avgwagestate = _samples_by_group(final_earnings, 'state_name', 'AvgWeeklyEarnings')\n",
    "\n",
    "fvalue_statewage, pvalue_statewage = sts.f_oneway(*avgwagestate)\n",
    "\n",
    "m_comp_statewage = pairwise_tukeyhsd(endog=final_earnings['AvgWeeklyEarnings'], groups=final_earnings['state_name'],\n",
    "                           alpha=0.05)\n",
    "\n",
    "# Too many groups to read inline, so the summary is saved to a text file:\n",
    "_save_summary(m_comp_statewage, 'mcomp_statewage.txt')\n",
    "\n",
    "# One-way ANOVA on average weekly earnings by industry:\n",
    "avgwageindustry = _samples_by_group(final_earnings, 'industry_name', 'AvgWeeklyEarnings')\n",
    "\n",
    "fvalue_industrywage, pvalue_industrywage = sts.f_oneway(*avgwageindustry)\n",
    "\n",
    "m_comp_industrywage = pairwise_tukeyhsd(endog=final_earnings['AvgWeeklyEarnings'], groups=final_earnings['industry_name'],\n",
    "                           alpha=0.05)\n",
    "\n",
    "_save_summary(m_comp_industrywage, 'mcomp_industrywage.txt')\n",
    "\n",
    "# Classify each state by how often its effective minimum wage exceeded the federal one:\n",
    "#  - per row: 1 if effective minimum wage > federal minimum wage, else 0\n",
    "#  - per state: mean of those flags, rounded to 2 decimals\n",
    "#  - rounded mean >= 0.4 -> 'GreaterMinWage', otherwise 'MinWage'\n",
    "\n",
    "\n",
    "# Resulting frame: states, strikes, minimum-wage status\n",
    "minwagestate['MinWageStatus'] = np.where((minwagestate['Effective.Minimum.Wage']-minwagestate['Federal.Minimum.Wage']) > 0, 1, 0)\n",
    "state_status = minwagestate.groupby(['State'])['MinWageStatus'].mean().reset_index()\n",
    "state_status['MinWageStatus'] = np.round(state_status['MinWageStatus'], 2)\n",
    "\n",
    "state_status['MinWageStatus'] = np.where(state_status['MinWageStatus'] >= 0.4, 'GreaterMinWage', 'MinWage')\n",
    "\n",
    "# Not the last expression of the cell, so this tally is computed but not displayed.\n",
    "state_status.MinWageStatus.value_counts()\n",
    "\n",
    "# Get full state names for stateCounts in statestrikes (inverts the name -> abbreviation map).\n",
    "# The result is assigned back instead of using replace(inplace=True) on an attribute-accessed column,\n",
    "# which acts on an intermediate Series and is not guaranteed to update statestrikes.\n",
    "statestrikes = stateCounts.copy()\n",
    "statestrikes['State'] = statestrikes['State'].replace({v:k for k,v in us_state_abbrev.items()})\n",
    "\n",
    "# Merge on state_status state; WY and SD are missing and need to be dropped:\n",
    "wagestrike = pd.merge(state_status, statestrikes, on = 'State', how = 'left').dropna()\n",
    "\n",
    "# One-way ANOVA for minimum-wage status and strikes by state:\n",
    "minornot = _samples_by_group(wagestrike, 'MinWageStatus', 'Counts')\n",
    "\n",
    "fvalue_strikes, pvalue_strikes = sts.f_oneway(*minornot)\n",
    "\n",
    "m_comp_strikes = pairwise_tukeyhsd(endog=wagestrike['Counts'], groups=wagestrike['MinWageStatus'],\n",
    "                           alpha=0.05)\n",
    "\n",
    "print(m_comp_strikes.summary())\n",
    "# Author's interpretation: the difference is not significant, so we fail to reject the null hypothesis.\n",
    "# Minimum wage states do have fewer strikes, but the difference is not statistically significant\n",
    "# and could still be due to chance."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}