In [1]:
import os, re, struct
import pandas as pd
import numpy as np
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
from collections import Counter

In [2]:
import pymongo
from pymongo import MongoClient
# Connect using the driver's default connection settings (no host/port given).
client = MongoClient()
# The result of this listing is discarded because it is not the cell's last
# expression; the call itself is kept as-is.
client["CPS"].list_collection_names()
# Handle to the CPS database, used by the cells below.
CPS = client["CPS"]

## 01. Retrieve data from MongoDB

In [3]:
# Show the collections currently present in the CPS database.
# NOTE(review): the recorded output lists four collections, yet a later cell
# also reads PWSSWGT and PWCMPWGT, and the execution counts are not sequential —
# the saved output may be stale; confirm with Restart & Run All.
CPS.list_collection_names()

['PEEDUCA', 'PREMPNOT', 'PESEX', 'PTDTRACE']

In [71]:
# Pull one document from each per-variable collection, in the same order as
# the names on the left-hand side. find_one() is called without a filter.
# NOTE(review): find_one() yields None for an empty/missing collection, which
# would only surface later as a subscript error — verify all six exist.
PEEDUCA, PREMPNOT, PESEX, PTDTRACE, PWSSWGT, PWCMPWGT = (
    CPS[collection_name].find_one()
    for collection_name in (
        "PEEDUCA",
        "PREMPNOT",
        "PESEX",
        "PTDTRACE",
        "PWSSWGT",
        "PWCMPWGT",
    )
)

In [90]:
# Assemble one long-format frame: one row per record per month, with the month
# label broadcast across that month's rows.
# Each retrieved document is indexed by month key; presumably every entry is an
# equal-length sequence of per-respondent values — TODO confirm against the loader.
months = ['jan', 'feb', 'mar', 'apr']

# Build every monthly frame first and concatenate once. DataFrame.append was
# removed in pandas 2.0, and appending inside the loop re-copied the
# accumulated frame on every iteration.
monthly_frames = [
    pd.DataFrame({
        'month': mon,
        'weight_cf': PWCMPWGT[mon],
        'weight': PWSSWGT[mon],
        'emp': PREMPNOT[mon],
        'sex': PESEX[mon],
        'race': PTDTRACE[mon],
        'educa': PEEDUCA[mon]
    })
    for mon in months
]

# Row labels are deliberately not reset (they restart for each month), which
# matches what the append-based loop produced. Column order follows the dict
# key order above: month, weight_cf, weight, emp, sex, race, educa.
df = pd.concat(monthly_frames)

## 02. Calculate non-seasonally adjusted unemployment rate

In [109]:
# Coerce both weight columns to a numeric dtype (same order as before:
# "weight" first, then "weight_cf").
for weight_col in ("weight", "weight_cf"):
    df[weight_col] = pd.to_numeric(df[weight_col])

# Keep only the rows whose weight_cf is strictly positive.
has_positive_weight = df["weight_cf"].gt(0)
df = df.loc[has_positive_weight]

Here is how the weights should work: for example, if a person has a weight of 1,500, then that respondent is theoretically representing 1,500 persons in the population.

In practice, the stored weights are scaled up by a factor of 10,000.

I scale them back down and convert the units to thousands of persons, i.e. the summed weights are divided by 10,000 × 1,000.

In [111]:
# Preview the first rows of the combined frame (up to 15).
df.head(15)

Unnamed: 0,month,weight_cf,weight,emp,sex,race,educa
0,jan,17347552,17713809,4,1,2,38
1,jan,16756084,16864805,4,2,2,40
2,jan,21463402,20481802,4,1,1,40
3,jan,30966041,30137016,1,1,1,39
4,jan,17590812,17309391,1,2,1,43
5,jan,22252360,21931976,1,1,2,39
6,jan,20126642,19718618,1,2,2,39
8,jan,22147484,22450914,4,2,1,39
9,jan,28851877,33004731,4,2,2,39
10,jan,17379723,17492490,4,2,2,39


http://www.dlt.ri.gov/lmi/laus/us/usunadj.htm

The page linked above lists the published non-seasonally adjusted unemployment rates; they serve as the benchmark for the figures computed below.

In [112]:
# Render floats with five decimal places in all subsequent pandas output.
pd.set_option("display.float_format", "{:.5f}".format)

In [106]:
# Split the frame by the value of the 'emp' column.
# Presumably code 1 = employed and code 2 = unemployed — confirm against the
# CPS data dictionary for PREMPNOT.
emp_code = df['emp']
emp = df.loc[emp_code.eq(1)]
unemp = df.loc[emp_code.eq(2)]

In [114]:
# Sum weight_cf per month within each subset.
tot_emp = emp.groupby("month")["weight_cf"].sum()
tot_unemp = unemp.groupby("month")["weight_cf"].sum()

# Share of the unemp total in the combined (emp + unemp) total, in percent.
labor_force = tot_emp + tot_unemp
tot_unemp * 100 / labor_force

month
apr   14.44144
feb    3.78583
jan    3.97775
mar    4.53429
Name: weight_cf, dtype: float64

In [116]:
# Divide by 10,000 (weight scale factor) x 1,000 (report in thousands).
# Per the original author's note, this matches the data from DLT exactly.
print(tot_emp / (10_000 * 1_000))

month
apr   133325.80812
feb   158017.40388
jan   156993.73231
mar   155167.19227
Name: weight_cf, dtype: float64
