In [221]:
import pandas as pd

# Read data
strikeCountsDf = pd.read_csv('data/strike-count.csv')
cpiDf = pd.read_csv('data/cpi.csv')

unemploymentDf = pd.read_csv('data/unemployment.csv')
labourShareDf = pd.read_csv('data/labour-share-of-income.csv')
realGdpDf = pd.read_csv('data/real-gdp.csv')

In [222]:
# Cleaning data
# Convert all FRED dates to the just a year
def convertToYears(date):
    return int(date[:4])

fredDfs = [unemploymentDf, labourShareDf, realGdpDf]

for df in fredDfs:
    df['DATE'] = df['DATE'].apply(convertToYears)

cpiDf = cpiDf[['REF_DATE', 'VALUE']]
strikeCountsDf = strikeCountsDf[['REF_DATE', 'VALUE']]

# Rename datasets and drop unnecessary rows
cpiDf = cpiDf.rename(columns={'REF_DATE': 'DATE', 'VALUE': 'annualCpi'})
strikeCountsDf = strikeCountsDf.rename(columns={'REF_DATE': 'DATE', 'VALUE': 'strikeCounts'})
labourShareDf = labourShareDf.rename(columns={'LABSHPCAA156NRUG': 'labourShare'})
realGdpDf = realGdpDf.rename(columns={'NGDPRSAXDCCAQ': 'realGdp'})
unemploymentDf = unemploymentDf.rename(columns={'LRUNTTTTCAA156S': 'unemploymentRate'})

In [223]:
# Since the latest date we have available in the datasets in 1961, we're going to drop all rows before then for each dataset
dfs = [strikeCountsDf, cpiDf, unemploymentDf, labourShareDf, realGdpDf]
newDfs = []

for df in dfs:
    isRowDroppable = df['DATE'] < 1961
    newDf = df[~isRowDroppable]
    newDfs.append(newDf)
    
# Combining all the values into a single dataframe
df = pd.DataFrame(data={'date': strikeCountsDf['DATE'], 'strikeCounts': strikeCountsDf['strikeCounts'],
                        'annualCpi': cpiDf['annualCpi']})

print(df)

       date  strikeCounts  annualCpi
0    1946.0         227.0       15.7
1    1947.0         234.0       15.9
2    1948.0         154.0       16.1
3    1949.0         135.0       16.4
4    1950.0         161.0       16.8
..      ...           ...        ...
978     NaN           NaN      149.2
979     NaN           NaN      151.3
980     NaN           NaN      154.8
981     NaN           NaN      162.6
982     NaN           NaN      170.1

[983 rows x 3 columns]
