In [10]:
import pandas as pd

# Read Statistics Canada formatted data
strikeCountsDf = pd.read_csv('data/strikes/strike-count.csv')
strikeAverageDurationsDf = pd.read_csv('data/strikes/strike-average-duration.csv')
strikeDaysNotWorkedDf = pd.read_csv('data/strikes/strike-days-not-worked.csv')

# Read FRED Data
unemploymentDf = pd.read_csv('data/unemployment.csv')
realGdpDf = pd.read_csv('data/real-gdp.csv')
inflationDf = pd.read_csv('data/inflation.csv')
populationDf = pd.read_csv('data/population.csv')

# Read Labour Share Data
# Household Compensation is Statistics Canada data, but it has been modified in Excel to have years as the dates
householdCompensationDf = pd.read_csv('data/labour-share/household-compensation.csv')

# Nominal GDP is Fred data
nominalGdpDf = pd.read_csv('data/labour-share/nominal-gdp.csv')

In [11]:
# Cleaning data
# Convert all FRED dates to the just a year
def convertToYears(date):
    return int(date[:4])

fredDfs = [unemploymentDf, realGdpDf, inflationDf, populationDf, nominalGdpDf]

for df in fredDfs:
    df['DATE'] = df['DATE'].apply(convertToYears)

# Rename datasets and drop unnecessary rows
strikeCountsDf = strikeCountsDf.rename(columns={'VALUE': 'strikeCounts'})
strikeAverageDurationsDf = strikeAverageDurationsDf.rename(columns={'VALUE': 'strikeAverageDurations'})
strikeDaysNotWorkedDf = strikeDaysNotWorkedDf.rename(columns={'VALUE': 'daysNotWorkedDueToStrikes'})

householdCompensationDf = householdCompensationDf.rename(columns={'VALUE': 'householdCompensation'})
nominalGdpDf = nominalGdpDf.rename(columns={'NGDPXDCCAA': 'nominalGdp'})

realGdpDf = realGdpDf.rename(columns={'NGDPRSAXDCCAQ': 'realGdp'})
unemploymentDf = unemploymentDf.rename(columns={'LRUNTTTTCAA156S': 'unemploymentRate'})
inflationDf = inflationDf.rename(columns={'FPCPITOTLZGCAN': 'inflationRate'})
populationDf = populationDf.rename(columns={'POPTOTCAA647NWDB': 'population'})


In [12]:
# Merge into one dataframe
allDfs = [strikeCountsDf, strikeAverageDurationsDf, strikeDaysNotWorkedDf, unemploymentDf, realGdpDf, inflationDf, populationDf, householdCompensationDf, nominalGdpDf]

fullDf = strikeCountsDf

for df in allDfs:
    fullDf = pd.merge(fullDf, df, on='DATE')
    
print(fullDf)

    DATE  strikeCounts_x  strikeCounts_y  strikeAverageDurations  \
0   1961             287             287                    17.8   
1   1962             311             311                    18.3   
2   1963             332             332                    19.7   
3   1964             343             343                    21.9   
4   1965             502             502                    20.0   
..   ...             ...             ...                     ...   
58  2019             128             128                    33.2   
59  2020              66              66                    44.5   
60  2021             186             186                    23.4   
61  2022             176             176                    57.6   
62  2023             778             778                    24.5   

    daysNotWorkedDueToStrikes  unemploymentRate   NGDPRXDCCAA  inflationRate  \
0                     1335080          7.183333  3.766743e+05       1.018767   
1                     1