In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import networkx as nx


In [2]:
# Load the thesis panel dataset from the Excel workbook that sits next to the
# notebook, then preview the first rows as the cell's output.
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# workbook carries a saved row index. Consider `index_col=0` once it is
# confirmed that no later cell refers to that column.
PANEL_DATA_FILE = "thesis_paneldata.xlsx"
data = pd.read_excel(PANEL_DATA_FILE)
data.head()

Unnamed: 0,Country,Year,CollectiveBargain_Coverage,TradeUnions_Density,Real Average Annual Wage Growth,Annual Inflation CPI,Gini Index,Employment/population ratio Women age 15 to 64,Labour force participation rate (Women age 15 to 64),Unemployment rate (Women age 15 to 64)
0,Australia,1960,,53.799999,,3.728814,,,,
1,Australia,1961,92.0,53.200001,,2.287582,,,,
2,Australia,1962,,51.400002,,-0.319489,,,,
3,Australia,1963,,49.700001,,0.641026,,,,
4,Australia,1964,90.699997,48.200001,,2.866242,,,,


# Causal Map for the analysis

In [None]:
# Hypothesized causal map: a directed graph whose arrows run from cause to effect.
G = nx.DiGraph()

# Key variables and concepts in the analysis (one node each).
nodes = [
    "Collective Bargaining Coverage",
    "Trade Union Density",
    "Economic Growth",
    "Labor Market Policies",
    "Global Economic Trends",
    "Gini Index (Income Inequality)"
]

G.add_nodes_from(nodes)

# Hypothesized causal relationships, written as (cause, effect) pairs.
edges = [
    ("Collective Bargaining Coverage", "Gini Index (Income Inequality)"),
    ("Trade Union Density", "Collective Bargaining Coverage"),
    ("Economic Growth", "Gini Index (Income Inequality)"),
    ("Labor Market Policies", "Collective Bargaining Coverage"),
    ("Labor Market Policies", "Gini Index (Income Inequality)"),
    ("Global Economic Trends", "Economic Growth"),
    ("Global Economic Trends", "Labor Market Policies"),
]

G.add_edges_from(edges)

# Visualize the causal map.
# An explicit Axes is used so that the title is laid out inside the figure;
# calling nx.draw without `ax` on an empty figure creates axes that fill the
# whole canvas, which can push the title out of view.
causal_fig, causal_ax = plt.subplots(figsize=(10, 8))

# Fixed seed: spring_layout starts from random positions, so without it the
# map looks different on every run of the notebook.
pos = nx.spring_layout(G, seed=42)  # positions for all nodes

nx.draw(
    G,
    pos,
    ax=causal_ax,
    with_labels=True,
    node_size=3500,
    node_color="skyblue",
    font_size=10,
    font_weight="bold",
    arrows=True,
)

# No edge labels are drawn: the edges carry no attributes, and
# nx.draw_networkx_edge_labels without `edge_labels` would print each edge's
# (empty) attribute dict, i.e. a "{}" on every arrow.
causal_ax.set_title("Causal Map: Collective Bargaining and Income Inequality", fontsize=15)
causal_ax.set_axis_off()
plt.show()


In [None]:
# Keep only the observations that report all three core indicators.
required_columns = ['Gini Index', 'CollectiveBargain_Coverage', 'TradeUnions_Density']
filtered_data = data.dropna(subset=required_columns)

# Restrict the figure to a handful of countries so the lines stay readable.
selected_countries = ['Australia', 'Canada', 'Germany', 'Sweden']
country_mask = filtered_data['Country'].isin(selected_countries)
selected_data = filtered_data[country_mask]

# Two stacked panels that share the Year axis.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(14, 12), sharex=True)

# Arguments common to both panels: one line per country, told apart by both
# colour and marker/dash style.
line_style = dict(x='Year', hue='Country', style='Country', markers=True, legend='full')

# Top panel: collective bargaining coverage over time.
sns.lineplot(data=selected_data, y='CollectiveBargain_Coverage', ax=ax[0], **line_style)
ax[0].set(
    title='Collective Bargaining Coverage Over Time',
    ylabel='Collective Bargain Coverage (%)',
)
ax[0].legend(title='Country')

# Bottom panel: Gini index over time.
sns.lineplot(data=selected_data, y='Gini Index', ax=ax[1], **line_style)
ax[1].set(title='Gini Index Over Time', ylabel='Gini Index')

plt.tight_layout()
plt.show()
