In [None]:
import pandas as pd

# 1. Load with 'utf-8-sig' so a leading UTF-8 byte-order mark (which otherwise
#    shows up as 'ï»¿' glued to the first header name) is stripped on read.
df = pd.read_csv('indicator-1-unemployment-v3.csv', encoding='utf-8-sig')

# 2. Fix headers (replace embedded newlines, trim) and keep only the business columns.
df.columns = [col.replace('\n', ' ').strip() for col in df.columns]
core_cols = ['Quarter', 'National Unemployment Rate SA (%)', 'Dublin Unemployment Rate SA (%)', 'Dublin Employed SA (000)']
# .copy() makes df_clean an independent frame. Without it, the column
# assignments in step 3 act on the result of a chained selection, which raises
# SettingWithCopyWarning and leaves it ambiguous which object is modified.
df_clean = df[core_cols].dropna(subset=['Quarter']).copy()

# 3. Data sanitization: convert strings such as '8.1%' or '1,234.5' to floats.
#    regex=False states explicitly that '%' and ',' are literal characters, so
#    the result does not depend on the pandas version's default for `regex`.
for col in df_clean.columns[1:]:
    df_clean[col] = (
        df_clean[col]
        .astype(str)
        .str.replace('%', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )

# 4. Save to clean CSV (index=False keeps the row index out of the file).
df_clean.to_csv('dublin_unemployment_CLEAN.csv', index=False)

In [3]:
import pandas as pd
import matplotlib.pyplot as plt

# 1. Read the cleaned CSV into a DataFrame.
clean_csv_path = 'dublin_unemployment_CLEAN.csv'
df = pd.read_csv(clean_csv_path)

# Helper to convert 'Q1 98' to a datetime for proper sorting
def quarter_to_date(q):
    """Convert a quarter label such as 'Q1 98' to the first day of that quarter.

    Parameters
    ----------
    q : str
        Label of the form '<quarter> <year>'. The quarter is one of Q1..Q4
        (case-insensitive). The year is either two digits ('98', '07') or
        written out in full ('1998').

    Returns
    -------
    pandas.Timestamp
        The first day of the quarter's first month (January, April, July or
        October).

    Raises
    ------
    ValueError
        If the label does not consist of exactly two whitespace-separated
        parts, or the year part is not an integer.
    KeyError
        If the quarter part is not one of Q1..Q4.
    """
    # split() with no argument tolerates leading/trailing and repeated spaces.
    q_part, year_part = q.split()
    year = int(year_part)
    # Only two-digit years need a century: 00-49 -> 20xx, 50-99 -> 19xx.
    # A year that is already written in full is used as given; adding a
    # century offset to it would produce a date thousands of years off.
    if year < 100:
        year += 2000 if year < 50 else 1900
    month = {'Q1': 1, 'Q2': 4, 'Q3': 7, 'Q4': 10}[q_part.upper()]
    return pd.Timestamp(year=year, month=month, day=1)

# Attach a sortable 'Date' column derived from each quarter label, then put the
# rows in chronological order.
df = df.assign(Date=df['Quarter'].apply(quarter_to_date)).sort_values('Date')

# --- IMAGE 1: unemployment_comparison.png ---
# National and Dublin unemployment rates drawn on one set of axes.
fig, ax = plt.subplots(figsize=(10, 5))
comparison_series = [
    ('National Unemployment Rate SA (%)', {'label': 'National', 'color': '#3498db'}),
    ('Dublin Unemployment Rate SA (%)', {'label': 'Dublin', 'color': '#2ecc71', 'linewidth': 2}),
]
for column, line_kwargs in comparison_series:
    ax.plot(df['Date'], df[column], **line_kwargs)
ax.set_title('National vs Dublin Unemployment Rates')
ax.legend()
ax.grid(True, alpha=0.3)
fig.savefig('unemployment_comparison.png')
# The figure is only needed on disk; close it once it has been written.
plt.close(fig)

# --- IMAGE 2: moving_average_trend.png ---
# Four-row rolling mean of the Dublin rate; the first three rows have no full
# window and are therefore NaN.
dublin_rate = df['Dublin Unemployment Rate SA (%)']
df['MA'] = dublin_rate.rolling(window=4).mean()

fig, ax = plt.subplots(figsize=(10, 5))
# Raw series is drawn faintly so the smoothed line stands out.
ax.plot(df['Date'], dublin_rate, alpha=0.3, color='gray', label='Actual')
ax.plot(df['Date'], df['MA'], color='#e67e22', linewidth=2, label='1-Year Moving Avg')
ax.set_title('Dublin Unemployment Trend Line')
ax.legend()
fig.savefig('moving_average_trend.png')
plt.close(fig)

# --- IMAGE 3: job_growth_yoy.png ---
# Difference between each row and the row four positions earlier; with one row
# per quarter this is the change versus the same quarter of the previous year.
employed = df['Dublin Employed SA (000)']
df['YoY_Change'] = employed.diff(4)

fig, ax = plt.subplots(figsize=(10, 5))
# On a date axis the bar width is expressed in days.
ax.bar(df['Date'], df['YoY_Change'], width=50, color='#9b59b6', alpha=0.7)
# Zero baseline separates growth from contraction.
ax.axhline(0, color='black', linewidth=0.8)
ax.set_title('Dublin Year-over-Year Job Growth (Thousands)')
fig.savefig('job_growth_yoy.png')
plt.close(fig)

# --- IMAGE 4: performance_gap.png ---
# Gap = Dublin rate minus National rate. A negative gap means Dublin's
# unemployment rate is below the national one.
df['Gap'] = df['Dublin Unemployment Rate SA (%)'] - df['National Unemployment Rate SA (%)']
gap = df['Gap']

fig, ax = plt.subplots(figsize=(10, 5))
# interpolate=True extends each shaded region to the exact point where the gap
# crosses zero. Without it, `where` only fills between sampled points that both
# satisfy the condition, leaving unshaded wedges at every sign change.
ax.fill_between(df['Date'], gap, 0, where=(gap < 0), interpolate=True,
                color='green', alpha=0.4, label='Dublin Better')
ax.fill_between(df['Date'], gap, 0, where=(gap >= 0), interpolate=True,
                color='red', alpha=0.4, label='Dublin Worse')
ax.axhline(0, color='black')
ax.set_title('Economic Performance Gap (Dublin vs National)')
# State the sign convention on the axis so the chart can be read on its own.
ax.set_ylabel('Dublin minus National (percentage points)')
ax.legend()
fig.savefig('performance_gap.png')
plt.close(fig)

print("All 4 images have been saved to your folder!")

All 4 images have been saved to your folder!
