# AI Job Loss Prediction - Data Exploration

This notebook explores datasets from the World Economic Forum (WEF), McKinsey, the U.S. Bureau of Labor Statistics (BLS), and other sources to understand AI's impact on employment.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Add src to path
# Resolve to an absolute path so the project modules stay importable even if the
# working directory changes later, and skip the append when the entry is already
# present so re-running this cell does not pile up duplicates in sys.path.
src_dir = Path('../src').resolve()
if str(src_dir) not in sys.path:
    sys.path.append(str(src_dir))

from data_loader import load_all_data, load_wef_data, load_mckinsey_data
from visualizations import *

# Notebook-wide plotting defaults: seaborn "whitegrid" style and a 12x8 default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 8)})

## Load All Datasets

In [None]:
# Load every dataset in one call; the result is indexed by source key below
data = load_all_data()

# Pull the four source tables out of the loader's result
wef, mckinsey, bls, global_ai = (
    data[key] for key in ('wef', 'mckinsey', 'bls', 'global_ai')
)

# Report the shape of each table as a quick sanity check
shape_labels = {
    "WEF Dataset Shape:": wef,
    "McKinsey Dataset Shape:": mckinsey,
    "BLS Dataset Shape:": bls,
    "Global AI Dataset Shape:": global_ai,
}
for label, frame in shape_labels.items():
    print(label, frame.shape)

## WEF Job Displacement Data Overview

In [None]:
# Display WEF data summary: first rows, column/dtype report, numeric statistics
display(wef.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so emits a stray "None" line.
print("\nWEF Data Info:")
wef.info()

print("\nBasic Statistics:")
display(wef.describe())

In [None]:
# Jobs displaced by year and region: total per (year, region), then one column per region
yearly_displacement = (
    wef
    .groupby(['year', 'region'])['jobs_displaced_millions']
    .sum()
    .reset_index()
)
pivot_displacement = yearly_displacement.pivot(
    index='year',
    columns='region',
    values='jobs_displaced_millions',
)
display(pivot_displacement)

# Grouped bar chart: one cluster per year, one bar per region
ax = pivot_displacement.plot.bar(figsize=(12, 6))
ax.set_title('Jobs Displaced by AI - Yearly by Region')
ax.set_xlabel('Year')
ax.set_ylabel('Jobs Displaced (Millions)')
ax.legend(title='Region')
plt.show()

## McKinsey Automation Index Analysis

In [None]:
# Preview the McKinsey table
display(mckinsey.head())

# Mean automation potential per sector, ordered from highest to lowest
print("\nAutomation Potential by Sector:")
mean_by_sector = mckinsey.groupby('sector')['automation_potential'].mean()
sector_automation = mean_by_sector.sort_values(ascending=False)
display(sector_automation)

# Horizontal bars, one per sector
ax = sector_automation.plot.barh(figsize=(10, 8))
ax.set_title('Average Automation Potential by Sector')
ax.set_xlabel('Automation Potential (%)')
plt.show()

In [None]:
# The 20 rows with the largest automation_potential, restricted to the reporting columns
report_columns = ['occupation_name', 'sector', 'automation_potential', 'displacement_risk_score']
top_automated = mckinsey.nlargest(20, 'automation_potential')[report_columns]
print("Top 20 Occupations with Highest Automation Potential:")
display(top_automated)

# Horizontal bar per occupation, coloured by displacement risk score
# NOTE(review): if displacement_risk_score is numeric, hue creates one legend
# entry per distinct value -- confirm that is the intended encoding.
fig, ax = plt.subplots(figsize=(12, 10))
sns.barplot(
    data=top_automated,
    x='automation_potential',
    y='occupation_name',
    hue='displacement_risk_score',
    ax=ax,
)
ax.set_title('Top 20 Occupations by Automation Potential')
ax.set_xlabel('Automation Potential (%)')
fig.tight_layout()
plt.show()

## Global AI Adoption Trends

In [None]:
# Mean AI adoption rate for every (country, year) pair
country_trends = (
    global_ai
    .groupby(['country', 'year'])['ai_adoption_rate']
    .mean()
    .reset_index()
)

# One line per country from a fixed shortlist
featured_countries = ('United States', 'China', 'Germany', 'United Kingdom', 'Singapore')
fig, ax = plt.subplots(figsize=(14, 8))
for name in featured_countries:
    rows = country_trends.loc[country_trends['country'] == name]
    ax.plot(rows['year'], rows['ai_adoption_rate'], marker='o', label=name, linewidth=2)

ax.set_title('AI Adoption Trends by Country')
ax.set_xlabel('Year')
ax.set_ylabel('AI Adoption Rate (%)')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# 2025 rows only, reduced to the impact columns and ordered by net impact (largest first)
impact_columns = ['country', 'job_displacement_risk', 'job_creation_potential', 'net_job_impact']
impact_2025 = (
    global_ai
    .loc[global_ai['year'] == 2025, impact_columns]
    .sort_values('net_job_impact', ascending=False)
)

# Paired bars per country: displacement to the left of the tick, creation to the right
bar_width = 0.35
positions = np.arange(len(impact_2025))

fig, ax = plt.subplots(figsize=(12, 8))
ax.bar(
    positions - bar_width / 2,
    impact_2025['job_displacement_risk'],
    bar_width,
    label='Job Displacement Risk',
    color='red',
    alpha=0.7,
)
ax.bar(
    positions + bar_width / 2,
    impact_2025['job_creation_potential'],
    bar_width,
    label='Job Creation Potential',
    color='green',
    alpha=0.7,
)

ax.set_xlabel('Country')
ax.set_ylabel('Jobs (Index)')
ax.set_title('Job Displacement vs Creation Potential (2025)')
ax.set_xticks(positions)
ax.set_xticklabels(impact_2025['country'], rotation=45, ha='right')
ax.legend()
fig.tight_layout()
plt.show()

# Same data as a table, below the chart
print("\nNet Job Impact by Country (2025):")
display(impact_2025)

## BLS Employment Projections

In [None]:
# Preview the BLS table
display(bls.head())

# Number of occupations in each automation_risk category
risk_counts = bls['automation_risk'].value_counts()
print("\nOccupations by Automation Risk:")
print(risk_counts)

# Pie chart with a fixed colour per known category; any other label falls back to gray
risk_palette = {'Low': 'green', 'Medium': 'orange', 'High': 'red'}
wedge_colors = [risk_palette.get(level, 'gray') for level in risk_counts.index]

fig, ax = plt.subplots(figsize=(10, 8))
risk_counts.plot(kind='pie', autopct='%1.1f%%', colors=wedge_colors, ax=ax)
ax.set_title('Distribution of Occupations by Automation Risk')
ax.set_ylabel('')  # drop the axis label the pie plot would otherwise show
plt.show()

In [None]:
# Ten largest and ten smallest percent_change values, shown with the same three columns
ranking_columns = ['occupation_title', 'percent_change', 'automation_risk']

print("Top 10 Fastest Growing Occupations:")
fastest_growing = bls.nlargest(10, 'percent_change')
display(fastest_growing[ranking_columns])

print("\nTop 10 Fastest Declining Occupations:")
fastest_declining = bls.nsmallest(10, 'percent_change')
display(fastest_declining[ranking_columns])

## Key Insights

### Summary of Findings:

1. **Job Displacement**: By 2025, an estimated 85+ million jobs globally could be displaced by AI.
2. **High-Risk Sectors**: Manufacturing, administrative, and customer service roles show the highest automation potential.
3. **Protected Occupations**: Healthcare, education, and creative roles show lower automation risk.
4. **Net Impact**: Despite this displacement, AI is expected to create 97+ million new roles, roughly 12 million more than the 85+ million displaced.
5. **Regional Differences**: North America and Asia Pacific show the highest AI adoption rates.