# 06 - Time Series Analysis: AI vs General IT Job Market Trends

## Objective
Analyze temporal trends in job postings to understand the evolution of AI/ML roles compared to General IT roles over time.

## Key Questions
- How have AI/ML job postings changed over time?
- How do AI/ML trends compare to General IT trends?
- When did trends diverge (inflection points)?
- Are the observed trends statistically significant?
- What is the growth trajectory of each category?

## Metrics to Calculate
- Total postings per month
- AI role postings per month
- General IT role postings per month
- Ratio of AI:General IT roles
- Month-over-month growth rates
- Statistical significance of trends

## 1. Environment Setup


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from scipy import stats
from datetime import datetime
import warnings
# Suppress all warnings so the notebook output stays uncluttered.
warnings.filterwarnings('ignore')

# Global matplotlib look-and-feel applied to every figure below.
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams.update({
    'figure.figsize': (14, 8),
    'font.size': 10,
    'axes.titlesize': 14,
    'axes.labelsize': 12,
})

# pandas display settings: show every column, cap rows, two-decimal floats.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.max_rows', 50),
    ('display.float_format', '{:.2f}'.format),
):
    pd.set_option(option_name, option_value)

print(" Libraries imported successfully")
print(f"   Pandas version: {pd.__version__}")
print(f"   NumPy version: {np.__version__}")


## 2. Load Categorized Data


In [None]:
# Read the categorized job postings CSV and print a quick overview of it.
print(" Loading categorized dataset...\n")

df = pd.read_csv('data/processed/categorized_jobs.csv')
n_rows, n_cols = df.shape
banner = "=" * 70

print(banner)
print(" DATASET LOADED")
print(banner)

print(f"\n Dataset Overview:")
print(f"   Total records: {n_rows:,}")
print(f"   Columns: {n_cols}")

# Number of postings in each role category.
category_counts = df['role_category'].value_counts()
print(f"\n Category Distribution:")
print(category_counts)

# Report the raw dtype of the date column as loaded from the CSV.
posting_date_dtype = df['posting_date'].dtype
print(f"\n Date column check:")
print(f"   posting_date dtype: {posting_date_dtype}")


## 3. Data Preparation & Validation


In [None]:
print(" Preparing data for time series analysis...\n")
print("="*70)

# Parse posting dates once and derive a monthly period key used for grouping.
parsed_dates = pd.to_datetime(df['posting_date'])
df['posting_date'] = parsed_dates
df['year_month'] = parsed_dates.dt.to_period('M')

print("\n1️⃣  Filtering Data:")
total_before = len(df)
# Keep only the three tech categories; everything else is excluded.
is_tech_role = df['role_category'].isin(['AI/ML', 'General IT', 'Hybrid'])
df_tech = df[is_tech_role].copy()
n_tech = len(df_tech)
print(f"   Total records before filtering: {total_before:,}")
print(f"   Tech roles (AI/ML + General IT + Hybrid): {n_tech:,}")
print(f"   Non-Tech roles excluded: {total_before - n_tech:,}")

print("\n2️⃣  Date Range Validation:")
tech_dates = df_tech['posting_date']
min_date, max_date = tech_dates.min(), tech_dates.max()
date_range = (max_date - min_date).days

print(f"   Earliest posting: {min_date.strftime('%Y-%m-%d')}")
print(f"   Latest posting: {max_date.strftime('%Y-%m-%d')}")
# Months are approximated as 30 days for this display value only.
print(f"   Date range: {date_range} days ({date_range/30:.1f} months)")
print(f"   Unique months: {df_tech['year_month'].nunique()}")

print("\n3️⃣  Data Completeness:")
monthly_counts = df_tech.groupby('year_month').size()
print(f"   Average postings per month: {monthly_counts.mean():.0f}")
print(f"   Median postings per month: {monthly_counts.median():.0f}")
print(f"   Min postings in a month: {monthly_counts.min()}")
print(f"   Max postings in a month: {monthly_counts.max()}")

# Compare the months present in the data with the full calendar range
# between the first and the last observed month.
observed_months = df_tech['year_month']
all_months = pd.period_range(start=observed_months.min(),
                             end=observed_months.max(),
                             freq='M')
missing_months = set(all_months).difference(observed_months.unique())

if not missing_months:
    print(f"    No missing months - continuous data series")
else:
    print(f"\n     Missing months detected: {len(missing_months)}")
    for month in sorted(missing_months):
        print(f"      • {month}")

print("\n" + "="*70)
print("✅ Data preparation complete")


## 4. Monthly Aggregation by Category


In [None]:
print(" Aggregating data by month and category...\n")
print("="*70)

# Count postings per (month, category) and pivot the categories into columns.
monthly_by_category = df_tech.groupby(['year_month', 'role_category']).size().unstack(fill_value=0)

# Guarantee that every expected category column exists, even if it never occurs.
for category in ['AI/ML', 'General IT', 'Hybrid']:
    if category not in monthly_by_category.columns:
        monthly_by_category[category] = 0

# Months without any tech posting do not appear in the groupby result.
# Insert them as explicit zero rows so that every position-based calculation
# on this table (pct_change, rolling windows, regression on the row number)
# really operates on consecutive calendar months.
if len(monthly_by_category) > 0:
    full_month_index = pd.period_range(
        start=monthly_by_category.index.min(),
        end=monthly_by_category.index.max(),
        freq='M',
        name=monthly_by_category.index.name,
    )
    monthly_by_category = monthly_by_category.reindex(full_month_index, fill_value=0)

# Total is computed before any derived column is added, so it sums only
# the category count columns.
monthly_by_category['Total'] = monthly_by_category.sum(axis=1)

# Share of each category in the monthly total (NaN for months with Total == 0).
for count_col, pct_col in (('AI/ML', 'AI_ML_pct'),
                           ('General IT', 'General_IT_pct'),
                           ('Hybrid', 'Hybrid_pct')):
    monthly_by_category[pct_col] = (
        monthly_by_category[count_col] / monthly_by_category['Total'] * 100
    ).round(1)

# AI:IT ratio; a zero General IT count becomes NaN instead of dividing by zero.
monthly_by_category['AI_IT_ratio'] = (monthly_by_category['AI/ML'] / monthly_by_category['General IT'].replace(0, np.nan)).round(3)

print("\n Monthly Job Postings by Category:")
print(monthly_by_category)

print(f"\n\n Summary Statistics:")
print(monthly_by_category[['AI/ML', 'General IT', 'Hybrid', 'Total']].describe().round(1))


## 5. Growth Rate Analysis (Month-over-Month)


In [None]:
print(" Calculating month-over-month growth rates...\n")
print("="*70)

# Month-over-month percentage change per series. The first row is NaN, and a
# month that follows a zero count yields +/-inf (or NaN for 0 -> 0).
monthly_by_category['AI_ML_MoM_%'] = monthly_by_category['AI/ML'].pct_change() * 100
monthly_by_category['General_IT_MoM_%'] = monthly_by_category['General IT'].pct_change() * 100
monthly_by_category['Hybrid_MoM_%'] = monthly_by_category['Hybrid'].pct_change() * 100
monthly_by_category['Total_MoM_%'] = monthly_by_category['Total'].pct_change() * 100

# Cumulative growth relative to the first month; only defined when the
# baseline count is non-zero, otherwise the column is not created.
baseline_month = monthly_by_category.index[0]
baseline_ai = monthly_by_category.loc[baseline_month, 'AI/ML']
baseline_it = monthly_by_category.loc[baseline_month, 'General IT']

if baseline_ai > 0:
    monthly_by_category['AI_ML_cumulative_%'] = ((monthly_by_category['AI/ML'] / baseline_ai) - 1) * 100
if baseline_it > 0:
    monthly_by_category['General_IT_cumulative_%'] = ((monthly_by_category['General IT'] / baseline_it) - 1) * 100

print("\n Month-over-Month Growth Rates:")
growth_cols = ['AI/ML', 'AI_ML_MoM_%', 'General IT', 'General_IT_MoM_%', 'AI_IT_ratio']
print(monthly_by_category[growth_cols])

# Average only the finite growth rates: a single +/-inf entry would otherwise
# turn the whole mean into inf. The stored MoM columns are left untouched.
finite_mom = monthly_by_category[
    ['AI_ML_MoM_%', 'General_IT_MoM_%', 'Total_MoM_%']
].replace([np.inf, -np.inf], np.nan)

print(f"\n\n Average Monthly Growth Rates:")
print(f"   AI/ML average MoM: {finite_mom['AI_ML_MoM_%'].mean():.1f}%")
print(f"   General IT average MoM: {finite_mom['General_IT_MoM_%'].mean():.1f}%")
print(f"   Total average MoM: {finite_mom['Total_MoM_%'].mean():.1f}%")

print(f"\n Total Growth (First to Last Month):")
if len(monthly_by_category) > 1:
    first_month = monthly_by_category.index[0]
    last_month = monthly_by_category.index[-1]

    ai_first = monthly_by_category.loc[first_month, 'AI/ML']
    ai_last = monthly_by_category.loc[last_month, 'AI/ML']
    it_first = monthly_by_category.loc[first_month, 'General IT']
    it_last = monthly_by_category.loc[last_month, 'General IT']

    # A percentage is undefined for a zero baseline; say so rather than
    # silently omitting the line.
    if ai_first > 0:
        ai_total_growth = ((ai_last / ai_first) - 1) * 100
        print(f"   AI/ML: {ai_first} → {ai_last} ({ai_total_growth:+.1f}%)")
    else:
        print(f"   AI/ML: {ai_first} → {ai_last} (n/a - no postings in first month)")

    if it_first > 0:
        it_total_growth = ((it_last / it_first) - 1) * 100
        print(f"   General IT: {it_first} → {it_last} ({it_total_growth:+.1f}%)")
    else:
        print(f"   General IT: {it_first} → {it_last} (n/a - no postings in first month)")


## 6. Visualization: Monthly Trends


In [None]:
print(" Creating monthly trend visualizations...\n")

# Matplotlib needs real timestamps on the x-axis, so convert the monthly
# PeriodIndex on a copy and leave the analysis table untouched.
monthly_plot = monthly_by_category.copy()
monthly_plot.index = monthly_plot.index.to_timestamp()

fig, ax = plt.subplots(figsize=(14, 7))

# One line per category; the column name doubles as the legend label.
line_styles = [
    ('AI/ML', dict(marker='o', linewidth=2.5, markersize=8, color='#2E86AB')),
    ('General IT', dict(marker='s', linewidth=2.5, markersize=8, color='#A23B72')),
    ('Hybrid', dict(marker='^', linewidth=2, markersize=7, color='#F18F01', alpha=0.7)),
]
for column, style in line_styles:
    ax.plot(monthly_plot.index, monthly_plot[column], label=column, **style)

ax.set_xlabel('Month', fontsize=12, fontweight='bold')
ax.set_ylabel('Number of Job Postings', fontsize=12, fontweight='bold')
ax.set_title('Job Market Trends: AI/ML vs General IT Roles Over Time',
             fontsize=14, fontweight='bold', pad=20)
ax.legend(fontsize=11, loc='best')
ax.grid(True, alpha=0.3)

# Year-month tick labels, rotated for readability.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45, ha='right')

plt.tight_layout()
plt.savefig('data/processed/time_series_trends.png', dpi=300, bbox_inches='tight')
plt.show()

print("Chart saved: data/processed/time_series_trends.png")


## 7. Visualization: Stacked Area Chart


In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Cumulative band edges: AI/ML at the bottom, General IT on top of it, and
# Hybrid filling the remainder up to the monthly total.
months = monthly_plot.index
ai_top = monthly_plot['AI/ML']
it_top = monthly_plot['AI/ML'] + monthly_plot['General IT']
bands = [
    (0, ai_top, 'AI/ML', '#2E86AB'),
    (ai_top, it_top, 'General IT', '#A23B72'),
    (it_top, monthly_plot['Total'], 'Hybrid', '#F18F01'),
]
for lower_edge, upper_edge, band_label, band_color in bands:
    ax.fill_between(months, lower_edge, upper_edge,
                    label=band_label, alpha=0.7, color=band_color)

ax.set_xlabel('Month', fontsize=12, fontweight='bold')
ax.set_ylabel('Number of Job Postings', fontsize=12, fontweight='bold')
ax.set_title('Job Market Composition: Stacked Distribution Over Time',
             fontsize=14, fontweight='bold', pad=20)
ax.legend(fontsize=11, loc='upper left')
ax.grid(True, alpha=0.3)

ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45, ha='right')

plt.tight_layout()
plt.savefig('data/processed/stacked_area_chart.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Chart saved: data/processed/stacked_area_chart.png")


## 8. Visualization: AI:IT Ratio Analysis


In [None]:
# Dual-axis chart: posting counts on the left axis, AI:IT ratio on the right.
fig, ax1 = plt.subplots(figsize=(14, 7))

ax1.plot(monthly_plot.index, monthly_plot['AI/ML'], 
         marker='o', linewidth=2.5, markersize=8, label='AI/ML Jobs', color='#2E86AB')
ax1.plot(monthly_plot.index, monthly_plot['General IT'], 
         marker='s', linewidth=2.5, markersize=8, label='General IT Jobs', color='#A23B72')
ax1.set_xlabel('Month', fontsize=12, fontweight='bold')
ax1.set_ylabel('Number of Job Postings', fontsize=12, fontweight='bold', color='black')
ax1.tick_params(axis='y', labelcolor='black')

# Secondary y-axis sharing the same x-axis, used for the ratio series.
ax2 = ax1.twinx()
ax2.plot(monthly_plot.index, monthly_plot['AI_IT_ratio'], 
         marker='D', linewidth=2, markersize=7, label='AI:IT Ratio', 
         color='#F18F01', linestyle='--', alpha=0.8)
ax2.set_ylabel('AI/ML : General IT Ratio', fontsize=12, fontweight='bold', color='#F18F01')
ax2.tick_params(axis='y', labelcolor='#F18F01')

ax1.set_title('Job Postings and AI:IT Ratio Over Time', 
              fontsize=14, fontweight='bold', pad=20)
ax1.legend(loc='upper left', fontsize=10)
ax2.legend(loc='upper right', fontsize=10)
ax1.grid(True, alpha=0.3)

ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
# Rotate the tick labels on ax1 explicitly. After twinx() the pyplot
# "current axes" is ax2, whose x-axis is hidden, so plt.xticks(...) would
# style labels that are never drawn and leave the visible ones unrotated.
plt.setp(ax1.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.savefig('data/processed/ratio_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Chart saved: data/processed/ratio_analysis.png")


## 9. Visualization: Month-over-Month Growth Rates


In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Keep only months where both growth rates are finite numbers.
mom_columns = ['AI_ML_MoM_%', 'General_IT_MoM_%']
growth_plot = monthly_plot[mom_columns].replace([np.inf, -np.inf], np.nan).dropna()

# Two bars per month, placed side by side around each integer position.
width = 0.35
x_pos = np.arange(len(growth_plot))
half_width = width / 2

bars1 = ax.bar(x_pos - half_width, growth_plot['AI_ML_MoM_%'], width,
               label='AI/ML MoM%', color='#2E86AB', alpha=0.8)
bars2 = ax.bar(x_pos + half_width, growth_plot['General_IT_MoM_%'], width,
               label='General IT MoM%', color='#A23B72', alpha=0.8)

# Zero line separates growth from decline.
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.8)
ax.set_xlabel('Month', fontsize=12, fontweight='bold')
ax.set_ylabel('Month-over-Month Growth Rate (%)', fontsize=12, fontweight='bold')
ax.set_title('Month-over-Month Growth Rates: AI/ML vs General IT',
             fontsize=14, fontweight='bold', pad=20)

month_labels = [stamp.strftime('%Y-%m') for stamp in growth_plot.index]
ax.set_xticks(x_pos)
ax.set_xticklabels(month_labels, rotation=45, ha='right')
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('data/processed/mom_growth_rates.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Chart saved: data/processed/mom_growth_rates.png")


## 10. Inflection Point Detection


In [None]:
print(" Detecting trend inflection points...\n")
print("="*70)

# Centered rolling means of the raw monthly counts.
window_3 = 3
for raw_col, smooth_col in (('AI/ML', 'AI_ML_rolling_3'),
                            ('General IT', 'IT_rolling_3')):
    monthly_by_category[smooth_col] = (
        monthly_by_category[raw_col].rolling(window=window_3, center=True).mean()
    )

print(f"\n1️  Rolling Averages ({window_3}-month):")
print(monthly_by_category[['AI/ML', 'AI_ML_rolling_3', 'General IT', 'IT_rolling_3']])

print(f"\n\n2️  Linear Trend Analysis:")

# The regressor is simply the row position (0, 1, 2, ...) of each month.
monthly_by_category['month_numeric'] = range(len(monthly_by_category))
time_axis = monthly_by_category['month_numeric']

ai_fit = stats.linregress(time_axis, monthly_by_category['AI/ML'])
ai_slope, ai_intercept, ai_r, ai_p, ai_stderr = ai_fit

it_fit = stats.linregress(time_axis, monthly_by_category['General IT'])
it_slope, it_intercept, it_r, it_p, it_stderr = it_fit

ai_verdict = 'Statistically significant' if ai_p < 0.05 else 'Not significant'
it_verdict = 'Statistically significant' if it_p < 0.05 else 'Not significant'

print(f"\n   AI/ML Trend:")
print(f"      Slope: {ai_slope:.2f} jobs/month")
print(f"      R²: {ai_r**2:.4f}")
print(f"      P-value: {ai_p:.4f}")
print(f"      Trend: {ai_verdict} (α=0.05)")

print(f"\n   General IT Trend:")
print(f"      Slope: {it_slope:.2f} jobs/month")
print(f"      R²: {it_r**2:.4f}")
print(f"      P-value: {it_p:.4f}")
print(f"      Trend: {it_verdict} (α=0.05)")

print(f"\n\n3️  Growth Rate Comparison:")

# Compare the fitted slopes (absolute jobs per month, not percentages).
ai_is_faster = ai_slope > it_slope
slope_diff = (ai_slope - it_slope) if ai_is_faster else (it_slope - ai_slope)
if ai_is_faster:
    print(f"    AI/ML is growing FASTER than General IT")
    print(f"      Difference: {slope_diff:.2f} jobs/month")
    print(f"      AI/ML monthly growth: {ai_slope:.2f} jobs/month")
    print(f"      General IT monthly growth: {it_slope:.2f} jobs/month")
else:
    print(f"   General IT is growing faster than AI/ML")
    print(f"      Difference: {slope_diff:.2f} jobs/month")

print(f"\n\n4️  Trend Divergence Analysis:")

# Flag months whose change in AI:IT ratio exceeds one standard deviation
# of all month-to-month ratio changes.
ratio_change = monthly_by_category['AI_IT_ratio'].diff()
ratio_threshold = ratio_change.std()
significant_ratio_changes = ratio_change[ratio_change.abs() > ratio_threshold]

if significant_ratio_changes.empty:
    print(f"   No significant divergence points detected")
else:
    print(f"\n   Months with significant ratio changes:")
    for month, change in significant_ratio_changes.items():
        direction = "↑" if change > 0 else "↓"
        print(f"      {month}: {direction} {abs(change):.3f} change in AI:IT ratio")

print("\n" + "="*70)


## 11. Visualization: Trend Lines with Analysis


In [None]:
fig, ax = plt.subplots(figsize=(14, 7))

# Rebuild the timestamp-indexed copy so it contains the columns added since
# the previous plotting copy was made.
monthly_plot = monthly_by_category.copy()
monthly_plot.index = monthly_plot.index.to_timestamp()

# Fitted values of the two linear regressions, evaluated at every month.
ai_trend = ai_intercept + ai_slope * monthly_by_category['month_numeric']
it_trend = it_intercept + it_slope * monthly_by_category['month_numeric']

trend_specs = [
    ('AI/ML', '#2E86AB', ai_trend, ai_slope),
    ('General IT', '#A23B72', it_trend, it_slope),
]

# Observed counts first ...
for column, series_color, _fitted, _slope in trend_specs:
    ax.scatter(monthly_plot.index, monthly_plot[column],
               s=100, alpha=0.6, color=series_color,
               label=f'{column} (actual)', zorder=5)

# ... then the dashed regression lines.
for column, series_color, fitted, slope in trend_specs:
    ax.plot(monthly_plot.index, fitted,
            linewidth=3, color=series_color, linestyle='--',
            label=f'{column} trend (slope={slope:.2f})', alpha=0.8)

ax.set_xlabel('Month', fontsize=12, fontweight='bold')
ax.set_ylabel('Number of Job Postings', fontsize=12, fontweight='bold')
ax.set_title('Trend Analysis: Actual Data vs Linear Trends',
             fontsize=14, fontweight='bold', pad=20)
ax.legend(fontsize=10, loc='best')
ax.grid(True, alpha=0.3)

ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45, ha='right')

plt.tight_layout()
plt.savefig('data/processed/trend_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

print(" Chart saved: data/processed/trend_analysis.png")


## 12. Statistical Significance Testing


In [None]:
print(" Performing statistical significance tests...\n")
print("="*70)

print("\n1️  Mann-Kendall Trend Test:")
print("   (Tests for monotonic increasing or decreasing trend)\n")

def mann_kendall_test(data):
    """Run a two-sided Mann-Kendall test for a monotonic trend.

    Args:
        data: One-dimensional sequence of numeric observations in time order.

    Returns:
        A tuple ``(tau, p_value, z)`` where ``tau`` is the S statistic divided
        by the number of observation pairs, ``z`` is the continuity-corrected
        normal score of S, and ``p_value`` is the two-sided p-value from the
        normal approximation. With fewer than two observations no trend can
        be assessed and ``(0.0, 1.0, 0.0)`` is returned.
    """
    values = np.asarray(data, dtype=float)
    n = len(values)
    if n < 2:
        return 0.0, 1.0, 0.0

    # S statistic: sum of the signs of all forward pairwise differences.
    s = 0.0
    for i in range(n - 1):
        s += np.sign(values[i + 1:] - values[i]).sum()

    # Variance of S with the tie correction: each group of t equal values
    # removes t(t-1)(2t+5) from the untied variance numerator. Count data
    # commonly contains ties, and ignoring them overstates the variance.
    _, tie_counts = np.unique(values, return_counts=True)
    tie_term = np.sum(tie_counts * (tie_counts - 1) * (2 * tie_counts + 5))
    var_s = (n * (n - 1) * (2 * n + 5) - tie_term) / 18

    # Continuity correction moves S one unit towards zero. var_s is zero only
    # when all values are equal, in which case S is zero as well.
    if s > 0:
        z = (s - 1) / np.sqrt(var_s)
    elif s < 0:
        z = (s + 1) / np.sqrt(var_s)
    else:
        z = 0.0

    p_value = 2 * stats.norm.sf(abs(z))
    tau = s / (0.5 * n * (n - 1))

    return float(tau), float(p_value), float(z)

# Mann-Kendall trend test on the monthly AI/ML counts.
ai_tau, ai_p_mk, ai_z = mann_kendall_test(monthly_by_category['AI/ML'].values)
print(f"   AI/ML Roles:")
print(f"      Kendall's Tau: {ai_tau:.4f}")
print(f"      Z-statistic: {ai_z:.4f}")
print(f"      P-value: {ai_p_mk:.4f}")
if ai_p_mk < 0.05:
    # The sign of tau gives the direction of the detected trend.
    trend_direction = "increasing" if ai_tau > 0 else "decreasing"
    print(f"       Significant {trend_direction} trend detected (α=0.05)")
else:
    print(f"       No significant trend detected (α=0.05)")

# Same test on the monthly General IT counts.
it_tau, it_p_mk, it_z = mann_kendall_test(monthly_by_category['General IT'].values)
print(f"\n   General IT Roles:")
print(f"      Kendall's Tau: {it_tau:.4f}")
print(f"      Z-statistic: {it_z:.4f}")
print(f"      P-value: {it_p_mk:.4f}")
if it_p_mk < 0.05:
    trend_direction = "increasing" if it_tau > 0 else "decreasing"
    print(f"       Significant {trend_direction} trend detected (α=0.05)")
else:
    print(f"      ❌ No significant trend detected (α=0.05)")

print(f"\n\n2️  Pearson Correlation with Time:")

# Correlate each series with the row-position regressor 'month_numeric'
# (this column must already exist on monthly_by_category).
ai_corr, ai_corr_p = stats.pearsonr(monthly_by_category['month_numeric'], 
                                     monthly_by_category['AI/ML'])
it_corr, it_corr_p = stats.pearsonr(monthly_by_category['month_numeric'], 
                                     monthly_by_category['General IT'])

# Strength labels: |r| > 0.7 is 'Strong', |r| > 0.4 is 'Moderate', else 'Weak'.
print(f"\n   AI/ML Roles:")
print(f"      Correlation: {ai_corr:.4f}")
print(f"      P-value: {ai_corr_p:.4f}")
print(f"      R²: {ai_corr**2:.4f}")
print(f"      Interpretation: {'Strong' if abs(ai_corr) > 0.7 else 'Moderate' if abs(ai_corr) > 0.4 else 'Weak'} correlation with time")

print(f"\n   General IT Roles:")
print(f"      Correlation: {it_corr:.4f}")
print(f"      P-value: {it_corr_p:.4f}")
print(f"      R²: {it_corr**2:.4f}")
print(f"      Interpretation: {'Strong' if abs(it_corr) > 0.7 else 'Moderate' if abs(it_corr) > 0.4 else 'Weak'} correlation with time")

print(f"\n\n3️  T-Test: First Half vs Second Half:")

# Split the months into two halves by position; with an odd number of
# months the second half receives the extra month.
midpoint = len(monthly_by_category) // 2
ai_first_half = monthly_by_category['AI/ML'].iloc[:midpoint]
ai_second_half = monthly_by_category['AI/ML'].iloc[midpoint:]
it_first_half = monthly_by_category['General IT'].iloc[:midpoint]
it_second_half = monthly_by_category['General IT'].iloc[midpoint:]

# Independent two-sample t-test, second half vs first half.
# NOTE(review): ttest_ind is called with its default arguments - confirm
# whether Welch's variant (equal_var=False) would be more appropriate here.
ai_t_stat, ai_t_p = stats.ttest_ind(ai_second_half, ai_first_half)
print(f"\n   AI/ML Roles:")
print(f"      First half mean: {ai_first_half.mean():.1f}")
print(f"      Second half mean: {ai_second_half.mean():.1f}")
print(f"      Difference: {ai_second_half.mean() - ai_first_half.mean():.1f}")
print(f"      T-statistic: {ai_t_stat:.4f}")
print(f"      P-value: {ai_t_p:.4f}")
if ai_t_p < 0.05:
    print(f"      Significant difference between periods (α=0.05)")
else:
    print(f"       No significant difference between periods (α=0.05)")

it_t_stat, it_t_p = stats.ttest_ind(it_second_half, it_first_half)
print(f"\n   General IT Roles:")
print(f"      First half mean: {it_first_half.mean():.1f}")
print(f"      Second half mean: {it_second_half.mean():.1f}")
print(f"      Difference: {it_second_half.mean() - it_first_half.mean():.1f}")
print(f"      T-statistic: {it_t_stat:.4f}")
print(f"      P-value: {it_t_p:.4f}")
if it_t_p < 0.05:
    print(f"       Significant difference between periods (α=0.05)")
else:
    print(f"      ❌ No significant difference between periods (α=0.05)")

print("\n" + "="*70)


## 13. Summary Report


In [None]:
# Consolidated text report built from the results of the preceding cells.
print("\n" + "="*70)
print(" TIME SERIES ANALYSIS - SUMMARY REPORT")
print("="*70)

print(f"\n📊 DATASET OVERVIEW:")
print(f"   Analysis period: {monthly_by_category.index[0]} to {monthly_by_category.index[-1]}")
print(f"   Total months analyzed: {len(monthly_by_category)}")
print(f"   Total tech job postings: {df_tech.shape[0]:,}")

print(f"\n\n📈 OVERALL TRENDS:")
print(f"\n   AI/ML Roles:")
print(f"      Total postings: {monthly_by_category['AI/ML'].sum():,}")
print(f"      Average per month: {monthly_by_category['AI/ML'].mean():.1f}")
print(f"      Growth trend: {ai_slope:.2f} jobs/month")
print(f"      Linear trend R²: {ai_r**2:.4f}")
print(f"      Trend significance: p={ai_p:.4f} {'✅ Significant' if ai_p < 0.05 else '❌ Not significant'}")

print(f"\n   General IT Roles:")
print(f"      Total postings: {monthly_by_category['General IT'].sum():,}")
print(f"      Average per month: {monthly_by_category['General IT'].mean():.1f}")
print(f"      Growth trend: {it_slope:.2f} jobs/month")
print(f"      Linear trend R²: {it_r**2:.4f}")
print(f"      Trend significance: p={it_p:.4f} {'✅ Significant' if it_p < 0.05 else '❌ Not significant'}")

print(f"\n   Hybrid Roles:")
print(f"      Total postings: {monthly_by_category['Hybrid'].sum():,}")
print(f"      Average per month: {monthly_by_category['Hybrid'].mean():.1f}")

print(f"\n\n📊 KEY METRICS:")
print(f"\n   AI:IT Ratio:")
print(f"      Average: {monthly_by_category['AI_IT_ratio'].mean():.3f}:1")
print(f"      First month: {monthly_by_category['AI_IT_ratio'].iloc[0]:.3f}:1")
print(f"      Last month: {monthly_by_category['AI_IT_ratio'].iloc[-1]:.3f}:1")
# Scalar first-to-last change in the ratio; NaN when either endpoint is NaN.
ratio_change = monthly_by_category['AI_IT_ratio'].iloc[-1] - monthly_by_category['AI_IT_ratio'].iloc[0]
print(f"      Change: {ratio_change:+.3f}")

print(f"\n   Market Share (of tech roles):")
total_ai = monthly_by_category['AI/ML'].sum()
total_it = monthly_by_category['General IT'].sum()
total_hybrid = monthly_by_category['Hybrid'].sum()
total_tech = total_ai + total_it + total_hybrid
print(f"      AI/ML: {(total_ai/total_tech)*100:.1f}%")
print(f"      General IT: {(total_it/total_tech)*100:.1f}%")
print(f"      Hybrid: {(total_hybrid/total_tech)*100:.1f}%")

print(f"\n\n🔍 STATISTICAL FINDINGS:")
print(f"\n   Mann-Kendall Trend Test:")
print(f"      AI/ML: Tau={ai_tau:.4f}, p={ai_p_mk:.4f} {'✅ Significant trend' if ai_p_mk < 0.05 else '❌ No significant trend'}")
print(f"      General IT: Tau={it_tau:.4f}, p={it_p_mk:.4f} {'✅ Significant trend' if it_p_mk < 0.05 else '❌ No significant trend'}")

print(f"\n   Correlation with Time:")
print(f"      AI/ML: r={ai_corr:.4f}, p={ai_corr_p:.4f}")
print(f"      General IT: r={it_corr:.4f}, p={it_corr_p:.4f}")

print(f"\n\n💡 KEY INSIGHTS:")

insights = []

# 1. Which category has the steeper fitted slope (absolute jobs/month).
if ai_slope > it_slope:
    insights.append(f"   1. AI/ML roles are growing FASTER than General IT roles")
    insights.append(f"      (AI: {ai_slope:.2f} jobs/month vs IT: {it_slope:.2f} jobs/month)")
else:
    insights.append(f"   1. General IT roles are growing faster than AI/ML roles")
    insights.append(f"      (IT: {it_slope:.2f} jobs/month vs AI: {ai_slope:.2f} jobs/month)")

# 2. Significance of the AI/ML linear trend.
if ai_p < 0.05:
    insights.append(f"\n   2. AI/ML growth trend is statistically significant (p={ai_p:.4f})")
else:
    insights.append(f"\n   2. AI/ML growth trend lacks statistical significance (p={ai_p:.4f})")

# 3. Direction of the AI:IT ratio. An undefined (NaN) or exactly zero change
#    must not be reported as a decrease.
if pd.isna(ratio_change):
    insights.append(f"\n   3. AI:IT ratio change could not be computed")
    insights.append(f"      (ratio is undefined in the first or last month)")
elif ratio_change > 0:
    insights.append(f"\n   3. AI:IT ratio is INCREASING over time ({ratio_change:+.3f})")
    insights.append(f"      AI/ML share of tech market is growing")
elif ratio_change < 0:
    insights.append(f"\n   3. AI:IT ratio is DECREASING over time ({ratio_change:+.3f})")
    insights.append(f"      General IT maintains dominant share")
else:
    insights.append(f"\n   3. AI:IT ratio is UNCHANGED between first and last month ({ratio_change:+.3f})")

# 4. Overall AI/ML share of all tech postings.
ai_share = (total_ai/total_tech)*100
if ai_share > 30:
    insights.append(f"\n   4. AI/ML represents a substantial share ({ai_share:.1f}%) of tech job market")
else:
    insights.append(f"\n   4. AI/ML remains a smaller segment ({ai_share:.1f}%) of tech job market")

# 5. Volatility of the month-over-month growth rates. Infinite rates (a month
#    following a zero count) are excluded; otherwise the standard deviation
#    is not a usable number and this insight is silently dropped.
finite_mom = monthly_by_category[['AI_ML_MoM_%', 'General_IT_MoM_%']].replace([np.inf, -np.inf], np.nan)
ai_volatility = finite_mom['AI_ML_MoM_%'].std()
it_volatility = finite_mom['General_IT_MoM_%'].std()
if ai_volatility > it_volatility * 1.5:
    insights.append(f"\n   5. AI/ML postings show higher volatility (σ={ai_volatility:.1f}%) vs IT (σ={it_volatility:.1f}%)")
    insights.append(f"      AI/ML market may be more dynamic/emerging")

for insight in insights:
    print(insight)

# NOTE: the last three files in this list are written by cells that appear
# after this one in the notebook.
print(f"\n\n📁 EXPORTED FILES:")
print(f"   • data/processed/time_series_trends.png")
print(f"   • data/processed/stacked_area_chart.png")
print(f"   • data/processed/ratio_analysis.png")
print(f"   • data/processed/mom_growth_rates.png")
print(f"   • data/processed/trend_analysis.png")
print(f"   • data/processed/top_ai_companies.png")
print(f"   • data/processed/company_positioning_scatter.png")
print(f"   • data/processed/monthly_time_series_summary.csv")

print("\n" + "="*70)
print("🎉 TIME SERIES ANALYSIS COMPLETE")
print("="*70)


## 14. Export Summary Statistics


In [None]:
# Columns written to the monthly summary CSV: raw counts first, then the
# ratio, growth-rate and share metrics.
count_cols = ['AI/ML', 'General IT', 'Hybrid', 'Total']
metric_cols = ['AI_IT_ratio', 'AI_ML_MoM_%', 'General_IT_MoM_%',
               'AI_ML_pct', 'General_IT_pct']
export_cols = count_cols + metric_cols

summary_export = monthly_by_category[export_cols].copy()
summary_export.to_csv('data/processed/monthly_time_series_summary.csv')

n_export_rows, n_export_cols = summary_export.shape
print("💾 Summary statistics exported:")
print("   File: data/processed/monthly_time_series_summary.csv")
print(f"   Rows: {n_export_rows}")
print(f"   Columns: {n_export_cols}")

print("\n📋 Summary Table Preview:")
# Trailing bare expression: the notebook renders it as the cell output.
summary_export


## 15. Company Analysis: Top 10 AI Hiring Companies


In [None]:
print("📊 Analyzing top AI hiring companies...\n")

# Ten companies with the most AI/ML postings.
is_ai_ml = df_tech['role_category'] == 'AI/ML'
ai_ml_jobs = df_tech[is_ai_ml]
top_ai_companies = ai_ml_jobs['company_clean'].value_counts().head(10)

n_bars = len(top_ai_companies)
posting_counts = top_ai_companies.values

fig, ax = plt.subplots(figsize=(12, 8))

# Blue gradient across the bars.
colors = plt.cm.Blues(np.linspace(0.4, 0.8, n_bars))
bars = ax.barh(range(n_bars), posting_counts, color=colors)

ax.set_yticks(range(n_bars))
ax.set_yticklabels(top_ai_companies.index)
# Largest company at the top of the chart.
ax.invert_yaxis()

ax.set_xlabel('Number of AI/ML Job Postings', fontsize=12, fontweight='bold')
ax.set_ylabel('Company', fontsize=12, fontweight='bold')
ax.set_title('Top 10 Companies Hiring for AI/ML Roles',
             fontsize=14, fontweight='bold', pad=20)

# Write each count just to the right of the end of its bar.
for i, value in enumerate(posting_counts):
    ax.text(value + 0.5, i, f'{value}',
            va='center', fontsize=10, fontweight='bold')

ax.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.savefig('data/processed/top_ai_companies.png', dpi=300, bbox_inches='tight')
plt.show()

print("✅ Chart saved: data/processed/top_ai_companies.png")

print("\n📋 Top 10 AI/ML Hiring Companies:")
total_ai_ml_postings = len(ai_ml_jobs)
for idx, (company, count) in enumerate(top_ai_companies.items(), 1):
    pct = (count / total_ai_ml_postings) * 100
    print(f"   {idx:2d}. {company:<40} {count:>3} jobs ({pct:>5.1f}% of AI/ML postings)")


## 16. Company Positioning: AI vs Traditional IT


In [None]:
print("📊 Creating company positioning scatter plot...\n")

# Postings per company across all tech categories.
# NOTE(review): the 'id' count column is not used anywhere in this cell.
company_stats = df_tech.groupby('company_clean').agg({
    'role_category': 'count',
    'id': 'count'
}).rename(columns={'role_category': 'total_postings'})

# AI/ML postings per company; companies without any AI/ML posting get 0.
company_ai_counts = df_tech[df_tech['role_category'] == 'AI/ML'].groupby('company_clean').size()

company_stats['ai_postings'] = company_ai_counts
company_stats['ai_postings'] = company_stats['ai_postings'].fillna(0)
company_stats['ai_percentage'] = (company_stats['ai_postings'] / company_stats['total_postings'] * 100).round(1)

# Drop companies with fewer than 5 postings before plotting and summarising.
company_stats = company_stats[company_stats['total_postings'] >= 5].copy()

fig, ax = plt.subplots(figsize=(14, 10))

# Marker area scales with posting volume; colour encodes the AI share.
sizes = company_stats['total_postings'] * 3

colors = company_stats['ai_percentage']
scatter = ax.scatter(company_stats['total_postings'], 
                     company_stats['ai_percentage'],
                     s=sizes, 
                     c=colors,
                     cmap='RdYlBu_r',
                     alpha=0.6,
                     edgecolors='black',
                     linewidth=0.5)

cbar = plt.colorbar(scatter, ax=ax)
cbar.set_label('AI Role Percentage (%)', rotation=270, labelpad=20, fontsize=11)

ax.axhline(y=50, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='50% AI threshold')

# Label the largest companies (names truncated to 25 characters).
top_companies = company_stats.nlargest(15, 'total_postings')
for idx, row in top_companies.iterrows():
    if row['total_postings'] > 10 or row['ai_percentage'] > 40:
        ax.annotate(idx[:25], 
                   xy=(row['total_postings'], row['ai_percentage']),
                   xytext=(5, 5),
                   textcoords='offset points',
                   fontsize=8,
                   alpha=0.8,
                   bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.3))

ax.set_xlabel('Total Tech Job Postings', fontsize=12, fontweight='bold')
ax.set_ylabel('AI/ML Role Percentage (%)', fontsize=12, fontweight='bold')
ax.set_title('Company Positioning: AI-Heavy vs Traditional IT Companies', 
             fontsize=14, fontweight='bold', pad=20)
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('data/processed/company_positioning_scatter.png', dpi=300, bbox_inches='tight')
plt.show()

print("✅ Chart saved: data/processed/company_positioning_scatter.png")

print("\n📊 Company Positioning Analysis:")
print(f"   Total companies analyzed (≥5 postings): {len(company_stats)}")
print(f"   Average AI percentage: {company_stats['ai_percentage'].mean():.1f}%")
print(f"   Median AI percentage: {company_stats['ai_percentage'].median():.1f}%")

# iterrows() yields each row as a single-dtype Series, so the integer posting
# count arrives as a float; cast it back to int so it prints as "12", not "12.0".
print("\n🔵 AI-Heavy Companies (>50% AI roles):")
ai_heavy = company_stats[company_stats['ai_percentage'] > 50].sort_values('total_postings', ascending=False).head(10)
for idx, row in ai_heavy.iterrows():
    print(f"   • {idx[:40]:<40} {int(row['total_postings']):>3} jobs ({row['ai_percentage']:.1f}% AI)")

print("\n🔴 Traditional IT Companies (<20% AI roles, but hiring >10 total):")
traditional = company_stats[(company_stats['ai_percentage'] < 20) & 
                           (company_stats['total_postings'] > 10)].sort_values('total_postings', ascending=False).head(10)
for idx, row in traditional.iterrows():
    print(f"   • {idx[:40]:<40} {int(row['total_postings']):>3} jobs ({row['ai_percentage']:.1f}% AI)")


## 17. Final Summary - All Visualizations Complete


In [None]:
# Closing checklist of every chart and data file this notebook writes.
banner = "=" * 70

print("\n" + banner)
print(" ALL VISUALIZATIONS COMPLETE")
print(banner)

print("\n Step 4.1: Core Visualizations - COMPLETE")

# (title, file name, status marker) for the required charts.
visualizations = [
    ("1. Line Chart: AI vs General IT Over Time", "time_series_trends.png", "✅"),
    ("2. Stacked Area Chart: Role Distribution", "stacked_area_chart.png", "✅"),
    ("3. Bar Chart: Month-over-Month Growth Rates", "mom_growth_rates.png", "✅"),
    ("4. Top 10 Companies: AI Hiring", "top_ai_companies.png", "✅"),
    ("5. Scatter Plot: Company Positioning", "company_positioning_scatter.png", "✅"),
]

print("\n Required Visualizations:")
for entry in visualizations:
    name, filename, status = entry
    print(f"   {status} {name}")
    print(f"      → data/processed/{filename}")

# (title, file name) for the additional charts.
bonus_charts = [
    ("Dual-Axis: Job Counts + AI:IT Ratio", "ratio_analysis.png"),
    ("Trend Analysis: Actual vs Fitted Trends", "trend_analysis.png"),
]
print("\n\n Bonus Visualizations:")
for entry in bonus_charts:
    name, filename = entry
    print(f"    {name}")
    print(f"      → data/processed/{filename}")

print("\n\n Analysis Outputs:")
print(f"    Monthly time series summary CSV")
print(f"      → data/processed/monthly_time_series_summary.csv")

print("\n" + banner)
print(" TIME SERIES ANALYSIS & VISUALIZATION COMPLETE")
print(banner)
print("\nAll charts are publication-ready (300 DPI, professional styling)")
print("Ready for presentation, reports, or further analysis!")


## 18. Summary Dashboard - Single Page Overview


In [None]:
print(" Creating executive summary dashboard...\n")

# One figure on a 4x3 grid: metrics banner (row 0), charts (rows 1-2),
# findings text (row 3).
fig = plt.figure(figsize=(20, 14))
gs = fig.add_gridspec(4, 3, hspace=0.4, wspace=0.3)
fig.suptitle(
    'Job Market Analytics: AI/ML vs General IT Trends Dashboard',
    fontsize=22, fontweight='bold', y=0.98,
)

# --- Row 0: key-metrics banner (text only, axes hidden) ---
ax_metrics = fig.add_subplot(gs[0, :])
ax_metrics.axis('off')

# Label -> preformatted value, in display order.
metrics_data = {
    'Total Tech Jobs': f"{df_tech.shape[0]:,}",
    'Analysis Period': f"{monthly_by_category.index[0]} to {monthly_by_category.index[-1]}",
    'Months Analyzed': f"{len(monthly_by_category)}",
    'AI/ML Jobs': f"{monthly_by_category['AI/ML'].sum():,} ({(total_ai/total_tech)*100:.1f}%)",
    'General IT Jobs': f"{monthly_by_category['General IT'].sum():,} ({(total_it/total_tech)*100:.1f}%)",
    'AI:IT Ratio': f"{monthly_by_category['AI_IT_ratio'].mean():.3f}:1",
    'AI Growth Rate': f"{ai_slope:+.2f} jobs/month",
    'IT Growth Rate': f"{it_slope:+.2f} jobs/month",
}

# Lay the metrics out four per text line; every entry keeps its trailing
# four-space separator.
metrics_per_row = 4
metric_cells = [f"{key}: {value}    " for key, value in metrics_data.items()]
metric_rows = [
    "".join(metric_cells[start:start + metrics_per_row])
    for start in range(0, len(metric_cells), metrics_per_row)
]
metrics_text = "KEY METRICS\n" + "="*120 + "\n" + "\n".join(metric_rows)

metrics_box = dict(boxstyle='round', facecolor='lightblue', alpha=0.3)
ax_metrics.text(
    0.5, 0.5, metrics_text,
    transform=ax_metrics.transAxes,
    fontsize=13,
    fontfamily='monospace',
    verticalalignment='center',
    horizontalalignment='center',
    bbox=metrics_box,
)

# --- Row 1, left two columns: posting counts over time for both categories ---
# Work on a copy whose index is converted to timestamps so the axis can use
# matplotlib's date formatter.
monthly_plot_dash = monthly_by_category.copy()
monthly_plot_dash.index = monthly_plot_dash.index.to_timestamp()

ax1 = fig.add_subplot(gs[1, :2])

# (column / legend label, marker, colour) for each plotted series
trend_series = [
    ('AI/ML', 'o', '#2E86AB'),
    ('General IT', 's', '#A23B72'),
]
# Draw both lines first, then both shaded areas, keeping the artist order stable.
for column, marker, color in trend_series:
    ax1.plot(monthly_plot_dash.index, monthly_plot_dash[column],
             marker=marker, linewidth=3, markersize=8, label=column, color=color)
for column, marker, color in trend_series:
    ax1.fill_between(monthly_plot_dash.index, monthly_plot_dash[column], alpha=0.3, color=color)

ax1.set_title('Trend: AI/ML vs General IT Over Time', fontsize=13, fontweight='bold', pad=10)
ax1.set_xlabel('Month', fontsize=11, fontweight='bold')
ax1.set_ylabel('Job Postings', fontsize=11, fontweight='bold')
ax1.legend(fontsize=10, loc='best')
ax1.grid(True, alpha=0.3)

# Year-month tick labels, slanted so neighbouring months do not overlap.
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
for tick_label in ax1.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_ha('right')

# --- Row 1, right column: average month-over-month growth per category ---
ax2 = fig.add_subplot(gs[1, 2])

# Infinite growth values are treated as missing, and months with a gap in
# either column are dropped so both averages are computed over the same rows.
growth_plot_dash = (
    monthly_plot_dash[['AI_ML_MoM_%', 'General_IT_MoM_%']]
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
avg_ai_growth = growth_plot_dash['AI_ML_MoM_%'].mean()
avg_it_growth = growth_plot_dash['General_IT_MoM_%'].mean()

categories = ['AI/ML', 'General IT']
values = [avg_ai_growth, avg_it_growth]
colors_bar = ['#2E86AB', '#A23B72']
bars = ax2.bar(categories, values, color=colors_bar, alpha=0.7, edgecolor='black', linewidth=2)
ax2.axhline(y=0, color='black', linestyle='-', linewidth=1)
ax2.set_ylabel('Avg MoM Growth (%)', fontsize=11, fontweight='bold')
ax2.set_title('Average Monthly Growth Rate', fontsize=13, fontweight='bold', pad=10)
ax2.grid(True, alpha=0.3, axis='y')

# Value labels at the bar ends. bar_label pads in points (not data units) and
# puts the label below the bar for negative values, so placement no longer
# depends on the magnitude of the growth rates being plotted.
ax2.bar_label(bars, labels=[f'{val:.1f}%' for val in values],
              padding=3, fontsize=11, fontweight='bold')
# Extra vertical headroom so the labels stay inside the axes area.
ax2.margins(y=0.15)

# --- Row 2, left two columns: companies with the most AI/ML postings ---
ax3 = fig.add_subplot(gs[2, :2])

# Posting count per company, restricted to rows categorised as AI/ML.
ai_ml_companies = df_tech.loc[df_tech['role_category'] == 'AI/ML', 'company_clean']
top_10_ai = ai_ml_companies.value_counts().head(10)

y_pos = np.arange(len(top_10_ai))
colors_horiz = plt.cm.Blues(np.linspace(0.4, 0.8, len(top_10_ai)))
bars_h = ax3.barh(y_pos, top_10_ai.values, color=colors_horiz, edgecolor='black', linewidth=0.5)

ax3.set_yticks(y_pos)
ax3.set_yticklabels(top_10_ai.index, fontsize=10)
# Flip the axis so the first entry is drawn at the top of the chart.
ax3.invert_yaxis()
ax3.set_title('Top 10 Companies Hiring AI/ML Roles', fontsize=13, fontweight='bold', pad=10)
ax3.set_xlabel('AI/ML Job Postings', fontsize=11, fontweight='bold')
ax3.grid(True, alpha=0.3, axis='x')

# Print each count just past the end of its bar.
for i, value in enumerate(top_10_ai.values):
    ax3.text(value + 0.3, i, f'{value}', va='center', fontsize=9, fontweight='bold')

# --- Row 2, right column: AI:IT ratio over time ---
ax4 = fig.add_subplot(gs[2, 2])

# Months without a ratio value are left out of the line.
ratio_data = monthly_plot_dash['AI_IT_ratio'].dropna()
ratio_months, ratio_values = ratio_data.index, ratio_data.values

ax4.plot(ratio_months, ratio_values,
         linestyle='-', linewidth=3, color='#F18F01', marker='D', markersize=7)
ax4.fill_between(ratio_months, ratio_values, alpha=0.3, color='#F18F01')

ax4.set_title('AI/ML:General IT Ratio Trend', fontsize=13, fontweight='bold', pad=10)
ax4.set_xlabel('Month', fontsize=11, fontweight='bold')
ax4.set_ylabel('AI:IT Ratio', fontsize=11, fontweight='bold')
ax4.grid(True, alpha=0.3)

# Year-month tick labels, slanted to avoid overlap.
ax4.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
for tick_label in ax4.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_ha('right')

# --- Row 3: text panel with four headline findings (axes hidden) ---
ax5 = fig.add_subplot(gs[3, :])
ax5.axis('off')

# Finding 1: which category has the larger slope (jobs/month).
# The slopes use the "+" sign flag so a negative slope renders as "-1.20"
# rather than the malformed "+-1.20" a hard-coded "+" prefix would produce.
if ai_slope > it_slope:
    insight_1 = f"✓ AI/ML roles are growing FASTER than General IT ({ai_slope:+.2f} vs {it_slope:+.2f} jobs/month)"
else:
    insight_1 = f"✓ General IT roles are growing faster than AI/ML ({it_slope:+.2f} vs {ai_slope:+.2f} jobs/month)"

# Finding 2: direction of the change in the AI:IT ratio.
if ratio_change > 0:
    insight_2 = f"✓ AI:IT ratio INCREASING ({ratio_change:+.3f}) - AI/ML gaining market share"
else:
    insight_2 = f"✓ AI:IT ratio stable or decreasing ({ratio_change:+.3f}) - General IT maintains dominance"

# Finding 3: AI/ML share of all tech postings.
insight_3 = f"✓ AI/ML represents {(total_ai/total_tech)*100:.1f}% of tech job market"

# Finding 4: significance of the AI/ML trend at the 5% level.
if ai_p < 0.05:
    insight_4 = f"✓ AI/ML growth trend is statistically significant (p={ai_p:.4f})"
else:
    insight_4 = f"✓ Trends not statistically significant yet (p={ai_p:.4f}) - more data needed"

insights_text = "KEY FINDINGS\n" + "="*120 + "\n\n"
insights_text += "\n\n".join([insight_1, insight_2, insight_3, insight_4])

ax5.text(0.5, 0.5, insights_text,
         transform=ax5.transAxes,
         fontsize=14,
         fontfamily='sans-serif',
         verticalalignment='center',
         horizontalalignment='center',
         bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.5, pad=1))

# Write the finished figure to disk at 300 DPI, then display it.
plt.savefig('data/processed/executive_dashboard.png', dpi=300, bbox_inches='tight')
plt.show()

print("✅ Executive Dashboard saved: data/processed/executive_dashboard.png")
print("\n📊 Dashboard includes:")

# One bullet per dashboard panel, printed one per line.
dashboard_sections = (
    "   • Key metrics summary",
    "   • Trend comparison chart",
    "   • Average growth rates",
    "   • Top 10 AI hiring companies",
    "   • AI:IT ratio evolution",
    "   • Key findings summary",
)
print(*dashboard_sections, sep="\n")
