In [None]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
from scipy import stats

In [None]:

# Data loading and cleaning
df = pd.read_excel('QLFS_2025.xlsx')
# Lower-case the headers so every later cell can use one naming style.
df.columns = df.columns.str.lower()

# A cell counts as missing when it is NaN/None or an empty string.
missing_mask = df.isnull() | (df == '')
null_values = missing_mask.sum()  # per-column count of missing cells
# Count rows (not cells) so the number agrees with the printed label.
rows_with_missing = int(missing_mask.any(axis=1).sum())
print(f"Total rows with missing values: {rows_with_missing}")

# Helper function
def calc_rate(numerator, denominator):
    """Return ``numerator / denominator`` as a percentage rounded to 1 decimal.

    Operates element-wise on array-like operands (NumPy arrays or pandas
    Series, as used throughout this notebook).

    Parameters
    ----------
    numerator, denominator : array-like
        Counts to divide; they must be broadcastable against each other.

    Returns
    -------
    numpy.ndarray
        Percentages rounded to one decimal place, with ``np.nan`` wherever
        ``denominator`` equals zero.
    """
    # np.where evaluates both branches eagerly, so the division also runs for
    # zero denominators. Silence the resulting NumPy divide/invalid warnings:
    # those positions are replaced by NaN on the next line anyway.
    with np.errstate(divide='ignore', invalid='ignore'):
        percentage = np.round((numerator / denominator) * 100, 1)
    return np.where(denominator != 0, percentage, np.nan)




Total rows with missing values: 0


Index(['province', 'population_group', 'male_total_population',
       'male_inactive', 'male_economically_active', 'male_employed',
       'male_unemployed', 'female_total_population', 'female_inactive',
       'female_economically_active', 'female_employed', 'female_unemployed',
       'total_population', 'inactive', 'total_economically_active', 'employed',
       'unemployed'],
      dtype='object')

#### Question 1: Which provinces have the highest and lowest unemployment rates, and what factors might explain these differences?

In [None]:


# Calculate unemployment rate by province (aggregating all population groups)
province_summary = df.groupby('province').agg({
    'male_unemployed': 'sum',
    'male_economically_active': 'sum',
    'female_unemployed': 'sum',
    'female_economically_active': 'sum',
    'employed': 'sum',
    'unemployed': 'sum',
    'total_economically_active': 'sum',
    'total_population': 'sum',
}).reset_index()

# Rates are percentages of the economically active population (1 decimal).
province_summary['unemployment_rate'] = calc_rate(province_summary.unemployed, province_summary.total_economically_active)
province_summary['employment_rate'] = calc_rate(province_summary.employed, province_summary.total_economically_active)
province_summary['male_unemployment_rate'] = calc_rate(province_summary.male_unemployed, province_summary.male_economically_active)
province_summary['female_unemployment_rate'] = calc_rate(province_summary.female_unemployed, province_summary.female_economically_active)

# Find highest and lowest
max_province = province_summary.loc[province_summary['unemployment_rate'].idxmax()]
min_province = province_summary.loc[province_summary['unemployment_rate'].idxmin()]

print(f"Highest unemployment province: {max_province['province']} ({max_province['unemployment_rate']}%)")
print(f"Lowest unemployment province: {min_province['province']} ({min_province['unemployment_rate']}%)")

# STATISTICAL VALIDATION
ALPHA = 0.05  # significance level used for the chi-square decision below

print("\n📊 STATISTICAL VALIDATION:")
print("-" * 30)

# Create contingency table for highest vs lowest unemployment provinces.
# The nw_/wc_ prefixes are historical: the values always come from whichever
# provinces idxmax/idxmin selected, and the confidence-interval code further
# down in this cell reads these names.
# NOTE(review): the counts are non-integer sums truncated by int(), which
# suggests weighted population estimates rather than raw survey counts; if so,
# the chi-square statistic overstates the evidence. Confirm against the source.
nw_unemployed = int(max_province['unemployed'])
nw_employed = int(max_province['employed'])
wc_unemployed = int(min_province['unemployed'])
wc_employed = int(min_province['employed'])

print("Sample sizes:")
print(f"• {max_province['province']}: {nw_unemployed:,} unemployed, {nw_employed:,} employed")
print(f"• {min_province['province']}: {wc_unemployed:,} unemployed, {wc_employed:,} employed")

contingency_table = [[nw_unemployed, nw_employed],
                     [wc_unemployed, wc_employed]]

chi2, p_value, dof, expected = chi2_contingency(contingency_table)
is_significant = p_value < ALPHA

print("\nChi-Square Test Results:")
print(f"• Chi-square statistic: {chi2:.2f}")
print(f"• Degrees of freedom: {dof}")
print(f"• P-value: {p_value:.2e}")
print(f"• Significance level: α = {ALPHA}")
print(f"• Result: {'STATISTICALLY SIGNIFICANT' if is_significant else 'NOT SIGNIFICANT'}")

# Calculate actual percentage point difference
unemployment_diff = max_province['unemployment_rate'] - min_province['unemployment_rate']
print("\n📈 INTERPRETATION:")
print(f"• Unemployment rate difference: {unemployment_diff:.1f} percentage points")
print(f"• Statistical conclusion: The difference IS {'statistically meaningful' if is_significant else 'NOT statistically proven'}")
print(f"• Business impact: {'This difference is highly unlikely to be due to chance' if is_significant else 'This difference could be due to random variation'}")

# Add confidence intervals for unemployment rates


def calculate_unemployment_ci(unemployed, total_active, confidence=0.95):
    """Normal-approximation confidence interval for an unemployment rate.

    Parameters
    ----------
    unemployed : number
        Count of unemployed people.
    total_active : number
        Count of economically active people (the rate's denominator).
    confidence : float, default 0.95
        Two-sided confidence level.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds in percent, clipped to the range 0-100, or
        ``(np.nan, np.nan)`` when ``total_active`` is zero.
    """
    if total_active == 0:
        return np.nan, np.nan

    rate = unemployed / total_active

    # Two-sided critical value of the standard normal distribution.
    tail_probability = 1 - confidence
    critical_value = stats.norm.ppf(1 - tail_probability/2)

    # Half-width of the interval: critical value times the standard error.
    standard_error = np.sqrt(rate * (1-rate) / total_active)
    half_width = critical_value * standard_error

    # Convert to percent and keep the bounds inside the valid range.
    lower_pct = (rate - half_width) * 100
    upper_pct = (rate + half_width) * 100
    return max(0, lower_pct), min(100, upper_pct)

print("\n🎯 95% CONFIDENCE INTERVALS:")
print("-" * 35)

# Intervals for the highest- and lowest-unemployment provinces; the
# denominator is unemployed + employed, taken from the truncated counts above.
max_lower, max_upper = calculate_unemployment_ci(
    nw_unemployed,
    nw_unemployed + nw_employed,
)
min_lower, min_upper = calculate_unemployment_ci(
    wc_unemployed,
    wc_unemployed + wc_employed,
)

for province_row, lower_bound, upper_bound in (
    (max_province, max_lower, max_upper),
    (min_province, min_lower, min_upper),
):
    print(f"• {province_row['province']}: {province_row['unemployment_rate']:.1f}% (95% CI: {lower_bound:.1f}% - {upper_bound:.1f}%)")

# Two intervals overlap unless one lies entirely above the other.
intervals_disjoint = max_lower > min_upper or min_lower > max_upper
ci_overlap = not intervals_disjoint

if ci_overlap:
    overlap_label = 'OVERLAP'
    conclusion = 'Need further investigation of significance'
else:
    overlap_label = 'DO NOT OVERLAP'
    conclusion = 'The difference is statistically significant'
print(f"• Confidence intervals {overlap_label}")
print(f"• Conclusion: {conclusion}")




Highest unemployment province: North West (40.4%)
Lowest unemployment province: Western cape (19.6%)

📊 STATISTICAL VALIDATION:
------------------------------
Sample sizes:
• North West: 596,071 unemployed, 878,769 employed
• Western cape: 696,808 unemployed, 2,861,035 employed

Chi-Square Test Results:
• Chi-square statistic: 236993.60
• Degrees of freedom: 1
• P-value: 0.00e+00
• Significance level: α = 0.05
• Result: STATISTICALLY SIGNIFICANT

📈 INTERPRETATION:
• Unemployment rate difference: 20.8 percentage points
• Statistical conclusion: The difference IS statistically meaningful
• Business impact: This difference is highly unlikely to be due to chance

🎯 95% CONFIDENCE INTERVALS:
-----------------------------------
• North West: 40.4% (95% CI: 40.3% - 40.5%)
• Western cape: 19.6% (95% CI: 19.5% - 19.6%)
• Confidence intervals DO NOT OVERLAP
• Conclusion: The difference is statistically significant


Unnamed: 0,province,male_unemployed,male_economically_active,female_unemployed,female_economically_active,employed,unemployed,total_economically_active,total_population,unemployment_rate,employment_rate,male_unemployment_rate,female_unemployment_rate
0,Eastern Cape,462037.8,1191880.0,426562.8,1069425.0,1372704.0,888600.6,2261305.0,4592038.0,39.3,60.7,38.8,39.9
1,Free State,230583.1,667420.1,229298.4,546208.0,753746.6,459881.4,1213628.0,1942128.0,37.9,62.1,34.5,42.0
2,Gauteng,1406807.0,4358243.0,1301490.0,3440777.0,5090723.0,2708297.0,7799020.0,11398650.0,34.7,65.3,32.3,37.8
3,KwaZulu-Natal,653064.2,2103681.0,674903.6,2012454.0,2788167.0,1327968.0,4116134.0,7707317.0,32.3,67.7,31.0,33.5
4,Limpopo,351739.4,1185951.0,390791.8,1044559.0,1487979.0,742531.2,2230510.0,4073727.0,33.3,66.7,29.7,37.4
5,Mpumalanga,336931.9,1041875.0,327341.1,833944.0,1211546.0,664273.0,1875819.0,3198840.0,35.4,64.6,32.3,39.3
6,North West,298099.3,840341.7,297972.3,634499.8,878769.9,596071.6,1474842.0,2825552.0,40.4,59.6,35.5,47.0
7,Northern Cape,73794.86,268777.0,69451.71,217065.2,342595.6,143246.6,485842.1,844410.0,29.5,70.5,27.5,32.0
8,Western cape,336505.7,1866236.0,360302.7,1691608.0,2861035.0,696808.4,3557844.0,5108209.0,19.6,80.4,18.0,21.3


## Analysis: Provincial Unemployment Differences

### Key Findings
- **North West** has the highest unemployment rate at **40.4%**
- **Western Cape** has the lowest unemployment rate at **19.6%** 
- This represents a **20.8 percentage point gap** between provinces

### Statistical Validation ✅
- **Chi-square test**: χ² = 236,993.60, p < 0.001
- **Sample size**: 5.0 million economically active individuals across the two provinces (about 1.47 million in North West and 3.56 million in Western Cape)
- **95% Confidence intervals**: North West (40.3% - 40.5%), Western Cape (19.5% - 19.6%)
- **Statistical conclusion**: The unemployment rate difference is **statistically significant** and highly unlikely to be due to chance
- **Business impact**: This represents a genuine structural difference requiring targeted policy intervention

### Factors Explaining These Differences

#### North West (Highest Unemployment)
- **Mining dependency**: Historically reliant on declining mining sector
- **Economic structure**: Limited diversification beyond primary industries  
- **Geographic challenges**: Rural, landlocked province with infrastructure gaps
- **Market access**: Distance from major economic centers like Johannesburg/Cape Town
- **Scale of crisis**: 596,071 unemployed individuals requiring immediate intervention

#### Western Cape (Lowest Unemployment)
- **Economic diversification**: Strong tourism, agriculture, manufacturing, and services
- **Infrastructure advantages**: Well-developed ports, roads, and urban centers
- **Human capital**: Higher education levels and skills base
- **Geographic benefits**: Coastal location with access to international markets
- **Urban centers**: Cape Town serves as major economic hub
- **Employment success**: Despite 696,808 unemployed, maintains lowest unemployment rate due to large employed population (2.9 million)

### Broader Context
The unemployment gap reflects South Africa's uneven economic development patterns, where provinces with diversified economies, better infrastructure, and access to markets consistently outperform those dependent on declining traditional industries.

### Policy Implications
**Evidence-based Priority Ranking:**
1. **North West requires emergency intervention** - statistically proven crisis-level unemployment
2. **Western Cape model replication** - study successful employment strategies for application elsewhere
3. **Structural reforms needed** - address mining dependency and geographic disadvantages in underperforming provinces

### Methodological Note
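*This analysis employs chi-square testing (α = 0.05) to validate unemployment rate differences between provinces, ensuring policy recommendations are based on statistically significant findings rather than descriptive observations alone. Caveat: the counts are non-integer and appear to be weighted population estimates rather than raw survey responses, so the chi-square statistic and the confidence intervals likely overstate the precision of the comparison.*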

#### Question 2: How do employment opportunities vary across provinces for different demographic groups?

In [497]:
# Question 2: How do employment opportunities vary across provinces for different demographic groups?

def employment_opportunities_analysis(data=None, min_active=2000, top_n=4):
    """Print employment rates and job volumes by population group and province.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Frame with ``population_group``, ``province``, ``employed`` and
        ``total_economically_active`` columns. Defaults to the notebook-level
        ``df`` so that existing no-argument calls keep working.
    min_active : number, default 2000
        A group/province cell is listed only when its economically active
        population is at least this large.
    top_n : int, default 4
        Number of provinces listed per population group, ranked by employed
        count.

    Returns
    -------
    None
        The report is written to standard output.
    """
    frame = df if data is None else data
    groups = frame['population_group'].unique()
    provinces = frame['province'].unique()

    print("EMPLOYMENT OPPORTUNITIES BY PROVINCE AND DEMOGRAPHIC GROUP")
    print("=" * 65)

    # Quick national overview
    print("\nNATIONAL EMPLOYMENT RATES:")
    for race in groups:
        race_data = frame[frame['population_group'] == race]
        total_employed = race_data['employed'].sum()
        total_active = race_data['total_economically_active'].sum()

        if total_active > 0:
            rate = (total_employed / total_active) * 100
            print(f"• {race}: {rate:.1f}%")

    print("\nTOP EMPLOYMENT OPPORTUNITIES (by job volume):")
    print("-" * 50)

    # Focus on the main story for each group
    for race in groups:
        print(f"\n{race.upper()}:")
        race_data = frame[frame['population_group'] == race]

        # Collect (province, employed, rate) for sufficiently large cells
        province_jobs = []
        for province in provinces:
            subset = race_data[race_data['province'] == province]
            if subset.empty:
                continue

            employed = subset['employed'].sum()
            active = subset['total_economically_active'].sum()

            if active >= min_active:  # skip cells too small to give a stable rate
                rate = (employed / active) * 100
                province_jobs.append((province, employed, rate))

        # Sort by number of jobs (opportunities), largest first
        province_jobs.sort(key=lambda x: x[1], reverse=True)

        # Show the top_n provinces for opportunities
        for i, (province, employed, rate) in enumerate(province_jobs[:top_n]):
            print(f"  {i+1}. {province}: {employed:,.0f} jobs ({rate:.1f}% employment rate)")

    # NOTE(review): the takeaway below is static text, not derived from the
    # figures printed above; re-check it whenever the input data changes.
    print("\n" + "=" * 65)
    print("KEY TAKEAWAY:")
    print("• Gauteng & Western Cape dominate actual job opportunities")
    print("• Higher rates ≠ more jobs (small populations can show misleading rates)")
    print("• Look at job volume, not just percentages, for real opportunities")

# Run the analysis; the report is printed to the cell output.
employment_opportunities_analysis()

EMPLOYMENT OPPORTUNITIES BY PROVINCE AND DEMOGRAPHIC GROUP

NATIONAL EMPLOYMENT RATES:
• Indian/ Asian: 86.7%
• Coloured: 76.4%
• White: 92.7%
• Black African: 63.0%

TOP EMPLOYMENT OPPORTUNITIES (by job volume):
--------------------------------------------------

INDIAN/ ASIAN:
  1. KwaZulu-Natal: 261,964 jobs (84.5% employment rate)
  2. Gauteng: 190,339 jobs (84.8% employment rate)
  3. Western cape: 39,884 jobs (95.6% employment rate)
  4. North West: 22,228 jobs (100.0% employment rate)

COLOURED:
  1. Western cape: 1,249,500 jobs (80.9% employment rate)
  2. Eastern Cape: 167,782 jobs (76.1% employment rate)
  3. Gauteng: 127,241 jobs (59.5% employment rate)
  4. Northern Cape: 120,252 jobs (66.7% employment rate)

WHITE:
  1. Gauteng: 727,701 jobs (90.4% employment rate)
  2. Western cape: 513,875 jobs (97.7% employment rate)
  3. Eastern Cape: 120,312 jobs (98.5% employment rate)
  4. KwaZulu-Natal: 112,389 jobs (89.6% employment rate)

BLACK AFRICAN:
  1. Gauteng: 4,045,442 jo

## Employment Opportunities by Province and Demographic Group

### Key Takeaways

### Employment Disparities
- **29.7 percentage point gap** exists between White (92.7%) and Black African (63.0%) employment rates
- Persistent racial inequalities continue to impact labor market outcomes

### Geographic Concentration
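- **Gauteng & Western Cape account for much of the job volume**: both rank in the top three provinces for the Indian/Asian, Coloured, and White groups, and Gauteng ranks first for Black African. The exception is the Indian/Asian group, where KwaZulu-Natal is the largest employer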
- These two provinces serve as the primary economic engines for employment

### Data Interpretation Insights
- **Higher employment rates ≠ more jobs** - small populations can show misleading percentages
- **Job volume matters more than rates** when assessing real employment opportunities
- Focus on absolute numbers rather than just percentages for policy decisions

### Strategic Implications
- Job seekers should prioritize Gauteng and Western Cape for maximum opportunities
- Policymakers need targeted interventions to address racial employment gaps
- Economic development should focus on provinces with both scale and potential impact

In [498]:
# Analysis: Which regions should be prioritized for job creation initiatives?
print("JOB CREATION PRIORITY ANALYSIS")
print("=" * 40)
print("\nProvinces ranked by unemployment severity:")

# Highest unemployment rate first.
worst_unemployment = province_summary.sort_values('unemployment_rate', ascending=False)

# List at most nine rows, numbered from 1 in ranked order.
ranked = worst_unemployment.head(9)
ranked_columns = zip(ranked['province'], ranked['unemployment_rate'], ranked['unemployed'])
for position, (province_name, rate_pct, unemployed_count) in enumerate(ranked_columns, start=1):
    print(f"{position}. {province_name}: {rate_pct:.1f}% unemployed ({unemployed_count:,.0f} people)")
           

JOB CREATION PRIORITY ANALYSIS

Provinces ranked by unemployment severity:
1. North West: 40.4% unemployed (596,072 people)
2. Eastern Cape: 39.3% unemployed (888,601 people)
3. Free State: 37.9% unemployed (459,881 people)
4. Mpumalanga: 35.4% unemployed (664,273 people)
5. Gauteng: 34.7% unemployed (2,708,297 people)
6. Limpopo: 33.3% unemployed (742,531 people)
7. KwaZulu-Natal: 32.3% unemployed (1,327,968 people)
8. Northern Cape: 29.5% unemployed (143,247 people)
9. Western cape: 19.6% unemployed (696,808 people)


## Analysis: Which regions should be prioritized for job creation initiatives?

## Key Findings

### Unemployment Severity Rankings

**Top 5 Provinces by Unemployment Rate:**

1. **North West**: 40.4% unemployed (596,072 people)
2. **Eastern Cape**: 39.3% unemployed (888,601 people)
3. **Free State**: 37.9% unemployed (459,881 people)
4. **Mpumalanga**: 35.4% unemployed (664,273 people)
5. **Gauteng**: 34.7% unemployed (2,708,297 people)

### Critical Insights

**Scale vs. Severity Analysis:**
- **North West** has the highest unemployment rate but moderate absolute numbers
- **Gauteng** has a lower rate but by far the largest unemployed population (2.7 million)
- **Eastern Cape** combines both high rates and significant scale, making it a critical concern

### Priority Ranking for Job Creation Initiatives

**TIER 1 - IMMEDIATE PRIORITY:**

1. **North West** - Crisis-level unemployment
   - 40.4% unemployment rate (highest in country)
   - 596,072 unemployed people
   - Urgent intervention required to prevent economic collapse

2. **Gauteng** - Highest absolute impact potential
   - 2.7 million unemployed people (largest scale)
   - 34.7% unemployment rate
   - Economic hub with existing infrastructure for job creation

**TIER 2 - HIGH PRIORITY:**

3. **Eastern Cape** - High rate with significant scale
   - 39.3% unemployment rate (second-highest)
   - 888,601 unemployed people
   - Rural development and economic diversification needed

4. **Free State** - Severe unemployment crisis
   - 37.9% unemployment rate (third-highest)
   - 459,881 unemployed people
   - Agricultural economy requiring modernization

**TIER 3 - MEDIUM PRIORITY:**

5. **Mpumalanga** - High rate, moderate scale
   - 35.4% unemployment rate
   - 664,273 unemployed people
   - Mining-dependent economy needing diversification

### Strategic Recommendations

**Immediate Actions:**
1. **Emergency intervention** in North West - highest unemployment rate demands urgent response
2. **Large-scale investment** in Gauteng - greatest potential for absolute job creation impact
3. **Integrated rural development** in Eastern Cape and Free State

**Long-term Strategy:**
- **Economic diversification** away from mining dependence (North West, Mpumalanga)
- **Infrastructure development** in rural provinces
- **Skills development** programs targeting high-unemployment regions
- **Public-private partnerships** leveraging Gauteng's economic base

**Resource Allocation Principle:**
Balance addressing the most severe unemployment rates (North West at 40.4%) with achieving maximum absolute impact (Gauteng's 2.7 million unemployed). This dual approach ensures both crisis response and optimal resource utilization for national employment growth.

#### Which demographic groups face the greatest barriers to employment across different provinces?

In [499]:

    # NOTE(review): every line of this cell carries a uniform 4-space indent.
    # It runs in the notebook (its output is recorded below) but would need
    # dedenting if the code were exported to a plain .py script.
    print("GENDER UNEMPLOYMENT ANALYSIS BY PROVINCE")
    print("=" * 50)

    # Sort provinces by male unemployment rate (highest first)
    sorted_male_rates = province_summary.sort_values('male_unemployment_rate', ascending=False)

    # Sort provinces by female unemployment rate (highest first)
    sorted_female_rates = province_summary.sort_values('female_unemployment_rate', ascending=False)

    # Display male unemployment rankings
    print("\nMALE UNEMPLOYMENT BY PROVINCE (Worst to Best):")
    print("-" * 45)

    for i, (_, row) in enumerate(sorted_male_rates.head(9).iterrows(), 1):
        print(f"{i}. {row.province}: {row.male_unemployment_rate:.1f}% unemployment "
              f"({row.male_unemployed:,.0f} unemployed men)")

    # Display female unemployment rankings
    print("\nFEMALE UNEMPLOYMENT BY PROVINCE (Worst to Best):")
    print("-" * 47)

    for i, (_, row) in enumerate(sorted_female_rates.head(9).iterrows(), 1):
        print(f"{i}. {row.province}: {row.female_unemployment_rate:.1f}% unemployment "
              f"({row.female_unemployed:,.0f} unemployed women)")

    # Calculate and display gender gaps
    print("\nGENDER UNEMPLOYMENT GAPS BY PROVINCE:")
    print("-" * 40)

    # Gap = female rate minus male rate, in percentage points; a positive
    # value means women have the higher unemployment rate.
    gender_gap_analysis = province_summary.copy()
    gender_gap_analysis['gender_gap'] = (
        gender_gap_analysis['female_unemployment_rate'] -
        gender_gap_analysis['male_unemployment_rate']
    )

    # Sort by gender gap (largest gaps first)
    gender_gaps_sorted = gender_gap_analysis.sort_values('gender_gap', ascending=False)

    print("Provinces ranked by gender unemployment gap (Female - Male rate):")
    for i, (_, row) in enumerate(gender_gaps_sorted.head(9).iterrows(), 1):
        # Severity bands: more than 10 points is CRITICAL, more than 5 is HIGH.
        gap_status = "CRITICAL" if row.gender_gap > 10 else "HIGH" if row.gender_gap > 5 else "MODERATE"
        print(f"{i}. {row.province}: {row.gender_gap:.1f} percentage point gap "
              f"(F: {row.female_unemployment_rate:.1f}%, M: {row.male_unemployment_rate:.1f}%) - {gap_status}")

    # Summary insights
    print("\nKEY INSIGHTS:")
    print("-" * 15)

    worst_female_province = sorted_female_rates.iloc[0]
    worst_male_province = sorted_male_rates.iloc[0]
    worst_gap_province = gender_gaps_sorted.iloc[0]

    print(f"• Worst female unemployment: {worst_female_province.province} ({worst_female_province.female_unemployment_rate:.1f}%)")
    print(f"• Worst male unemployment: {worst_male_province.province} ({worst_male_province.male_unemployment_rate:.1f}%)")
    print(f"• Largest gender gap: {worst_gap_province.province} ({worst_gap_province.gender_gap:.1f} percentage points)")

    # Derive the closing claim from the computed gaps instead of asserting it
    # unconditionally, so it stays true if the underlying data changes.
    women_worse = gender_gap_analysis['gender_gap'] > 0
    if women_worse.all():
        print("• Women face higher unemployment in ALL provinces")
    else:
        print(f"• Women face higher unemployment in {int(women_worse.sum())} of {len(gender_gap_analysis)} provinces")
    



GENDER UNEMPLOYMENT ANALYSIS BY PROVINCE

MALE UNEMPLOYMENT BY PROVINCE (Worst to Best):
---------------------------------------------
1. Eastern Cape: 38.8% unemployment (462,038 unemployed men)
2. North West: 35.5% unemployment (298,099 unemployed men)
3. Free State: 34.5% unemployment (230,583 unemployed men)
4. Gauteng: 32.3% unemployment (1,406,807 unemployed men)
5. Mpumalanga: 32.3% unemployment (336,932 unemployed men)
6. KwaZulu-Natal: 31.0% unemployment (653,064 unemployed men)
7. Limpopo: 29.7% unemployment (351,739 unemployed men)
8. Northern Cape: 27.5% unemployment (73,795 unemployed men)
9. Western cape: 18.0% unemployment (336,506 unemployed men)

FEMALE UNEMPLOYMENT BY PROVINCE (Worst to Best):
-----------------------------------------------
1. North West: 47.0% unemployment (297,972 unemployed women)
2. Free State: 42.0% unemployment (229,298 unemployed women)
3. Eastern Cape: 39.9% unemployment (426,563 unemployed women)
4. Mpumalanga: 39.3% unemployment (327,341 une

## Gender Employment Barriers: Key Findings

## Executive Summary

Women face systematically higher unemployment rates than men across **all 9 provinces** in South Africa, with gender gaps ranging from 1.1 percentage points (Eastern Cape) to 11.5 percentage points (North West). North West Province shows crisis-level gender inequality in employment access.

---

## Critical Gender Employment Gaps

### **North West Province - Crisis Level**
- **Women**: 47.0% unemployment rate
- **Men**: 35.5% unemployment rate  
- **Gender Gap**: **11.5 percentage points** *(highest in country)*
- **Impact**: Nearly 1 in 2 women cannot find employment

### **Free State Province**
- **Women**: 42.0% unemployment rate
- **Men**: 34.5% unemployment rate
- **Gender Gap**: **7.5 percentage points**
- **Scale**: 459,881 total unemployed (both genders)

### **Eastern Cape Province**
- **Women**: 39.9% unemployment rate
- **Men**: 38.8% unemployment rate *(worst male unemployment nationally)*
- **Gender Gap**: **1.1 percentage points** *(smallest gap but high rates for both)*
- **Scale**: 888,601 total unemployed

---

## Provincial Rankings

### **Worst Female Unemployment Rates**
1. **North West**: 47.0% (297,972 women)
2. **Free State**: 42.0% (229,298 women)  
3. **Eastern Cape**: 39.9% (426,563 women)
4. **Mpumalanga**: 39.3% (327,341 women)
5. **Gauteng**: 37.8% (1,301,490 women)

### **Worst Male Unemployment Rates**
1. **Eastern Cape**: 38.8% (462,038 men)
2. **North West**: 35.5% (298,099 men)
3. **Free State**: 34.5% (230,583 men)
4. **Gauteng**: 32.3% (1,406,807 men)
5. **Mpumalanga**: 32.3% (336,932 men)

---

## Scale Analysis: Absolute Impact Potential

### **Largest Unemployed Populations**
1. **Gauteng**: 2,708,297 total unemployed
   - 1,406,807 men + 1,301,490 women
   - Moderate gender gap (5.5 points) but massive scale
   
2. **KwaZulu-Natal**: 1,327,968 total unemployed
   - 653,064 men + 674,904 women
   - Represents significant intervention opportunity

3. **Eastern Cape**: 888,601 total unemployed
   - High rates for both genders with substantial population

---

## Strategic Implications

### **Systemic Gender Barriers**
- **Universal pattern**: Women face higher unemployment in ALL provinces
- **No exceptions**: Even best-performing provinces show gender gaps
- **Structural issue**: Indicates economy-wide barriers to women's employment

### **Geographic Concentration**
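- **Rural provinces most affected**: North West (11.5 points), Limpopo (7.7 points), and Free State (7.5 points) show the largest gender gaps; Eastern Cape has the smallest gap (1.1 points) but very high unemployment for both genders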
- **Urban centers**: Even Gauteng and Western Cape maintain significant gender gaps
- **Resource allocation**: Rural provinces need gender-specific interventions

---

## Policy Priority Framework

### **Immediate Intervention Required**
**North West Province**
- Crisis-level female unemployment (47.0%)
- Largest gender gap in country (11.5 points)
- Emergency women's employment programs needed

### **High-Impact Opportunities**  
**Gauteng Province**
- 2.7 million total unemployed
- Largest absolute numbers for both genders
- Infrastructure exists for large-scale interventions

### **Strategic Focus Areas**
**Free State & Eastern Cape**
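- High unemployment rates in both provinces; Free State also has a large gender gap (7.5 points), while Eastern Cape's gap is small (1.1 points) because male unemployment is nearly as high as female unemployment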
- Rural economic development with gender lens required
- Skills development programs targeting women

---

## Recommendations

### **Targeted Interventions**
1. **Emergency women's employment programs** in North West
2. **Large-scale gender-inclusive job creation** in Gauteng  
3. **Rural women's economic empowerment** in Free State and Eastern Cape

### **Systemic Reforms**
1. Address structural barriers preventing women's workforce participation
2. Develop gender-responsive economic development strategies
3. Implement targeted skills training for women in high-unemployment provinces

### **Resource Allocation**
- Prioritize provinces with both high gender gaps AND significant scale
- Balance crisis intervention (North West) with maximum impact potential (Gauteng)
- Integrate gender considerations into all employment initiatives

---

## Conclusion

Gender employment barriers are pervasive across South Africa, with women facing systematically worse employment outcomes in every province. The combination of North West's crisis-level gender gap and Gauteng's massive scale presents clear priorities for immediate policy intervention.

#### How do gender employment gaps vary by population group and region?

In [None]:
# How do gender employment gaps vary by population group and region?
print("GENDER EMPLOYMENT GAPS BY POPULATION GROUP & REGION")
print("="*55)
print("Top 10 Largest Gaps (Men's advantage over Women):")
print()

# One row per (province, population group) with the summed gender counts.
groupby_gender = df.groupby(['province','population_group']).agg({
    'male_unemployed': 'sum',
    'male_economically_active': 'sum',
    'male_employed': 'sum',
    'female_economically_active': 'sum',
    'female_employed': 'sum',
    'female_unemployed': 'sum',
})

groupby_gender['male_unemployment_rate'] = calc_rate(groupby_gender.male_unemployed, groupby_gender.male_economically_active)
groupby_gender['male_employment_rate'] = calc_rate(groupby_gender.male_employed, groupby_gender.male_economically_active)
groupby_gender['female_unemployment_rate'] = calc_rate(groupby_gender.female_unemployed, groupby_gender.female_economically_active)
groupby_gender['female_employment_rate'] = calc_rate(groupby_gender.female_employed, groupby_gender.female_economically_active)

# Calculate the actual gaps for easier interpretation (male minus female, in
# percentage points; a positive employment_gap means men fare better).
groupby_gender['unemployment_gap'] = groupby_gender['male_unemployment_rate'] - groupby_gender['female_unemployment_rate']
groupby_gender['employment_gap'] = groupby_gender['male_employment_rate'] - groupby_gender['female_employment_rate']

# Filter out small sample sizes and show a cleaner top 10.
# NOTE(review): the threshold applies to the combined male + female active
# population, so a cell with very few women can still pass; consider a
# per-gender minimum if such cells should be excluded.
MIN_COMBINED_ACTIVE = 1000
combined_active = (
    groupby_gender['male_economically_active'] + groupby_gender['female_economically_active']
)
reliable_gaps = groupby_gender[
    combined_active > MIN_COMBINED_ACTIVE
].sort_values('employment_gap', ascending=False).head(10)

# Show key columns from the filtered ranking (the unfiltered frame was
# printed previously, which left reliable_gaps unused).
top_gaps = reliable_gaps
print(top_gaps[['male_employment_rate', 'female_employment_rate', 'employment_gap']].round(1))


GENDER EMPLOYMENT GAPS BY POPULATION GROUP & REGION
Top 10 Largest Gaps (Men's advantage over Women):

                                male_employment_rate  female_employment_rate  \
province      population_group                                                 
Limpopo       Indian/ Asian                    100.0                     0.0   
Eastern Cape  Indian/ Asian                     97.1                    66.2   
KwaZulu-Natal Coloured                          74.8                    62.8   
North West    Black African                     62.9                    51.5   
Free State    White                             94.0                    84.4   
Northern Cape White                            100.0                    90.7   
Eastern Cape  Coloured                          80.5                    71.2   
Limpopo       Black African                     69.9                    62.1   
Mpumalanga    Coloured                          59.6                    52.5   
Northern Cape Bla

Unnamed: 0_level_0,Unnamed: 1_level_0,male_unemployed,male_economically_active,male_employed,female_economically_active,female_employed,female_unemployed,male_unemployment_rate,male_employment_rate,female_unemployment_rate,female_employment_rate,unemployment_gap,employment_gap
province,population_group,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Limpopo,Indian/ Asian,0.0,2316.75,2316.750133,638.9241,0.0,638.924119,0.0,100.0,100.0,0.0,-100.0,100.0
Eastern Cape,Indian/ Asian,556.45852,19068.88,18512.423463,4051.642,2683.844291,1367.797801,2.9,97.1,33.8,66.2,-30.9,30.9
KwaZulu-Natal,Coloured,5449.13818,21590.52,16141.383544,13660.83,8581.051836,5079.782877,25.2,74.8,37.2,62.8,-12.0,12.0
North West,Black African,282851.483924,762348.1,479496.59305,601609.9,309790.495684,291819.443512,37.1,62.9,48.5,51.5,-11.4,11.4
Free State,White,2849.763388,47588.9,44739.133861,28872.24,24370.38042,4501.860813,6.0,94.0,15.6,84.4,-9.6,9.6
Northern Cape,White,0.0,20761.3,20761.304446,19308.96,17518.285362,1790.670581,0.0,100.0,9.3,90.7,-9.3,9.3
Eastern Cape,Coloured,22531.896843,115687.9,93155.980163,104778.4,74625.574171,30152.802453,19.5,80.5,28.8,71.2,-9.3,9.3
Limpopo,Black African,343904.888394,1143294.0,799388.98329,1010520.0,627545.793983,382974.417531,30.1,69.9,37.9,62.1,-7.8,7.8
Mpumalanga,Coloured,1415.839323,3501.513,2085.673727,993.8127,521.906357,471.906357,40.4,59.6,47.5,52.5,-7.1,7.1
Northern Cape,Black African,40715.638128,145290.4,104574.801923,115832.1,75257.665903,40574.478671,28.0,72.0,35.0,65.0,-7.0,7.0


## Gender Employment Gaps by Population Group and Region
## Key Findings

### 1. Most Significant Employment Gaps

The analysis reveals substantial gender disparities in employment rates across different demographic and geographic combinations:

- **Limpopo Indian/Asian: 100-percentage-point gap** (Men: 100%, Women: 0%) - *Likely unreliable due to small sample size (only about 640 economically active women in this cell)*
- **Eastern Cape Indian/Asian: 30.9-percentage-point gap** (Men: 97.1%, Women: 66.2%) - *Most concerning reliable gap*
- **KwaZulu-Natal Coloured: 12.0-percentage-point gap** (Men: 74.8%, Women: 62.8%)

### 2. Population Group Patterns

Employment gaps vary significantly by demographic group:

#### Indian/Asian Populations
- Show the **largest gender gaps** (30.9-100%)
- Concentrated in specific provinces (Limpopo, Eastern Cape)

#### White Populations  
- Display **moderate but consistent gaps** (9.3-9.6%)
- Present across multiple provinces (Free State, Northern Cape)

#### Coloured Populations
- Experience **steady gaps** ranging from 7.1-12.0%
- Gaps appear across various provinces (KwaZulu-Natal, Eastern Cape, Mpumalanga)

#### Black African Populations
- Show **smaller but still significant gaps** (7.0-11.4%)  
- Most widespread across provinces (North West, Limpopo, Northern Cape)

### 3. Regional Patterns

Certain provinces consistently show higher gender employment disparities:

- **Eastern Cape**: Appears twice in top gaps (Indian/Asian and Coloured populations)
- **Northern Cape**: Shows gaps across multiple population groups (White and Black African)
- **Limpopo**: Demonstrates extreme variations by population group

### 4. Overall Trends

#### Universal Male Advantage
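- **Men have higher employment rates than women** in every one of the ten group–province combinations listed above
- Because the table is sorted by the size of the male advantage and shows only the top 10, it cannot by itself establish that the pattern holds across ALL demographic groups and regions; any combination where women have the higher employment rate would sit at the bottom of the ranking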

#### Range of Disparities
- Employment gaps in the top 10 range from **7.0 to 30.9 percentage points** (excluding the 100-point Limpopo Indian/Asian outlier)
- Even the "smallest" gaps represent significant inequalities affecting thousands of people

#### Geographic and Demographic Intersection
- **Both region AND population group matter** - the same demographic groups experience different gap sizes in different provinces
- This suggests that local economic conditions, cultural factors, and policy implementation vary significantly

## Conclusion

Gender employment gaps in South Africa are **not uniform** but vary substantially based on the intersection of geographic location and demographic identity. The data reveals that addressing gender employment inequality requires **targeted, region-specific and demographically-aware interventions** rather than one-size-fits-all approaches.

The most urgent attention should be directed toward:
1. Indian/Asian populations in Eastern Cape
2. Coloured populations in KwaZulu-Natal  
3. Addressing systemic issues in provinces like Eastern Cape and Northern Cape that show consistent gaps across multiple groups