# South African Public Sector Employment Growth Prediction
This notebook fits a linear regression model for each sector using employment data from 2020 and 2023, predicts employment for 2024–2028, and visualizes the results using Seaborn and Matplotlib.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

## Sample Dataset
We use a sample dataset representing employment in various public sectors in South Africa for the years 2020 and 2023.

In [None]:
# Sample dataset: one row per sector as (sector name, 2020 headcount, 2023 headcount).
_sector_rows = [
    ('Education', 120000, 135000),
    ('Healthcare', 95000, 105000),
    ('Public Administration', 80000, 85000),
    ('Police Services', 60000, 65000),
    ('Social Development', 40000, 45000),
    ('Infrastructure', 30000, 35000),
    ('Environmental Affairs', 20000, 25000),
]

# Transpose the rows into the column-oriented mapping the DataFrame is built from.
_names, _headcount_2020, _headcount_2023 = (list(column) for column in zip(*_sector_rows))
data = {
    'Sector': _names,
    'Employment_2020': _headcount_2020,
    'Employment_2023': _headcount_2023,
}

df = pd.DataFrame(data)
df

## Data Transformation
We reshape the data from wide format (one employment column per year) to long format (one row per sector and year) to prepare it for modeling.

In [None]:
# Reshape data for modeling: wide (one column per year) -> long (one row per sector and year).
# Map each observation year to the wide-format column that holds its values.
year_columns = {2020: 'Employment_2020', 2023: 'Employment_2023'}
years = list(year_columns)

df_long = pd.DataFrame({
    # Each sector appears once per year; reset the index so the duplicated
    # labels produced by Series.repeat do not leak into df_long.
    'Sector': df['Sector'].repeat(len(years)).reset_index(drop=True),
    # Years cycle 2020, 2023, 2020, 2023, ... in step with the repeated sectors.
    'Year': years * len(df),
    # Flatten row by row so the values are sector-major (2020, 2023 for sector 0,
    # then sector 1, ...). Concatenating the two year columns end to end would be
    # year-major and would pair most values with the wrong sector and year.
    'Employment': df[list(year_columns.values())].to_numpy().ravel(),
})
df_long

## Model Training and Prediction
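We train a linear regression model for each sector and predict employment for the next five years (2024–2028). Because each sector has only two observations (2020 and 2023), the fitted line passes exactly through both points, so the forecasts are a straight-line extrapolation of the 2020–2023 change rather than a statistically validated trend.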

In [None]:
# Train a linear regression model (Employment ~ Year) for each sector and
# extrapolate it over the forecast horizon.

# The horizon is the same for every sector, so build it once outside the loop.
future_years = np.array([[2024], [2025], [2026], [2027], [2028]])
# The model is fitted on a DataFrame with a 'Year' column; predicting on a frame
# with the same column name avoids scikit-learn's "X does not have valid feature
# names" warning that a bare ndarray would trigger.
future_X = pd.DataFrame(future_years, columns=['Year'])

predictions = []
for sector in df['Sector']:
    sector_data = df_long[df_long['Sector'] == sector]
    X = sector_data[['Year']]
    y = sector_data['Employment']

    model = LinearRegression()
    model.fit(X, y)

    predicted = model.predict(future_X)

    # Store plain Python scalars so the records do not carry NumPy scalar types.
    for year, emp in zip(future_X['Year'], predicted):
        predictions.append({
            'Sector': sector,
            'Year': int(year),
            'Predicted_Employment': float(emp),
        })

# Create prediction DataFrame (one row per sector and forecast year)
prediction_df = pd.DataFrame(predictions)
prediction_df

## Visualization
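We visualize the actual and predicted employment data using Seaborn and Matplotlib. Actual values (2020 and 2023) are drawn as solid lines with markers, and predicted values (2024–2028) as dashed lines, colored by sector.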

In [None]:
# Plot actual and predicted employment on a single set of axes
fig, ax = plt.subplots(figsize=(12, 6))

# Observed values: solid lines with markers; legend suppressed so sectors are not listed twice.
sns.lineplot(
    data=df_long, x='Year', y='Employment',
    hue='Sector', marker='o', legend=False, ax=ax,
)
# Forecast values: dashed lines; this call supplies the sector legend.
sns.lineplot(
    data=prediction_df, x='Year', y='Predicted_Employment',
    hue='Sector', linestyle='--', ax=ax,
)

ax.set(
    title='South African Public Sector Employment Growth Prediction (2020–2028)',
    xlabel='Year',
    ylabel='Employment',
)
ax.grid(True)
fig.tight_layout()
plt.show()

## Save Predictions
We save the predicted employment data to a CSV file.

In [None]:
# Write the forecast table to CSV in the working directory, omitting the row index
prediction_df.to_csv(path_or_buf="employment_growth_predictions.csv", index=False)