# South African Political Party Prediction - EDA

Exploratory Data Analysis of SA Election Data (2009-2024)

In [1]:
# Setup and Imports (Essential packages only)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import warnings

# Configuration
warnings.filterwarnings('ignore')
plt.style.use('default')

# Set up plotting parameters for beautiful charts
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10
plt.rcParams['axes.grid'] = True
plt.rcParams['grid.alpha'] = 0.3

# Define South African political party colors
SA_COLORS = {
    'ANC': '#006600',      # Green
    'DA': '#005ba6',       # Blue  
    'EFF': '#FF0000',      # Red
    'IFP': '#FFD700',      # Gold
    'FF+': '#FFA500',      # Orange
    'ACDP': '#800080',     # Purple
    'Other': '#808080'     # Gray
}

print("🇿🇦 SA Political Data - Exploratory Data Analysis")
print("=" * 60)
print("✅ Essential libraries imported successfully")
print("🎨 SA political party colors configured")
print("📊 Ready for data analysis!")

# Display versions
print(f"\n📦 Pandas version: {pd.__version__}")
print(f"📦 NumPy version: {np.__version__}")
print(f"📦 Matplotlib version: {plt.matplotlib.__version__}")

ModuleNotFoundError: No module named 'seaborn'

In [1]:
# Setup and Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings

warnings.filterwarnings('ignore')
plt.style.use('default')
plt.rcParams['figure.figsize'] = (12, 8)

print("🇿🇦 SA Political Data - Exploratory Data Analysis")
print("=" * 60)
print("✅ All libraries imported successfully")

ModuleNotFoundError: No module named 'seaborn'

In [None]:
# Load Local Files
raw_data_dir = Path("../data/raw")  # Go up one level from notebooks
processed_data_dir = Path("../data/processed")
processed_data_dir.mkdir(exist_ok=True)

# File mapping
file_mapping = {
    2009: "National_2009.xls",
    2014: "National_2014.xls", 
    2019: "National_2019.xls",
    2024: "National_2024.xls"
}

print("📁 Checking election files...")
for year, filename in file_mapping.items():
    file_path = raw_data_dir / filename
    if file_path.exists():
        size = file_path.stat().st_size / 1024
        print(f"✅ Found: {filename} ({size:.1f} KB)")
    else:
        print(f"❌ Missing: {filename}")

In [None]:
# Test reading one file
test_file = raw_data_dir / "National_2009.xls"

if test_file.exists():
    print(f"📊 Testing file: {test_file.name}")
    
    try:
        df = pd.read_excel(test_file, engine='openpyxl')
        print(f"✅ Successfully loaded! Shape: {df.shape}")
        
        print("\n📋 First 10 rows preview:")
        for i in range(min(10, len(df))):
            row_data = [str(cell)[:20] for cell in df.iloc[i] if pd.notna(cell) and str(cell).strip()]
            if len(row_data) > 1:
                print(f"  Row {i:2d}: {' | '.join(row_data[:4])}")
                
    except Exception as e:
        print(f"❌ Error reading file: {e}")
else:
    print("❌ Test file not found")