In [1]:
import pandas as pd

# Step 1: Load the cleaned dataset from disk into a DataFrame
print("Step 1: Reading data...")
source_csv = 'IRENA_cleaned.csv'  # expected in the current working directory
df = pd.read_csv(source_csv)
print(f"   Total rows: {len(df)}")

# Step 2: Build the list of Sub-Saharan African country names
print("\nStep 2: Listing Sub-Saharan countries...")

# Names are grouped by region and then concatenated in region order.
# Four countries are listed under two spellings each ('Tanzania',
# 'Swaziland', 'Cape Verde', 'Ivory Coast' next to their official names)
# so that either form is matched by an exact-name filter. The count
# printed below therefore counts name spellings (52), not distinct
# countries (48).
central_africa = [
    'Angola', 'Cameroon', 'Central African Republic', 'Chad', 'Congo',
    'Democratic Republic of the Congo', 'Equatorial Guinea', 'Gabon',
    'Sao Tome and Principe',
]

east_africa = [
    'Burundi', 'Comoros', 'Djibouti', 'Eritrea', 'Ethiopia', 'Kenya',
    'Madagascar', 'Malawi', 'Mauritius', 'Mozambique', 'Rwanda',
    'Seychelles', 'Somalia', 'South Sudan', 'Uganda',
    'United Republic of Tanzania', 'Tanzania', 'Zambia', 'Zimbabwe',
]

southern_africa = [
    'Botswana', 'Eswatini', 'Lesotho', 'Namibia', 'South Africa', 'Swaziland',
]

west_africa = [
    'Benin', 'Burkina Faso', 'Cabo Verde', 'Cape Verde', "Côte d'Ivoire",
    'Ivory Coast', 'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau', 'Liberia',
    'Mali', 'Mauritania', 'Niger', 'Nigeria', 'Senegal', 'Sierra Leone', 'Togo',
]

# All country names we want to keep
ssa_countries = central_africa + east_africa + southern_africa + west_africa

print(f"   Total SSA countries: {len(ssa_countries)}")

# Step 3: Restrict the data to rows whose Country is in the SSA name list.
# The comparison is an exact string match against ssa_countries.
print("\nStep 3: Filtering for SSA countries...")
is_ssa_row = df['Country'].isin(ssa_countries)
df_ssa = df.loc[is_ssa_row]
print(f"   Rows after filtering: {len(df_ssa)}")

# Step 4: Keep only renewable technologies
print("\nStep 4: Filtering for renewable technologies...")

# Words that mean renewable
renewable_words = ['solar', 'wind', 'hydro', 'geothermal', 'bioenergy', 'biomass']

# Lower-cased technology names, compared as plain substrings (no regex).
# NOTE(review): substring matching means 'hydro' would also match a name
# such as 'hydrogen' - confirm that is acceptable for this dataset.
technology_names = df_ssa['Technology'].astype(str).str.lower()

# Boolean mask aligned with df_ssa's index: True when the technology name
# contains at least one renewable word. A Series mask (rather than a plain
# Python list) keeps this a row filter even when df_ssa is empty; an empty
# list would be interpreted as a column selection and drop every column.
keep_rows = pd.Series(False, index=df_ssa.index)
for word in renewable_words:
    keep_rows |= technology_names.str.contains(word, regex=False, na=False)

# Keep only renewable rows
df_renewable = df_ssa.loc[keep_rows]
print(f"   Rows after filtering: {len(df_renewable)}")

# Step 5: Save the filtered data (row index is not written to the file)
print("\nStep 5: Saving filtered data...")
df_renewable.to_csv('SSA_renewable_investments.csv', index=False)

# Step 6: Show summary
print("\n✓ Done! File saved as 'SSA_renewable_investments.csv'")
print("\nSummary:")
print(f"  Total rows: {len(df_renewable)}")
if df_renewable.empty:
    # Nothing matched the filters: min()/max() would be NaN and int(NaN)
    # raises ValueError, so report placeholders instead of crashing.
    print("  Countries found: 0")
    print("  Years: n/a (no rows matched)")
else:
    print(f"  Countries found: {df_renewable['Country'].nunique()}")
    first_year = df_renewable['Year'].min()
    last_year = df_renewable['Year'].max()
    if pd.isna(first_year):
        # Rows exist but every Year value is missing.
        print("  Years: n/a (no year values)")
    else:
        # Year is read as float (e.g. 2000.0); cast to int for display.
        print(f"  Years: {int(first_year)} to {int(last_year)}")

print("\nFirst 10 rows:")
print(df_renewable.head(10))

Step 1: Reading data...
   Total rows: 97083

Step 2: Listing Sub-Saharan countries...
   Total SSA countries: 52

Step 3: Filtering for SSA countries...
   Rows after filtering: 19803

Step 4: Filtering for renewable technologies...
   Rows after filtering: 6888

Step 5: Saving filtered data...

✓ Done! File saved as 'SSA_renewable_investments.csv'

Summary:
  Total rows: 6888
  Countries found: 41
  Years: 2000 to 2020

First 10 rows:
     Country                  Technology    Year  Investment_Million_USD
1449  Angola  On-grid solar photovoltaic  2000.0                    0.00
1450  Angola  On-grid solar photovoltaic  2001.0                    0.00
1451  Angola  On-grid solar photovoltaic  2002.0                    0.00
1452  Angola  On-grid solar photovoltaic  2003.0                    0.05
1453  Angola  On-grid solar photovoltaic  2004.0                    0.00
1454  Angola  On-grid solar photovoltaic  2005.0                    0.00
1455  Angola  On-grid solar photovoltaic  2006.0