In [4]:
import pandas as pd

# Step 1: Read the CSV file
print("Step 1: Reading CSV file...")
df = pd.read_csv('PUBFIN_20260106-085827.csv', header=None)

# Step 2: Add column names
print("Step 2: Adding column names...")
df.columns = ['Country', 'Technology', 'Year', 'Investment_Million_USD']

# Step 3: Remove the first row (the title row)
print("Step 3: Removing title row...")
df = df[1:]  # Skip first row

# Step 4: Fill blank Country cells with the value above
print("Step 4: Filling blank country names...")
df['Country'] = df['Country'].fillna(method='ffill')

# Step 5: Fill blank Technology cells with the value above
print("Step 5: Filling blank technology names...")
df['Technology'] = df['Technology'].fillna(method='ffill')

# Step 6: Replace '-' with 0 in investment column
print("Step 6: Replacing '-' with 0...")
df['Investment_Million_USD'] = df['Investment_Million_USD'].replace('-', 0)

# Step 7: Keep only rows with valid years
print("Step 7: Keeping only valid year rows...")
df = df[df['Year'].notna()]

# Step 8: Save cleaned data
print("Step 8: Saving cleaned data...")
df.to_csv('IRENA_cleaned.csv', index=False)

print("\n✓ Done! Cleaned data saved as 'IRENA_cleaned.csv'")
print(f"Total rows: {len(df)}")
print("\nFirst 10 rows of cleaned data:")
print(df.head(10))

Step 1: Reading CSV file...
Step 2: Adding column names...
Step 3: Removing title row...
Step 4: Filling blank country names...
Step 5: Filling blank technology names...
Step 6: Replacing '-' with 0...
Step 7: Keeping only valid year rows...
Step 8: Saving cleaned data...


  df['Country'] = df['Country'].fillna(method='ffill')
  df['Technology'] = df['Technology'].fillna(method='ffill')



✓ Done! Cleaned data saved as 'IRENA_cleaned.csv'
Total rows: 97083

First 10 rows of cleaned data:
        Country                  Technology    Year Investment_Million_USD
2   Afghanistan  On-grid solar photovoltaic  2000.0                      0
3   Afghanistan  On-grid solar photovoltaic  2001.0                   0.10
4   Afghanistan  On-grid solar photovoltaic  2002.0                      0
5   Afghanistan  On-grid solar photovoltaic  2003.0                      0
6   Afghanistan  On-grid solar photovoltaic  2004.0                      0
7   Afghanistan  On-grid solar photovoltaic  2005.0                      0
8   Afghanistan  On-grid solar photovoltaic  2006.0                   0.35
9   Afghanistan  On-grid solar photovoltaic  2007.0                      0
10  Afghanistan  On-grid solar photovoltaic  2008.0                      0
11  Afghanistan  On-grid solar photovoltaic  2009.0                      0
