In [1]:
# Python modules

import pandas as pd
import os

In [2]:
# Read Surface Temperature Change csv file into pandas dataframe
surface_temp_df = pd.read_csv('./Raw datasets/Annual_Surface_Temperature_Change.csv')
# Drop useless columns not needed for tidy dataframe
surface_temp_clean_df = surface_temp_df.drop(columns=['ObjectId', 'ISO2', 'ISO3', 'Indicator', 'Unit', 'Source', 'CTS_Code', 'CTS_Name', 'CTS_Full_Descriptor'])
# Melt Year measurements into a value column with column names under Year
surface_temp_clean_df = surface_temp_clean_df.melt(id_vars=['Country'], var_name='Year', value_name='Temperature')
# Replace the F letter in the Year column and make the column an integer value
surface_temp_clean_df['Year'] = surface_temp_clean_df['Year'].str.replace('F', '').astype(int)

# Save dataframe into csv
surface_temp_clean_df.to_csv('./Clean Datasets/surface_temp_clean.csv', index=False)
# Display top 5 rows
surface_temp_clean_df.head()

Unnamed: 0,Country,Year,Temperature
0,"Afghanistan, Islamic Rep. of",1961,-0.113
1,Albania,1961,0.627
2,Algeria,1961,0.164
3,American Samoa,1961,0.079
4,"Andorra, Principality of",1961,0.736


In [3]:
# Read climate related disasters frequency csv
climate_disasters_df = pd.read_csv('./Raw datasets/Climate-related_Disasters_Frequency.csv')
# Drop useless columns not needed for tidy dataframe
climate_disasters_clean_df = climate_disasters_df.drop(columns=['ObjectId', 'ISO2', 'ISO3', 'Unit', 'Source', 'CTS_Code', 'CTS_Name', 'CTS_Full_Descriptor'])
# Remove unecessary text in the indicator column
climate_disasters_clean_df['Indicator'] = climate_disasters_clean_df['Indicator'].str.replace('Climate related disasters frequency, Number of Disasters: ', '')
# Melt Year measurements into a value column with column names under Year
climate_disasters_clean_df = climate_disasters_clean_df.melt(id_vars=['Indicator', 'Country'], var_name='Year', value_name='value')
# Pivot the dataframe to make each indicator a measurement of the value column
climate_disasters_clean_df = climate_disasters_clean_df.pivot(index=['Year', 'Country'], columns='Indicator', values='value').reset_index()
# Replace the F letter in the Year column and make the column an integer value
climate_disasters_clean_df['Year'] = climate_disasters_clean_df['Year'].str.replace('F', '').astype(int)
# Fill NA's with zero's
climate_disasters_clean_df.fillna(0, inplace=True)
# Save dataframe into csv
climate_disasters_clean_df.to_csv('./Clean Datasets/climate_disasters_clean.csv', index=False)
# Display top 5 rows
climate_disasters_clean_df.head()

Indicator,Year,Country,Drought,Extreme temperature,Flood,Landslide,Storm,TOTAL,Wildfire
0,1980,"Afghanistan, Islamic Rep. of",0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,1980,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1980,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1980,American Samoa,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1980,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
climate_disasters_df = pd.read_csv('./Raw datasets/Climate-related_Disasters_Frequency.csv')
climate_disasters_clean_df = climate_disasters_df.drop(columns=['ObjectId', 'ISO2', 'ISO3', 'Unit', 'Source', 'CTS_Code', 'CTS_Name', 'CTS_Full_Descriptor'])
climate_disasters_clean_df['Indicator'] = climate_disasters_clean_df['Indicator'].str.replace('Climate related disasters frequency, Number of Disasters: ', '')
climate_disasters_clean_df = climate_disasters_clean_df.melt(id_vars=['Country', 'Indicator'], var_name='Year', value_name='value')
climate_disasters_clean_df = climate_disasters_clean_df.groupby(['Indicator', 'Country']).sum().reset_index()
climate_disasters_clean_df = climate_disasters_clean_df.pivot(index='Country', columns='Indicator', values='value').reset_index()
climate_disasters_clean_df = climate_disasters_clean_df.sort_values(by=['TOTAL'], ascending=False, axis=0)
climate_disasters_clean_df.to_csv('./Clean Datasets/country_climate_disasters_clean.csv', index=False)
climate_disasters_clean_df.head(20)

Indicator,Country,Drought,Extreme temperature,Flood,Landslide,Storm,TOTAL,Wildfire
203,United States,18.0,26.0,183.0,4.0,590.0,912.0,91.0
42,"China, P.R.: Mainland",36.0,14.0,295.0,74.0,311.0,738.0,8.0
87,India,9.0,52.0,265.0,46.0,147.0,523.0,4.0
149,Philippines,9.0,,144.0,30.0,294.0,478.0,1.0
88,Indonesia,7.0,,245.0,62.0,9.0,334.0,11.0
14,Bangladesh,3.0,24.0,84.0,6.0,132.0,249.0,
209,Vietnam,7.0,,98.0,6.0,124.0,236.0,1.0
121,Mexico,6.0,15.0,68.0,10.0,109.0,212.0,4.0
96,Japan,,17.0,36.0,16.0,129.0,199.0,1.0
25,Brazil,17.0,5.0,139.0,19.0,15.0,199.0,4.0
