In [21]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)

# Define column names
col_names = ['ID', "LATITUDE", "LONGITUDE", "ELEVATION", "STATION"]
# Define value and flag names for each month

# Define column widths
col_widths = [12, 8, 10, 8, 25]

# Read the file
df = pd.read_fwf('EXPLOREDATA/ghcnm.tavg.v4.0.1.20230609.qfe.inv', widths=col_widths, names=col_names)

In [22]:
df.head()

Unnamed: 0,ID,LATITUDE,LONGITUDE,ELEVATION,STATION
0,ACW00011604,57.7667,11.8667,18.0,SAVE
1,AE000041196,25.333,55.517,34.0,SHARJAH_INTER_AIRP
2,AEM00041184,25.617,55.933,31.0,RAS_AL_KHAIMAH_INTE
3,AEM00041194,25.255,55.364,10.4,DUBAI_INTL
4,AEM00041216,24.43,54.47,3.0,ABU_DHABI_BATEEN_AIR


In [23]:
# Create new column for country code
df["Country Code"] = df["ID"].str[:2]

In [24]:
df.head()

Unnamed: 0,ID,LATITUDE,LONGITUDE,ELEVATION,STATION,Country Code
0,ACW00011604,57.7667,11.8667,18.0,SAVE,AC
1,AE000041196,25.333,55.517,34.0,SHARJAH_INTER_AIRP,AE
2,AEM00041184,25.617,55.933,31.0,RAS_AL_KHAIMAH_INTE,AE
3,AEM00041194,25.255,55.364,10.4,DUBAI_INTL,AE
4,AEM00041216,24.43,54.47,3.0,ABU_DHABI_BATEEN_AIR,AE


In [25]:
countryCodes = pd.read_csv('COUNTRYDATA/COUNTRIES.csv')
countryCodes.head()

Unnamed: 0,Country Code,Country
0,AA,Aruba
1,AC,Antigua and Barbuda
2,AE,United Arab Emirates
3,AF,Afghanistan
4,AG,Algeria


In [26]:
# Merge the dataframes and drop the ID column
df = pd.merge(df, countryCodes, on='Country Code').drop('Country Code', axis=1)

In [20]:
df.head()

Unnamed: 0,STATION,ID,Country_x,LATITUDE,LONGITUDE,ELEVATION,Country_y
0,SAVE,ACW00011604,Antigua and Barbuda,57.7667,11.8667,18.0,Antigua and Barbuda
1,SAVE,ACW00011604,Antigua and Barbuda,57.7667,11.8667,18.0,Antigua and Barbuda
2,SAVE,ACW00011604,Antigua and Barbuda,57.7667,11.8667,18.0,Antigua and Barbuda
3,SAVE,ACW00011604,Antigua and Barbuda,57.7667,11.8667,18.0,Antigua and Barbuda
4,SHARJAH_INTER_AIRP,AE000041196,United Arab Emirates,25.333,55.517,34.0,United Arab Emirates


In [28]:
# Reorder columns
# STATION ID COUNTRY CODE COUNTRY LATITUDE LONGITUDE ELEVATION
df = df[['STATION', 'ID', 'Country', 'LATITUDE', 'LONGITUDE', 'ELEVATION']]

In [29]:
df.head()

Unnamed: 0,STATION,ID,Country,LATITUDE,LONGITUDE,ELEVATION
0,SAVE,ACW00011604,Antigua and Barbuda,57.7667,11.8667,18.0
1,SAVE,ACW00011604,Antigua and Barbuda,57.7667,11.8667,18.0
2,SHARJAH_INTER_AIRP,AE000041196,United Arab Emirates,25.333,55.517,34.0
3,SHARJAH_INTER_AIRP,AE000041196,United Arab Emirates,25.333,55.517,34.0
4,RAS_AL_KHAIMAH_INTE,AEM00041184,United Arab Emirates,25.617,55.933,31.0


In [30]:
# SAVE THE DATAFRAME BY SEPARATING THE DATA BY COUNTRY
for country in df['Country'].unique():
    df[df['Country'] == country].to_csv(f'STATION/{country}.csv', index=False)