Countries Analysis

In [1]:
import pandas as pd
import requests
import json
import awswrangler as wr

In [2]:
# Fecthing the dat from api

api_url = 'https://restcountries.com/v3.1/all'
response = requests.get(api_url)
data = response.json()

In [3]:
# normalizing the JSON data 

df = pd.json_normalize(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Columns: 897 entries, tld to languages.lit
dtypes: bool(3), float64(20), int64(1), object(873)
memory usage: 1.7+ MB


In [4]:
# Dropping the unecessary columns 

# Multi column dropping
df_cols_to_drop = [col for col in df.columns if 'demonyms' in col or 'translations' in col or 'currencies' in col or 'coatOfArms' in col or 'postalCode' in col or 'name.nativeName' in col or 'name.tsn' in col or 'idd' in col or 'altSpellings' in col or 'gini' in col  or 'languages' in col]
df.drop(columns=df_cols_to_drop, inplace=True)

# Single column dropping 
df.drop(columns='flag', inplace = True)


In [5]:
# No concatenating of outputs
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_seq_item', None)

In [6]:
# Single column cleaning

df['continents'] = df["continents"].astype(str).str.strip('[]\'')
df['capital'] = df["capital"].apply(str).str.strip('[]\'\"')

# Apply to the whole of dataframe
for col in df.columns:
    df[col] = df[col].apply(lambda col: str(col).strip('[]\'\"'))

df['borders'] = df['borders'].astype(str).str.replace("'","",regex= False)

df.head(1)

Unnamed: 0,tld,cca2,ccn3,cioc,independent,status,unMember,capital,region,subregion,latlng,landlocked,borders,area,cca3,population,fifa,timezones,continents,startOfWeek,name.common,name.official,maps.googleMaps,maps.openStreetMaps,car.signs,car.side,flags.png,flags.svg,flags.alt,capitalInfo.latlng
0,.bw,BW,72,BOT,True,officially-assigned,True,Gaborone,Africa,Southern Africa,"-22.0, 24.0",True,"NAM, ZAF, ZMB, ZWE",582000.0,BWA,2351625,BOT,UTC+02:00,Africa,monday,Botswana,Republic of Botswana,https://goo.gl/maps/E364KeLy6N4JwxwQ8,https://www.openstreetmap.org/relation/1889339,BW,left,https://flagcdn.com/w320/bw.png,https://flagcdn.com/bw.svg,The flag of Botswana has a light blue field with a white-edged black horizontal band across its center.,"-24.63, 25.9"


In [7]:
# Splitting of column in seperate for expansion of columns

df[['Latitude', 'Longitude']] = df['latlng'].str.split(',', n = 1, expand = True)
df[['Capital latitude', 'Capital Longitude']] = df['capitalInfo.latlng'].str.split(',', n = 1, expand = True)


columnsDropped = ['latlng', 'capitalInfo.latlng']
df.drop(columns = 'latlng', inplace = True)
df.head(1)

Unnamed: 0,tld,cca2,ccn3,cioc,independent,status,unMember,capital,region,subregion,landlocked,borders,area,cca3,population,fifa,timezones,continents,startOfWeek,name.common,name.official,maps.googleMaps,maps.openStreetMaps,car.signs,car.side,flags.png,flags.svg,flags.alt,capitalInfo.latlng,Latitude,Longitude,Capital latitude,Capital Longitude
0,.bw,BW,72,BOT,True,officially-assigned,True,Gaborone,Africa,Southern Africa,True,"NAM, ZAF, ZMB, ZWE",582000.0,BWA,2351625,BOT,UTC+02:00,Africa,monday,Botswana,Republic of Botswana,https://goo.gl/maps/E364KeLy6N4JwxwQ8,https://www.openstreetmap.org/relation/1889339,BW,left,https://flagcdn.com/w320/bw.png,https://flagcdn.com/bw.svg,The flag of Botswana has a light blue field with a white-edged black horizontal band across its center.,"-24.63, 25.9",-22.0,24.0,-24.63,25.9


In [8]:
# Capital first letter in startOfWeek Column

def weekConversion(days):
    weekday = ""
    for i in range(0,len(days)):
        if i == 0:
            i = str(days[i]).upper()
            weekday += i
        else:
            weekday += days[i]
    return weekday
    
df['startOfWeek'] = df['startOfWeek'].apply(weekConversion)

In [9]:
df['status'].value_counts()

officially-assigned    249
user-assigned            1
Name: status, dtype: int64

In [10]:
df_cleaned_data = df.copy()

In [None]:
wr.s3.to_parquet(df_cleaned_data,path="your_bucket_path",index=False)

{'paths': ['s3://countrystore1/projdata/cleanedData.parquet'],
 'partitions_values': {}}

In [None]:

# Read from S3
df_New = wr.s3.read_parquet("your_bucket_path")
print(df_New.head())


   tld cca2 ccn3 cioc independent               status unMember     capital  \
0  .bw   BW  072  BOT        True  officially-assigned     True    Gaborone   
1  .to   TO  776  TGA        True  officially-assigned     True  Nuku'alofa   
2  .gr   GR  300  GRE        True  officially-assigned     True      Athens   
3  .mh   MH  584  MHL        True  officially-assigned     True      Majuro   
4  .by   BY  112  BLR        True  officially-assigned     True       Minsk   

    region        subregion landlocked                  borders      area  \
0   Africa  Southern Africa       True       NAM, ZAF, ZMB, ZWE  582000.0   
1  Oceania        Polynesia      False                      nan     747.0   
2   Europe  Southern Europe      False       ALB, BGR, TUR, MKD  131990.0   
3  Oceania       Micronesia      False                      nan     181.0   
4   Europe   Eastern Europe       True  LVA, LTU, POL, RUS, UKR  207600.0   

  cca3 population fifa  timezones continents startOfWeek      