# Notebook for removing all countries without Medal wins

In [131]:
import os.path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
%matplotlib inline
from bs4 import BeautifulSoup
import webbrowser
import urllib.request
from lxml import html
import zipfile
import re
import string
import sys, os
from IPython.display import Image

In [132]:
# Ensure the file exists
if not os.path.exists(  r"../../data/prep/Countries/countries_200.csv" ):
    print("Missing dataset file")

In [133]:
# read the countries df into a dataframe
df = pd.read_csv(  r"../../data/prep/Countries/countries_200.csv", encoding = "ISO-8859-1")

In [134]:
# printing the country df out looking at field names 
df.head(1)

Unnamed: 0,Country,Year,Population,Males,Females,Life_Expectancy,GDP,Region,Elevation,Area_SqKM,Centroid_Longitude,Centroid_Latitude,Population_Density,CO2_Emissions,Methane_Emissions,Nitrous_Oxide_Emisions,Total_Emissions,Emmisions_per_Capita
0,Afghanistan,1960,8996351.0,4649361.0,4346990.0,32.337561,537777800.0,West and Central Asia,1884.71,646212.0,66.1685,33.78231,13.921671,414.371,,,414.371,4.6e-05


In [135]:
# checking the number of null values we are dealing with
df.isnull().sum()

Country                      0
Year                         0
Population                   0
Males                        0
Females                      0
Life_Expectancy            455
GDP                        777
Region                       0
Elevation                  861
Area_SqKM                  861
Centroid_Longitude         861
Centroid_Latitude          861
Population_Density         861
CO2_Emissions              675
Methane_Emissions         1118
Nitrous_Oxide_Emisions    1102
Total_Emissions              0
Emmisions_per_Capita         0
dtype: int64

In [136]:
len(df)

4179

In [137]:
df.head(1)

Unnamed: 0,Country,Year,Population,Males,Females,Life_Expectancy,GDP,Region,Elevation,Area_SqKM,Centroid_Longitude,Centroid_Latitude,Population_Density,CO2_Emissions,Methane_Emissions,Nitrous_Oxide_Emisions,Total_Emissions,Emmisions_per_Capita
0,Afghanistan,1960,8996351.0,4649361.0,4346990.0,32.337561,537777800.0,West and Central Asia,1884.71,646212.0,66.1685,33.78231,13.921671,414.371,,,414.371,4.6e-05


# Removing countries that have not won any medals 
We don't need any information on any countries outside of the countries who won medals. 

In [138]:
# Ensure the file exists
if not os.path.exists(  r"../../data/raw/dictionary.csv" ):
    print("Missing dataset file")

In [139]:
# read the dictionary df into a dataframe
dfD = pd.read_csv(  r"../../data/raw/dictionary.csv", encoding = "ISO-8859-1")

In [140]:
dfD.head(1)

Unnamed: 0,Country,Code,Region
0,Afghanistan,AFG,West and Central Asia


In [141]:
# Ensure the file exists
if not os.path.exists(  r"..\..\data\prep\Games\Games-950.csv" ):
    print("Missing dataset file")

In [142]:
# read the medaldf df into a dataframe
Medaldf = pd.read_csv(  r"..\..\data\prep\Games\Games-950.csv", encoding = "ISO-8859-1")

In [143]:
Medaldf.head(1)

Unnamed: 0,Year,Host_Country,Host_City,Summer,Winter,Total_Males,Total_Females,Total_Athletes,Discipline,Sport,...,NOC,NOC_Males_Sent,NOC_Females_Sent,NOC_Total_Sent,NOC_Gold,NOC_Silver,NOC_Bronze,NOC_Total_Medals,NOC_Rating,NOC_Rank
0,1960,ITA,Rome,True,False,4727,611,5338,Sailing,Sailing,...,ARG,91.0,0.0,91.0,0,3,1,4,7,26


In [144]:
# These are the NOCs that matter 
Medaldf['NOC'].unique()

array(['ARG', 'AUS', 'AUT', 'BEL', 'BRA', 'BUL', 'BWI', 'CAN', 'DEN',
       'EGY', 'ESP', 'ETH', 'EUA', 'FIN', 'FRA', 'GBR', 'GHA', 'GRE',
       'HUN', 'IND', 'IRI', 'IRQ', 'ITA', 'JPN', 'MAR', 'MEX', 'NED',
       'NOR', 'NZL', 'PAK', 'POL', 'POR', 'ROU', 'RSA', 'SGP', 'SUI',
       'SWE', 'TCH', 'TPE', 'TUR', 'URS', 'USA', 'VEN', 'YUG', 'PRK',
       'BAH', 'CUB', 'IRL', 'KEN', 'KOR', 'NGR', 'PHI', 'TTO', 'TUN',
       'URU', 'FRG', 'GDR', 'CMR', 'JAM', 'MGL', 'UGA', 'COL', 'LBN',
       'NIG', 'LIE', 'BER', 'PUR', 'THA', 'GUY', 'TAN', 'ZIM', 'ALG',
       'CHN', 'CIV', 'DOM', 'ISL', 'PER', 'SYR', 'ZAM', 'AHO', 'CHI',
       'CRC', 'DJI', 'INA', 'ISV', 'SEN', 'SUR', 'CRO', 'EST', 'EUN',
       'GER', 'IOP', 'ISR', 'LAT', 'LTU', 'MAS', 'NAM', 'QAT', 'SLO',
       'LUX', 'BLR', 'KAZ', 'RUS', 'UKR', 'UZB', 'ARM', 'AZE', 'BDI',
       'CZE', 'ECU', 'GEO', 'HKG', 'MDA', 'MOZ', 'SVK', 'TGA', 'BAR',
       'KGZ', 'KSA', 'KUW', 'MKD', 'SRI', 'VIE', 'ERI', 'PAR', 'SCG',
       'UAE', 'AFG',

# Getting the NOCs of each country in the Country df
To do this I'll create a dictionary containing all the countries as keys and their NOCs as values. 

In [145]:
 countrydf = df

In [146]:
countrydf.columns

Index(['Country', 'Year', 'Population', 'Males', 'Females', 'Life_Expectancy',
       'GDP', 'Region', 'Elevation', 'Area_SqKM', 'Centroid_Longitude',
       'Centroid_Latitude', 'Population_Density', 'CO2_Emissions',
       'Methane_Emissions', 'Nitrous_Oxide_Emisions', 'Total_Emissions',
       'Emmisions_per_Capita'],
      dtype='object')

In [147]:
# creating the dictionary 
countrydic = {}

In [148]:
# For that fills the country name, NOC dictionary 
dfD['Dic'] = None 

for x, row in dfD.iterrows():
    
    country = dfD['Country'].iloc[x]
    code = dfD['Code'].iloc[x]
    
    countrydic.update({country: code})

In [149]:
countrydf.head(1)

Unnamed: 0,Country,Year,Population,Males,Females,Life_Expectancy,GDP,Region,Elevation,Area_SqKM,Centroid_Longitude,Centroid_Latitude,Population_Density,CO2_Emissions,Methane_Emissions,Nitrous_Oxide_Emisions,Total_Emissions,Emmisions_per_Capita
0,Afghanistan,1960,8996351.0,4649361.0,4346990.0,32.337561,537777800.0,West and Central Asia,1884.71,646212.0,66.1685,33.78231,13.921671,414.371,,,414.371,4.6e-05


In [150]:
countrydic = {'Afghanistan': 'AFG',
 'Albania': 'ALB',
 'Algeria': 'ALG',
 'American Samoa': 'ASA',
 'Andorra': 'AND',
 'Angola': 'ANG',
 'Antigua and Barbuda': 'ANT',
 'Argentina': 'ARG',
 'Armenia': 'ARM',
 'Aruba': 'ARU',
 'Australia': 'AUS',
 'Austria': 'AUT',
 'Azerbaijan': 'AZE',
 'Bahamas': 'BAH',
 'Bahamas, The' : 'BAH',       
 'Bahrain': 'BRN',
 'Bangladesh': 'BAN',
 'Barbados': 'BAR',
 'Belarus': 'BLR',
 'Belgium': 'BEL',
 'Belize': 'BIZ',
 'Benin': 'BEN',
 'Bermuda': 'BER',
 'Bhutan': 'BHU',
 'Bolivia': 'BOL',
 'Bosnia and Herzegovina': 'BIH',
 'Botswana': 'BOT',
 'Brazil': 'BRA',
 'British Virgin Islands': 'IVB',
 'Brunei Darussalam': 'BRU',
 'Bulgaria': 'BUL',
 'Burkina Faso': 'BUR',
 'Burundi': 'BDI',
 'Cabo Verde': 'CPV',
 'Cambodia': 'CAM',
 'Cameroon': 'CMR',
 'Canada': 'CAN',
 'Cayman Islands': 'CAY',
 'Central African Republic': 'CAF',
 'Chad': 'CHA',
 'Chile': 'CHI',
 'China': 'CHN',
 'Colombia': 'COL',
 'Comoros': 'COM',
 'Congo, Dem Rep': 'COD', 
 'Congo, Dem. Rep.': 'COD', 
 'Costa Rica': 'CRC',
 "Cote d'Ivoire": 'CIV',
 'Croatia': 'CRO',
 'Cuba': 'CUB',
 'Curacao': 'CUW',
 'Cyprus': 'CYP',
 'Czech Republic': 'CZE',
 'Denmark': 'DEN',
 'Djibouti': 'DJI',
 'Dominica': 'DMA',
 'Dominican Republic': 'DOM',
 'Ecuador': 'ECU',
 'Egypt Arab Rep': 'EGY',
 'Egypt, Arab Rep.': 'EGY',
 'El Salvador': 'ESA',
 'Equatorial Guinea': 'GEQ',
 'Eritrea': 'ERI',
 'Estonia': 'EST',
 'Ethiopia': 'ETH',
 'Fiji': 'FIJ',
 'Finland': 'FIN',
 'France': 'FRA',
 'Gabon': 'GAB',
 'Gambia': 'GAM',
 'Gambia, The': 'GAM',
 'Georgia': 'GEO',
 'Germany': 'GER',
 'Ghana': 'GHA',
 'Greece': 'GRE',
 'Grenada': 'GRN',
 'Guam': 'GUM',
 'Guatemala': 'GUA',
 'Guinea': 'GUI',
 'Guinea-Bissau': 'GBS',
 'Guyana': 'GUY',
 'Haiti': 'HAI',
 'Honduras': 'HON',
 'Hong Kong SAR, China': 'HKG',
 'Hungary': 'HUN',
 'Iceland': 'ISL',
 'India': 'IND',
 'Indonesia': 'INA',
 'Iran, Islamic Rep.': 'IRI',
 'Iraq': 'IRQ',
 'Ireland': 'IRL',
 'Israel': 'ISR',
 'Italy': 'ITA',
 'Jamaica': 'JAM',
 'Japan': 'JPN',
 'Jordan': 'JOR',
 'Kazakhstan': 'KAZ',
 'Kenya': 'KEN',
 "Korea, Dem. People\x92s Rep.": 'PRK',
 'Korea, Dem. People?s Rep.': 'PRK',
 'Korea, Rep.': 'KOR',
 'Kosovo': 'KOS',
 'Kuwait': 'KUW',
 'Kyrgyz Republic': 'KGZ',
 'Lao PDR': 'LAO',
 'Latvia': 'LAT',
 'Lebanon': 'LIB',
 'Lesotho': 'LES',
 'Liberia': 'LBR',
 'Libya': 'LBA',
 'Liechtenstein': 'LIE',
 'Lithuania': 'LTU',
 'Luxembourg': 'LUX',
 'Macedonia, FYR': 'MKD',
 'Madagascar': 'MAD',
 'Malawi': 'MAW',
 'Malaysia': 'MAS',
 'Maldives': 'MDV',
 'Mali': 'MLI',
 'Malta': 'MLT',
 'Mauritania': 'MTN',
 'Mauritius': 'MRI',
 'Mexico': 'MEX',
 'Micronesia Fed. Sts.': 'FSM',
 'Micronesia, Fed. Sts.': 'FSM', 
 'Moldova': 'MDA',
 'Monaco': 'MON',
 'Mongolia': 'MGL',
 'Montenegro': 'MNE',
 'Morocco': 'MAR',
 'Mozambique': 'MOZ',
 'Myanmar': 'MYA',
 'Namibia': 'NAM',
 'Nauru': 'NRU',
 'Nepal': 'NEP',
 'Netherlands': 'NED',
 'New Zealand': 'NZL',
 'Nicaragua': 'NCA',
 'Niger': 'NIG',
 'Nigeria': 'NGR',
 'Norway': 'NOR',
 'Oman': 'OMA',
 'Pakistan': 'PAK',
 'Palau': 'PLW',
 'Panama': 'PAN',
 'Papua New Guinea': 'PNG',
 'Paraguay': 'PAR',
 'Peru': 'PER',
 'Philippines': 'PHI',
 'Poland': 'POL',
 'Portugal': 'POR',
 'Puerto Rico': 'PUR',
 'Qatar': 'QAT',
 'Romania': 'ROU',
 'Russian Federation': 'RUS',
 'Rwanda': 'RWA',
 'Samoa': 'SAM',
 'San Marino': 'SMR',
 'Sao Tome and Principe': 'STP',
 'Saudi Arabia': 'KSA',
 'Senegal': 'SEN',
 'Serbia': 'SRB',
 'Seychelles': 'SEY',
 'Sierra Leone': 'SLE',
 'Singapore': 'SGP',
 'Slovak Republic': 'SVK',
 'Slovenia': 'SLO',
 'Solomon Islands': 'SOL',
 'Somalia': 'SOM',
 'South Africa': 'RSA',
 'Spain': 'ESP',
 'Sri Lanka': 'SRI',
 'St. Kitts and Nevis': 'SKN',
 'St. Lucia': 'LCA',
 'St. Vincent and the Grenadines': 'VIN',
 'Sudan': 'SUD',
 'Suriname': 'SUR',
 'Swaziland': 'SWZ',
 'Sweden': 'SWE',
 'Switzerland': 'SUI',
 'Syrian Arab Republic': 'SYR',
 'Tajikistan': 'TJK',
 'Tanzania': 'TAN',
 'Thailand': 'THA',
 'Timor-Leste': 'TLS',
 'Togo': 'TOG',
 'Tonga': 'TGA',
 'Trinidad and Tobago': 'TTO',
 'Tunisia': 'TUN',
 'Turkey': 'TUR',
 'Turkmenistan': 'TKM',
 'Uganda': 'UGA',
 'Ukraine': 'UKR',
 'United Arab Emirates': 'UAE',
 'United Kingdom': 'GBR',
 'United States': 'USA',
 'Uruguay': 'URU',
 'Uzbekistan': 'UZB',
 'Vanuatu': 'VAN',
 'Venezuela': 'VEN',
 'Venezuela, RB' : 'VEN',
 'Vietnam': 'VIE',
 'Virgin Islands (US)': 'ISV',
 'Virgin Islands (U.S.)': 'ISV',
 'Yemen, Rep.': 'YEM',
 'Zambia': 'ZAM',
 'Zimbabwe': 'ZIM'}

In [151]:
# adding all the NOCs from the dictionary into the countrydf 
countrydf['Code'] = None 

for x, row in countrydf.iterrows():
    
    country = countrydf['Country'].iloc[x]
    countrydf.loc[x, 'Code'] = countrydic[country]

# Countries that change Nocs they are competing for
The are a few cases were countires like germany and soviet union split and different countries within there bounds end up complete for different teams like the Unified olympic team. 
We'll will not address the issues of countries playing for different countries in this notebook for now we will just leave them in. 
The list below represents the countires that are invloved in cases where NOCs switch. 

* All these team switch NOCs or join another at one point.
* We'll get the unique conturies and leave them in the country df and get values for their factors then in another notebook we can join them later to their respective NOCs. 

In [152]:
members = ['Armenia','Azerbaijan','Belarus','Georgia','Kazakhstan','Kyrgyz Republic','Moldova',
           'Russian Federation','Tajikistan','Turkmenistan','Ukraine','Uzbekistan','Latvia', 
            'Armenia','Belarus','Kazakhstan','Russian Federation','Uzbekistan',
            'Serbia','Montenegro','Macedonia, FYR','Bosnia and Herzegovina','Slovenia','Croatia', 
            'Armenia', 'Belarus', 'Georgia', 'Kazakhstan', 'Kyrgyz Republic', 
            'Moldova','Russian Federation', 'Ukraine', 'Uzbekistan', 'Czech Republic','Slovak Republic', 'Serbia','Montenegro', 
           'Estonia', 'Lativa', 'Lithuania']

In [153]:
# now we can get the unique countries because some repeat in this list
# We'll do this by changing it to a set then back to a list 
otherCslist = list(set(members))

In [154]:
# create a df for other countires above so we can get their country codes, by joining to the dictionary df  
otherCsdf = pd.DataFrame(columns=['Country'])
otherCsdf['Country'] = otherCslist

In [155]:
# now we can join this to the dictonary and get all its Country codes 
otherCsdf.head(1)

Unnamed: 0,Country
0,Slovak Republic


In [156]:
len(otherCsdf)

24

In [157]:
# creating a temp dictionary so we can get the country codes for only the countries in this otherCsList
tempDict = dfD
# then we are removing all the rows with countries that are not in the otherCslist and creating a df
# this df contains all the countries and country codes of the country that change NOcs that we need to keep with the Medaldf
otherCsdf = tempDict[tempDict['Country'].isin(otherCslist)].reset_index()
# Now we can remove the old index field because it is redudant 
otherCsdf = otherCsdf.drop(otherCsdf.columns[[0]], axis=1)

In [158]:
otherCsdf['Code'].unique()

array(['ARM', 'AZE', 'BLR', 'BIH', 'CRO', 'CZE', 'EST', 'GEO', 'KAZ',
       'KGZ', 'LAT', 'LTU', 'MKD', 'MDA', 'RUS', 'SRB', 'SVK', 'SLO',
       'TJK', 'TKM', 'UKR', 'UZB', 'MNE'], dtype=object)

In [159]:
otherCslist = ['ARM', 'AZE', 'BLR', 'BIH', 'CRO', 'CZE', 'GEO', 'KAZ', 'KGZ',
       'LAT', 'MKD', 'MDA', 'RUS', 'SRB', 'SVK', 'SLO', 'TJK', 'TKM',
       'UKR', 'UZB', 'MNE']

# Adding the two lists so we'll have a list of only the countries we need for the final country df
So we'll add this otherCslist to the unique Nocs in the medalDf and we'll have a list all of these countires 

In [160]:
Medaldf['NOC'].unique()

array(['ARG', 'AUS', 'AUT', 'BEL', 'BRA', 'BUL', 'BWI', 'CAN', 'DEN',
       'EGY', 'ESP', 'ETH', 'EUA', 'FIN', 'FRA', 'GBR', 'GHA', 'GRE',
       'HUN', 'IND', 'IRI', 'IRQ', 'ITA', 'JPN', 'MAR', 'MEX', 'NED',
       'NOR', 'NZL', 'PAK', 'POL', 'POR', 'ROU', 'RSA', 'SGP', 'SUI',
       'SWE', 'TCH', 'TPE', 'TUR', 'URS', 'USA', 'VEN', 'YUG', 'PRK',
       'BAH', 'CUB', 'IRL', 'KEN', 'KOR', 'NGR', 'PHI', 'TTO', 'TUN',
       'URU', 'FRG', 'GDR', 'CMR', 'JAM', 'MGL', 'UGA', 'COL', 'LBN',
       'NIG', 'LIE', 'BER', 'PUR', 'THA', 'GUY', 'TAN', 'ZIM', 'ALG',
       'CHN', 'CIV', 'DOM', 'ISL', 'PER', 'SYR', 'ZAM', 'AHO', 'CHI',
       'CRC', 'DJI', 'INA', 'ISV', 'SEN', 'SUR', 'CRO', 'EST', 'EUN',
       'GER', 'IOP', 'ISR', 'LAT', 'LTU', 'MAS', 'NAM', 'QAT', 'SLO',
       'LUX', 'BLR', 'KAZ', 'RUS', 'UKR', 'UZB', 'ARM', 'AZE', 'BDI',
       'CZE', 'ECU', 'GEO', 'HKG', 'MDA', 'MOZ', 'SVK', 'TGA', 'BAR',
       'KGZ', 'KSA', 'KUW', 'MKD', 'SRI', 'VIE', 'ERI', 'PAR', 'SCG',
       'UAE', 'AFG',

In [161]:
MedaldfCs = ['ARG', 'AUS', 'AUT', 'BEL', 'BRA', 'BUL', 'BWI', 'CAN', 'DEN',
       'EGY', 'ESP', 'ETH', 'EUA', 'FIN', 'FRA', 'GBR', 'GHA', 'GRE',
       'HUN', 'IND', 'IRI', 'IRQ', 'ITA', 'JPN', 'MAR', 'MEX', 'NED',
       'NOR', 'NZL', 'PAK', 'POL', 'POR', 'ROU', 'RSA', 'SGP', 'SUI',
       'SWE', 'TCH', 'TPE', 'TUR', 'URS', 'USA', 'VEN', 'YUG', 'PRK',
       'BAH', 'CUB', 'IRL', 'KEN', 'KOR', 'NGR', 'PHI', 'TTO', 'TUN',
       'URU', 'FRG', 'GDR', 'CMR', 'JAM', 'MGL', 'UGA', 'COL', 'LBN',
       'NIG', 'LIE', 'BER', 'PUR', 'THA', 'GUY', 'TAN', 'ZIM', 'ALG',
       'CHN', 'CIV', 'DOM', 'ISL', 'PER', 'SYR', 'ZAM', 'AHO', 'CHI',
       'CRC', 'DJI', 'INA', 'ISV', 'SEN', 'SUR', 'CRO', 'EST', 'EUN',
       'GER', 'IOP', 'ISR', 'LAT', 'LTU', 'MAS', 'NAM', 'QAT', 'SLO',
       'LUX', 'BLR', 'KAZ', 'RUS', 'UKR', 'UZB', 'ARM', 'AZE', 'BDI',
       'CZE', 'ECU', 'GEO', 'HKG', 'MDA', 'MOZ', 'SVK', 'TGA', 'BAR',
       'KGZ', 'KSA', 'KUW', 'MKD', 'SRI', 'VIE', 'ERI', 'PAR', 'SCG',
       'UAE', 'AFG', 'MRI', 'PAN', 'SAM', 'SRB', 'SUD', 'TJK', 'TOG',
       'BOT', 'BRN', 'CYP', 'GAB', 'GRN', 'GUA', 'MNE', 'FIJ', 'IOA',
       'JOR', 'KOS', 'OAR']

In [162]:
# this is a list of the relevant countries 
RelCs = list(set(otherCslist + MedaldfCs))

In [163]:
RelCs

['ROU',
 'USA',
 'VIE',
 'BRN',
 'TJK',
 'AUS',
 'EST',
 'GHA',
 'IND',
 'ISV',
 'GDR',
 'TOG',
 'QAT',
 'IRQ',
 'DEN',
 'SUI',
 'GER',
 'TAN',
 'UGA',
 'SUD',
 'UKR',
 'INA',
 'FIJ',
 'KAZ',
 'CIV',
 'ERI',
 'NOR',
 'BLR',
 'BRA',
 'JAM',
 'PUR',
 'ECU',
 'KUW',
 'EUN',
 'TKM',
 'LAT',
 'GEO',
 'MGL',
 'CRC',
 'BOT',
 'TCH',
 'JOR',
 'ISR',
 'IOA',
 'DOM',
 'BAR',
 'LIE',
 'CUB',
 'RUS',
 'KEN',
 'DJI',
 'SGP',
 'CHI',
 'SAM',
 'GBR',
 'MDA',
 'BER',
 'SUR',
 'LBN',
 'KSA',
 'MNE',
 'CRO',
 'FRG',
 'IOP',
 'GAB',
 'ETH',
 'SCG',
 'ALG',
 'URU',
 'TTO',
 'KOS',
 'EUA',
 'BAH',
 'ITA',
 'KOR',
 'CZE',
 'CAN',
 'MKD',
 'OAR',
 'CYP',
 'GRE',
 'BUL',
 'CMR',
 'PER',
 'SRI',
 'NAM',
 'ISL',
 'SVK',
 'BIH',
 'SEN',
 'MEX',
 'NGR',
 'PHI',
 'ARG',
 'HUN',
 'BEL',
 'PAK',
 'MRI',
 'SRB',
 'ZIM',
 'IRL',
 'POL',
 'URS',
 'NED',
 'POR',
 'KGZ',
 'NIG',
 'TPE',
 'CHN',
 'PAN',
 'COL',
 'YUG',
 'RSA',
 'THA',
 'EGY',
 'TGA',
 'AZE',
 'FRA',
 'SYR',
 'TUR',
 'MOZ',
 'ARM',
 'LTU',
 'MAS',
 'HKG',


# Finally removing all the irrelevant countries from the country df
We'll remove all the rows from the country df that are not in this RelCs list above

In [164]:
countrydf = countrydf[countrydf['Code'].isin(RelCs)].reset_index()
# Now we can remove the old index field because it is redudant 
countrydf = countrydf.drop(countrydf.columns[[0]], axis=1)

# This country dataFrame now only contains the relevant countires who have won medals

In [165]:
df = countrydf
df.head(1)

Unnamed: 0,Country,Year,Population,Males,Females,Life_Expectancy,GDP,Region,Elevation,Area_SqKM,Centroid_Longitude,Centroid_Latitude,Population_Density,CO2_Emissions,Methane_Emissions,Nitrous_Oxide_Emisions,Total_Emissions,Emmisions_per_Capita,Code
0,Afghanistan,1960,8996351.0,4649361.0,4346990.0,32.337561,537777800.0,West and Central Asia,1884.71,646212.0,66.1685,33.78231,13.921671,414.371,,,414.371,4.6e-05,AFG


In [166]:
df.to_csv( r"../../data/prep/Countries/countries_250.csv", index=False)