# Tokyo 2020 Analysis - GDP vs Population

The purpose of this analysis is to determine whether a country's GDP and population are related to the number of medals it won, across the countries that participated in the Games.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Import Medals dataset

In [None]:
# Load the medal table from the Kaggle input directory.
medals_csv_path = "../input/2021-olympics-medals-in-tokyo/Tokyo Medals 2021.csv"
medals = pd.read_csv(medals_csv_path)

In [None]:
# Show the medal table exactly as loaded from the CSV, before any clean-up.
medals

Let's clean up the dataset


In [None]:
# Rename 'Country' to 'Name' so the column matches the key used when merging on 'Name' later.
medals = medals.rename(columns={'Country': 'Name'})

In [None]:
# Remove the 'Rank By Total' column; drop() returns a new frame, which is rebound to `medals`.
medals = medals.drop(columns=['Rank By Total'])

In [None]:
# Map the long-form country names in the medal table to shorter names.
# Presumably these are the spellings used by the GDP dataset's 'Name' column — verify against that file.
country_aliases = {
    'United States of America': 'United States',
    'People\'s Republic of China': 'China',
    'Great Britain': 'United Kingdom',
    'ROC': 'Russia',
    'Republic of Korea': 'South Korea',
    'Islamic Republic of Iran': 'Iran',
    'Hong Kong, China': 'Hong Kong',
    'Côte d\'Ivoire': 'Ivory Coast',
    'Republic of Moldova': 'Moldova',
    'Syrian Arab Republic': 'Syria',
}
medals['Name'] = medals['Name'].replace(country_aliases)

In [None]:
# Eritrea appears to be missing from the medal table, so add a zero-medal row for it.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to add rows.
Eritrea = {'Name': 'Eritrea', 'Gold Medal': 0, 'Silver Medal': 0, 'Bronze Medal': 0, 'Total': 0}
# Guard so that re-running this cell does not add a duplicate Eritrea row.
if not medals['Name'].eq('Eritrea').any():
    medals = pd.concat([medals, pd.DataFrame([Eritrea])], ignore_index=True)

In [None]:
# Show the medal table after the rename, column drop, name replacement and added Eritrea row.
medals

In [None]:
# Print column dtypes and non-null counts for the cleaned medal table.
medals.info()

## Import GDP dataset

The dataset was downloaded from: https://worldpopulationreview.com/countries/countries-by-gdp

In [None]:
# Load the GDP / population table from the Kaggle input directory.
gdp_csv_path = "../input/world-countries-gdppopulation/World Countries GDP-population.csv"
gdp = pd.read_csv(gdp_csv_path)

In [None]:
# Show the GDP table exactly as loaded from the CSV.
gdp

In [None]:
# Count missing values per column (isna is the same check as isnull).
gdp.isna().sum()

In [None]:
# Drop the 1st, 3rd and 4th columns, selected by position rather than by name.
# NOTE(review): this depends on the CSV's column order, and re-running the cell
# without reloading `gdp` would drop further columns — consider dropping by name.
gdp = gdp.drop(columns=gdp.columns[[0, 2, 3]])

In [None]:
# Inspect the remaining columns and their dtypes after the positional drop.
gdp.info()

In [None]:
# Print the exact column labels; the conversions below refer to columns by these names.
print(gdp.columns)

In [None]:
# Strip the currency symbol and thousands separators, then convert to integers.
# regex=False makes '$' a literal character instead of a regex end-of-string anchor.
# int64 is requested explicitly: plain 'int' is platform-dependent (32-bit on Windows
# with NumPy < 2), and per-capita GDP multiplied by population exceeds the 32-bit range,
# which would overflow silently.
# NOTE: re-running this cell after conversion fails because the columns are no longer strings.
gdp['GDP Per Capita'] = (
    gdp['GDP Per Capita']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype('int64')
)
gdp['Population'] = (
    gdp['Population']
    .str.replace(',', '', regex=False)
    .astype('int64')
)

In [None]:
# Preview the first rows to check that the symbols and separators were stripped.
gdp.head()

In [None]:
# Confirm that 'GDP Per Capita' and 'Population' now have an integer dtype.
gdp.info()

In [None]:
# Derive total GDP as the element-wise product of per-capita GDP and population.
gdp['GDP Total'] = gdp['GDP Per Capita'].mul(gdp['Population'])

In [None]:
# Preview the first 50 rows, including the new 'GDP Total' column.
gdp.head(50)

In [None]:
# Check dtypes and non-null counts once more now that 'GDP Total' has been added.
gdp.info()

## Merging the two datasets

In [None]:
# Outer-join GDP and medal data on country name. Rows present in only one of the
# two frames are kept, with NaN in the columns coming from the other frame.
table = gdp.merge(medals, how='outer', on='Name')

In [None]:
# Non-null counts per column show how many rows lack a match from the outer join.
table.info()

In [None]:
# Show the merged table, including rows that have NaN on one side of the join.
table

### Missing data

In [None]:
# Keep only rows with no missing values, then renumber the rows from 0.
# drop=True discards the old index instead of inserting it as an 'index' column.
table = table.dropna().reset_index(drop=True)

### Visualisation 

In [None]:
# Show the final table (complete rows only) that feeds the correlation heatmap.
table

In [None]:
# Correlation heatmap of the numeric columns.
# The string 'Name' column is excluded explicitly: since pandas 2.0, DataFrame.corr()
# no longer drops non-numeric columns silently and raises on them instead.
numeric_corr = table.select_dtypes(include='number').corr()

fig, ax = plt.subplots(figsize=(8, 8))
# Fix the colour scale to the full [-1, 1] range so the colours are comparable between runs.
sns.heatmap(numeric_corr, annot=True, vmin=-1.0, vmax=1.0, ax=ax)
ax.set_title('Correlation between GDP, population and medals')
plt.show()

### Verdict

The heatmap shows a strong positive correlation (0.83) between the total number of medals and GDP.