In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from deep_translator import GoogleTranslator as translator
import pickle
import plotly.express as px

### Translation of Medals data

In [2]:
medals = pd.read_csv("data_csv/medailles.csv")
medals.head()

Unnamed: 0,Place,Pays,Or,Argent,Bronze,Total,Lieu,Annee
0,1,Etats Unis,38,41,34,113,Tokyo,2021
1,2,Chine,36,32,18,86,Tokyo,2021
2,3,Japon,27,12,16,55,Tokyo,2021
3,4,Grande Bretagne,22,21,22,65,Tokyo,2021
4,5,Russie,20,28,23,71,Tokyo,2021


In [3]:
# let's change the column names of the medals dataframe to Place, Country, Gold, Silver, Bronze, Total, Location, Year
medals.columns = ['Place', 'Country', 'Gold', 'Silver', 'Bronze', 'Total', 'Location', 'Year']
medals.head(15)

Unnamed: 0,Place,Country,Gold,Silver,Bronze,Total,Location,Year
0,1,Etats Unis,38,41,34,113,Tokyo,2021
1,2,Chine,36,32,18,86,Tokyo,2021
2,3,Japon,27,12,16,55,Tokyo,2021
3,4,Grande Bretagne,22,21,22,65,Tokyo,2021
4,5,Russie,20,28,23,71,Tokyo,2021
5,6,Australie,17,7,21,45,Tokyo,2021
6,7,Pays-Bas,10,12,14,36,Tokyo,2021
7,8,France,10,12,11,33,Tokyo,2021
8,9,Italie,10,10,20,40,Tokyo,2021
9,10,Allemagne,10,10,16,36,Tokyo,2021


In [4]:
# Create a Translator object
#translator = Translator(to_lang="en", from_lang="fr")

# Extract unique values from 'Country' and 'Location' columns
unique_countries = medals['Country'].unique()
unique_locations = medals['Location'].unique()

# Create an empty dictionary to store translations
translations_dict = {}

# Translate unique countries
for country in unique_countries:
    try:
        translation = translator.translate(country)
        translations_dict[country] = translation
    except Exception as e:
        translations_dict[country] = country  # Use the original word if translation fails

# Translate unique locations
for location in unique_locations:
    try:
        translation = translator.translate(location)
        translations_dict[location] = translation
    except Exception as e:
        translations_dict[location] = location  # Use the original word if translation fails

# Display the translations dictionary
print(translations_dict)


{'Etats Unis': 'Etats Unis', 'Chine': 'Chine', 'Japon': 'Japon', 'Grande Bretagne': 'Grande Bretagne', 'Russie': 'Russie', 'Australie': 'Australie', 'Pays-Bas': 'Pays-Bas', 'France': 'France', 'Italie': 'Italie', 'Allemagne': 'Allemagne', 'Canada': 'Canada', 'Brésil': 'Brésil', 'Nouvelle-Zélande': 'Nouvelle-Zélande', 'Cuba': 'Cuba', 'Hongrie': 'Hongrie', 'Corée du Sud': 'Corée du Sud', 'Pologne': 'Pologne', 'République Tchèque': 'République Tchèque', 'Kenya': 'Kenya', 'Norvège': 'Norvège', 'Jamaïque': 'Jamaïque', 'Espagne': 'Espagne', 'Suède': 'Suède', 'Suisse': 'Suisse', 'Danemark': 'Danemark', 'Croatie': 'Croatie', 'Iran': 'Iran', 'Serbie': 'Serbie', 'Belgique': 'Belgique', 'Slovénie': 'Slovénie', 'Ouzbékistan': 'Ouzbékistan', 'Géorgie': 'Géorgie', 'Taïwan': 'Taïwan', 'Turquie': 'Turquie', 'Bulgarie': 'Bulgarie', 'Grèce': 'Grèce', 'Ouganda': 'Ouganda', 'Equateur': 'Equateur', 'Irlande': 'Irlande', 'Israël': 'Israël', 'Lettonie': 'Lettonie', 'Qatar': 'Qatar', 'Bahamas': 'Bahamas', 'Ko

In [5]:
# we create a pickle to save the translation dictionary
with open('pickles/translations_dict.pickle', 'wb') as handle:
    pickle.dump(translations_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [6]:
# we load the pickle
with open('pickles/translations_dict.pickle', 'rb') as handle:
    translations_dict = pickle.load(handle)

In [7]:
# let's apply the translation to all the elements of the Country and Location columns
medals['Country'] = medals['Country'].apply(lambda x: translations_dict[x])
medals['Location'] = medals['Location'].apply(lambda x: translations_dict[x])
medals.head(15)

Unnamed: 0,Place,Country,Gold,Silver,Bronze,Total,Location,Year
0,1,Etats Unis,38,41,34,113,Tokyo,2021
1,2,Chine,36,32,18,86,Tokyo,2021
2,3,Japon,27,12,16,55,Tokyo,2021
3,4,Grande Bretagne,22,21,22,65,Tokyo,2021
4,5,Russie,20,28,23,71,Tokyo,2021
5,6,Australie,17,7,21,45,Tokyo,2021
6,7,Pays-Bas,10,12,14,36,Tokyo,2021
7,8,France,10,12,11,33,Tokyo,2021
8,9,Italie,10,10,20,40,Tokyo,2021
9,10,Allemagne,10,10,16,36,Tokyo,2021


### Data Analysis from Medals data

In [8]:
# we search every country with "Germany" in the name
medals[medals['Country'].str.contains('Germany')]["Country"].unique()

array([], dtype=object)

In [9]:
# we replace every "East Germany" or "West Germany" or "Statement in intervention submitted by Germany" with "Germany
medals['Country'] = medals['Country'].replace(['East Germany', 'West Germany', 'Statement in intervention submitted by Germany', 'Germany, Empire'], 'Germany')
medals[medals['Country'].str.contains('Germany')]["Country"].unique()

array([], dtype=object)

In [10]:
# the top 10 countries that won the most medals in the history of the olympics
medals.groupby('Country')['Total'].sum().sort_values(ascending=False).head(10)

# on a bar chart px countries in the x axis and total medals in the y axis
fig = px.bar(medals.groupby('Country')['Total'].sum().sort_values(ascending=False).head(10),
             x=medals.groupby('Country')['Total'].sum().sort_values(ascending=False).head(10).index,
             y=medals.groupby('Country')['Total'].sum().sort_values(ascending=False).head(10).values,
             labels={'x': 'Country', 'y': 'Total Medals'})
fig.show()


In [11]:
# evolution of total medals distributed over the years
fig = px.line(medals.groupby('Year')['Total'].sum(),
              x=medals.groupby('Year')['Total'].sum().index,
              y=medals.groupby('Year')['Total'].sum().values,
              labels={'x': 'Year', 'y': 'Total Medals'})
fig.show()

In [12]:
# Plot a scatter plot with the x-axis representing the year and y-axis representing the total number of medals for each country.
# The size of the points should be the total number of medals won by the country.
# The color of the points should be the country name.
fig = px.scatter(medals.groupby(['Year', 'Country'])['Total'].sum().reset_index(),
                 x='Year',
                 y='Total',
                 size='Total',
                 color='Country',
                 labels={'x': 'Year', 'y': 'Total Medals'})
fig.show()

In [13]:
# Group bars by year, showing the distribution of Gold, Silver, and Bronze medals for each year.
fig = px.bar(medals, x='Country', y=['Gold', 'Silver', 'Bronze'],
             color_discrete_map={'Gold': 'gold', 'Silver': 'silver', 'Bronze': 'brown'},
             title='Medals Distribution by Country',
             labels={'value': 'Number of Medals', 'variable': 'Medal Type'},
             barmode='group')
fig.show()

In [14]:
df = medals.sort_values(by='Total', ascending=False).head(10)
fig = px.bar(df, x='Country', y=['Gold', 'Silver', 'Bronze'],
             color_discrete_map={'Gold': 'gold', 'Silver': 'silver', 'Bronze': 'brown'},
             title='Top 10 Countries - Medals Distribution',
             labels={'value': 'Number of Medals', 'variable': 'Medal Type'},
             barmode='group')
fig.show()

In [15]:
# line chart showing the progression of france - its total medals over the years.
fig = px.line(medals[medals['Country'] == 'France'].groupby('Year')['Total'].sum(),
              x=medals[medals['Country'] == 'France'].groupby('Year')['Total'].sum().index,
              y=medals[medals['Country'] == 'France'].groupby('Year')['Total'].sum().values,
              labels={'x': 'Year', 'y': 'Total Medals'})
fig.show()

### A historical analysis ...

In [18]:
# line chart showing the progression of the United States - its total medals over the years.
fig = px.scatter(medals[medals['Country'] == 'Etats Unis'].groupby('Year')['Total'].sum(),
              x=medals[medals['Country'] == 'Etats Unis'].groupby('Year')['Total'].sum().index,
              y=medals[medals['Country'] == 'Etats Unis'].groupby('Year')['Total'].sum().values,
              labels={'x': 'Year', 'y': 'Total Medals'})
fig.show()

We can observe the absence of the United States in the 1980 Moscow Olympics : the country boycotted the games to protest the Soviet invasion of Afghanistan. The Soviet Union retaliated by boycotting the 1984 Los Angeles Olympics.

In [19]:
fig = px.scatter(medals[medals['Country'] == 'U.R.S.S.'].groupby('Year')['Total'].sum(),
              x=medals[medals['Country'] == 'U.R.S.S.'].groupby('Year')['Total'].sum().index,
              y=medals[medals['Country'] == 'U.R.S.S.'].groupby('Year')['Total'].sum().values,
              labels={'x': 'Year', 'y': 'Total Medals'})
fig.show()

### Environmental impact of hosting the Olympics : 

#### Do the olympic games have a remarkable impact on a country's greenhouse gas emissions?

In [22]:
GH_emissions = pd.read_csv('data_csv/cp_GH_emissions_final_version.csv')
GH_emissions.head(5)

Unnamed: 0,sector,country,year,GH emissions
0,Energy Industries,Australia,1990,143172.76
1,Energy Industries,Australia,1991,146396.55
2,Energy Industries,Australia,1992,149719.8
3,Energy Industries,Australia,1993,151492.72
4,Energy Industries,Australia,1994,152307.64
