In [1]:
import pandas as pd

# Read the medal table from the Excel workbook into a DataFrame.
df = pd.read_excel(io='olympic_medals.xlsx')

# Print the first five rows as a quick check that the load worked.
print(df.head(n=5))


   Unnamed: 0 discipline_title     slug_game    event_title event_gender  \
0           0          Curling  beijing-2022  Mixed Doubles        Mixed   
1           1          Curling  beijing-2022  Mixed Doubles        Mixed   
2           2          Curling  beijing-2022  Mixed Doubles        Mixed   
3           3          Curling  beijing-2022  Mixed Doubles        Mixed   
4           4          Curling  beijing-2022  Mixed Doubles        Mixed   

  medal_type participant_type participant_title  \
0       GOLD         GameTeam             Italy   
1       GOLD         GameTeam             Italy   
2     SILVER         GameTeam            Norway   
3     SILVER         GameTeam            Norway   
4     BRONZE         GameTeam            Sweden   

                                         athlete_url     athlete_full_name  \
0  https://olympics.com/en/athletes/stefania-cons...  Stefania CONSTANTINI   
1      https://olympics.com/en/athletes/amos-mosaner          Amos MOSANER   
2 

In [2]:
# Show the column labels of the raw table before relabelling them.
print(df.columns)

# Relabel the columns by position. Only the first label changes: the
# 'Unnamed: 0' column gets an empty name; the other twelve keep their names.
# NOTE: positional assignment assumes the workbook keeps this column order.
df.columns = [
    '',
    'discipline_title',
    'slug_game',
    'event_title',
    'event_gender',
    'medal_type',
    'participant_type',
    'participant_title',
    'athlete_url',
    'athlete_full_name',
    'country_name',
    'country_code',
    'country_3_letter_code',
]

# Count the rows for each (country, games, medal type) combination, then
# pivot the medal types into columns, filling missing combinations with 0.
df_medals = (
    df.groupby(['country_name', 'slug_game', 'medal_type'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

# Shorter labels, again assigned by position.
# NOTE: assumes the pivoted medal columns come out as bronze, gold, silver.
df_medals.columns = ['country', 'year', 'bronze', 'gold', 'silver']

# The games slug ends with the year (e.g. 'beijing-2022'): keep the text
# after the last hyphen and convert it to an integer.
df_medals['year'] = df_medals['year'].map(lambda slug: int(slug.rsplit('-', 1)[-1]))

# Print the first 100 rows of the prepared table.
print(df_medals.head(n=100))


Index(['Unnamed: 0', 'discipline_title', 'slug_game', 'event_title',
       'event_gender', 'medal_type', 'participant_type', 'participant_title',
       'athlete_url', 'athlete_full_name', 'country_name', 'country_code',
       'country_3_letter_code'],
      dtype='object')
        country  year  bronze  gold  silver
0   Afghanistan  2008       1     0       0
1   Afghanistan  2012       1     0       0
2       Algeria  1996       1     2       0
3       Algeria  1992       1     1       0
4       Algeria  2008       1     0       1
..          ...   ...     ...   ...     ...
95      Austria  1984       1     1       1
96      Austria  1956       4     0       0
97      Austria  1968       3     0       2
98      Austria  1976       1     0       0
99      Austria  1980       1     1       3

[100 rows x 5 columns]


In [3]:
# Distinct country names found in the aggregated medal table.
countries = df_medals.loc[:, 'country'].unique()
countries

array(['Afghanistan', 'Algeria', 'Argentina', 'Armenia', 'Australasia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Barbados', 'Belarus', 'Belgium', 'Bermuda', 'Bohemia', 'Botswana',
       'Brazil', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cameroon',
       'Canada', 'Chile', 'Chinese Taipei', 'Colombia', 'Costa Rica',
       'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', 'Czechoslovakia',
       "Côte d'Ivoire", "Democratic People's Republic of Korea",
       'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt',
       'Eritrea', 'Estonia', 'Ethiopia', 'Federal Republic of Germany',
       'Fiji', 'Finland', 'France', 'Gabon', 'Georgia',
       'German Democratic Republic (Germany)', 'Germany', 'Ghana',
       'Great Britain', 'Greece', 'Grenada', 'Guatemala', 'Guyana',
       'Haiti', 'Hong Kong, China', 'Hungary', 'Iceland',
       'Independent Olympic Athletes', 'India', 'Indonesia', 'Iraq',
       'Ireland', 'Islamic Republic of Iran', 'Is

In [4]:
# Reduce duplicate country entries by folding alternate team names into a
# single name. Keys are the name fragments to look for; values are the name
# that replaces any country containing that fragment.
COUNTRY_ALIASES = {
    'Soviet Union': 'Russian Federation',
    'Olympic Athletes from Russia': 'Russian Federation',
    'Federal Republic of Germany': 'Germany',
    'German Democratic Republic (Germany)': 'Germany',
}


def _canonical_country(name):
    """Return the merged name for ``name``, or ``name`` unchanged.

    Matching is by substring and the first alias that matches wins
    (dictionary order), mirroring an if/elif chain over the same fragments.
    """
    for fragment, canonical in COUNTRY_ALIASES.items():
        if fragment in name:
            return canonical
    return name


# Assign the mapped column back to the frame rather than calling
# replace(..., inplace=True) on df_medals['country']: an in-place method on a
# selected column is chained assignment, which recent pandas warns about and
# which leaves the frame untouched when Copy-on-Write is enabled.
# NOTE: this only renames values; rows that now share a country and year are
# not summed together here.
df_medals['country'] = df_medals['country'].map(_canonical_country)

In [5]:
from sklearn.model_selection import train_test_split

# Model input: the year only (kept two-dimensional as a one-column frame).
features = df_medals.loc[:, ['year']]
# Model targets: the three medal counts, in gold / silver / bronze order.
labels = df_medals.loc[:, ['gold', 'silver', 'bronze']]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)


In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Seed the random number generators before building the network so that
# weight initialisation and training are repeatable across kernel restarts
# (the train/test split is already seeded with the same value).
tf.keras.utils.set_random_seed(42)

# Small fully connected regressor: one input (the year), two hidden layers of
# 64 ReLU units, and three linear outputs.
model = Sequential([
    Dense(64, activation='relu', input_shape=(1,)),  # 1 feature: 'year'
    Dense(64, activation='relu'),
    Dense(3)  # 3 targets: 'gold', 'silver', 'bronze'
])

# Mean squared error loss with the Adam optimizer.
model.compile(optimizer='adam', loss='mean_squared_error')

# Train for 50 epochs, holding back 20% of the training rows for validation.
model.fit(X_train, y_train, epochs=50, validation_split=0.2)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x22c8299b7f0>

In [7]:
import numpy as np

# Build a one-row, one-column input holding the year to predict.
year_to_predict = np.array([2024]).reshape(1, 1)

# One output row with three values.
predictions = model.predict(year_to_predict)

# The outputs are read in the order of the training labels:
# gold, silver, bronze.
print("Predicted medals for 2024:")
print(f"Gold: {predictions[0][0]}, Silver: {predictions[0][1]}, Bronze: {predictions[0][2]}")


Predicted medals for 2024:
Gold: 4.743855953216553, Silver: 2.915093421936035, Bronze: 4.45546817779541


In [8]:
# Sum each country's medals over all rows, rank the countries by gold count
# (ties broken by silver, then bronze), and keep the names of the first ten.
top_10_countries = (
    df_medals
    .groupby('country')[['gold', 'silver', 'bronze']]
    .sum()
    .sort_values(by=['gold', 'silver', 'bronze'], ascending=False)
    .head(10)
    .index
)

predictions_top_10 = {}
for country in top_10_countries:
    # Rows belonging to this country, and the most recent year among them.
    country_data = df_medals.loc[df_medals['country'] == country]
    last_year = country_data['year'].max()

    # Predict for four years after the country's most recent year. The model
    # input is the year alone, so countries sharing the same most recent year
    # receive identical predictions.
    predictions = model.predict(np.array([[last_year + 4]]))

    # Pair the output values with their medal names, in training-label order.
    predictions_top_10[country] = dict(zip(('gold', 'silver', 'bronze'), predictions[0]))

print("Predicted medals for top 10 countries:")
for country, medals in predictions_top_10.items():
    print(f"{country} - Gold: {medals['gold']}, Silver: {medals['silver']}, Bronze: {medals['bronze']}")


Predicted medals for top 10 countries:
United States of America - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
Russian Federation - Gold: 4.739121437072754, Silver: 2.912278175354004, Bronze: 4.451017379760742
Germany - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
People's Republic of China - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
Great Britain - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
France - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
Italy - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
Sweden - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
Norway - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766
Hungary - Gold: 4.748608112335205, Silver: 2.918076515197754, Bronze: 4.459842681884766


In [10]:
# Importation des bibliothèques
import pickle
import os
# Make sure the output directory exists. The directory created must be the
# same one the file is written to ('../models'); exist_ok=True makes this a
# no-op when the directory is already there.
os.makedirs('../models', exist_ok=True)

# Save the trained model with pickle. The with-block closes the file handle
# even if pickling raises.
with open('../models/medals_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dense_2
......vars
.........0
.........1
...metrics\mean
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2024-05-28 16:36:50         1811
metadata.json                                  2024-05-28 16:36:50           64
variables.h5                                   2024-05-28 16:36:50        73352


In [None]:
from tensorflow.keras.models import load_model


# Write the model to an HDF5 file in the working directory, then rebind
# `model` to the copy loaded back from that same file.
h5_path = 'model_Prédiction_Top10.h5'
model.save(h5_path)
model = load_model(h5_path)
