In [2]:
import pandas as pd

# Chemin vers le fichier Excel
excel_file = r'C:\Users\mazin\Downloads\olympic_medals.xlsx'

# Charger le fichier Excel
df = pd.read_excel(excel_file)

# Afficher les premières lignes pour vérifier
print(df.head())


   Unnamed: 0 discipline_title     slug_game    event_title event_gender  \
0           0          Curling  beijing-2022  Mixed Doubles        Mixed   
1           1          Curling  beijing-2022  Mixed Doubles        Mixed   
2           2          Curling  beijing-2022  Mixed Doubles        Mixed   
3           3          Curling  beijing-2022  Mixed Doubles        Mixed   
4           4          Curling  beijing-2022  Mixed Doubles        Mixed   

  medal_type participant_type participant_title  \
0       GOLD         GameTeam             Italy   
1       GOLD         GameTeam             Italy   
2     SILVER         GameTeam            Norway   
3     SILVER         GameTeam            Norway   
4     BRONZE         GameTeam            Sweden   

                                         athlete_url     athlete_full_name  \
0  https://olympics.com/en/athletes/stefania-cons...  Stefania CONSTANTINI   
1      https://olympics.com/en/athletes/amos-mosaner          Amos MOSANER   
2 

In [3]:
# Vérifier la structure des données
print(df.columns)

# Renommer les colonnes si nécessaire pour faciliter l'accès
df.columns = ['','discipline_title', 'slug_game', 'event_title', 'event_gender', 'medal_type', 'participant_type', 'participant_title', 'athlete_url', 'athlete_full_name', 'country_name', 'country_code', 'country_3_letter_code']


# Créer un DataFrame avec les médailles agrégées par pays et par année
df_medals = df.groupby(['country_name', 'slug_game', 'medal_type']).size().unstack(fill_value=0).reset_index()

# Renommer les colonnes pour plus de clarté
df_medals.columns = ['country', 'year', 'bronze', 'gold', 'silver']

df_medals['year'] = df_medals['year'].apply(lambda x: int(x.split('-')[-1]))

# Afficher le DataFrame préparé
print(df_medals.head(100))


Index(['Unnamed: 0', 'discipline_title', 'slug_game', 'event_title',
       'event_gender', 'medal_type', 'participant_type', 'participant_title',
       'athlete_url', 'athlete_full_name', 'country_name', 'country_code',
       'country_3_letter_code'],
      dtype='object')
        country  year  bronze  gold  silver
0   Afghanistan  2008       1     0       0
1   Afghanistan  2012       1     0       0
2       Algeria  1996       1     2       0
3       Algeria  1992       1     1       0
4       Algeria  2008       1     0       1
..          ...   ...     ...   ...     ...
95      Austria  1984       1     1       1
96      Austria  1956       4     0       0
97      Austria  1968       3     0       2
98      Austria  1976       1     0       0
99      Austria  1980       1     1       3

[100 rows x 5 columns]


In [4]:
from sklearn.model_selection import train_test_split

# Sélectionner les années comme caractéristiques et les médailles comme étiquettes
features = df_medals[['year']]
labels = df_medals[['gold', 'silver', 'bronze']]

# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(64, activation='relu', input_shape=(1,)),  # 1 caractéristique : 'year'
    Dense(64, activation='relu'),
    Dense(3)  # 3 étiquettes : 'gold', 'silver', 'bronze'
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Entraîner le modèle
model.fit(X_train, y_train, epochs=50, validation_split=0.2)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 3904.9609 - val_loss: 187.4830
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 105.1715 - val_loss: 42.7757
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 57.1430 - val_loss: 33.9684
Epoch 4/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 52.9805 - val_loss: 33.0636
Epoch 5/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 56.7453 - val_loss: 33.7906
Epoch 6/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 42.5618 - val_loss: 41.4621
Epoch 7/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 51.7850 - val_loss: 34.0069
Epoch 8/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 47.6742 - val_loss: 33.4296
Epoch 9/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x279d78cbaa0>

In [6]:
import numpy as np

# Prédire pour l'année 2024
year_to_predict = np.array([[2024]])

predictions = model.predict(year_to_predict)

print("Predicted medals for 2024:")
print(f"Gold: {predictions[0][0]}, Silver: {predictions[0][1]}, Bronze: {predictions[0][2]}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Predicted medals for 2024:
Gold: 3.394793748855591, Silver: -0.06456710398197174, Bronze: 2.00795578956604


In [7]:
# Obtenir les 10 meilleurs pays en fonction des médailles totales
top_10_countries = df_medals.groupby('country')[['gold', 'silver', 'bronze']].sum().sort_values(by=['gold', 'silver', 'bronze'], ascending=False).head(10).index

predictions_top_10 = {}
for country in top_10_countries:
    country_data = df_medals[df_medals['country'] == country]
    last_year = country_data['year'].max()
    predictions = model.predict(np.array([[last_year + 4]]))  # Prédire pour la prochaine olympiade

    predictions_top_10[country] = {
        'gold': predictions[0][0],
        'silver': predictions[0][1],
        'bronze': predictions[0][2]
    }

print("Predicted medals for top 10 countries:")
for country, medals in predictions_top_10.items():
    print(f"{country} - Gold: {medals['gold']}, Silver: {medals['silver']}, Bronze: {medals['bronze']}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Predicted medals for top 10 countries:
United States of America - Gold: 3.3981151580810547, Silver: -0.0646863728761673, Bronze: 2.010000467300415
Soviet Union - Gold: 3.3415162563323975, Silver: -0.06328724324703217, Bronze: 1.976030945777893
Germany - Gold: 3.3981151580810547, 

In [10]:
from tensorflow.keras.models import load_model


model.save('model_Prédiction_Top10.h5')
model = load_model('model_Prédiction_Top10.h5')


