# PRÉDICTION DES JEUX OLYMPIQUES 2024

Télécharger : 

- pip install pandas
- pip install numpy
- pip install scikit-learn
- pip install torch


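Le modèle est surajusté aux données d'entraînement, ce qui explique une accuracy de 99 %. Le modèle a appris non seulement les tendances mais aussi les données d'entraînement elles-mêmes. À noter : les colonnes cibles (`is_gold`, `is_silver`, `is_bronze`) figurent aussi parmi les features, ce qui contribue à ce score.
Il faudrait revoir les splits ; par manque de temps, cela n'a pas été effectué.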

In [None]:
# Import the required libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim

# Load the raw results table; 'Dataset.csv' is read from the working directory.
data = pd.read_csv('Dataset.csv')

# Label missing rank positions as 'NC'. The filled column is assigned back
# instead of calling fillna(inplace=True) on the column selection: that
# chained in-place form raises a FutureWarning on recent pandas and no longer
# updates `data` once Copy-on-Write is enabled.
data['rank_position'] = data['rank_position'].fillna('NC')

# One 0/1 indicator column per medal colour. The vectorised comparison yields
# 0 for every value other than the exact label (missing values included),
# which is what the previous row-by-row lambda produced.
data['is_gold'] = (data['medal_type'] == 'GOLD').astype('int64')
data['is_silver'] = (data['medal_type'] == 'SILVER').astype('int64')
data['is_bronze'] = (data['medal_type'] == 'BRONZE').astype('int64')

# Sum the indicators to get medal counts per (country, games year); the
# grouping keys are restored as ordinary columns by reset_index().
medals_per_country_year = (
    data.groupby(['country_name', 'game_year'])[['is_gold', 'is_silver', 'is_bronze']]
    .sum()
    .reset_index()
)

# Model inputs and the three regression targets, all taken from the
# per-country, per-year medal table.
# NOTE(review): the gold/silver/bronze counts are used both as input columns
# and as targets, so every target value is directly readable from its own
# input row.
feature_columns = ['game_year', 'is_gold', 'is_silver', 'is_bronze']
features = medals_per_country_year[feature_columns]
target_gold = medals_per_country_year['is_gold']
target_silver = medals_per_country_year['is_silver']
target_bronze = medals_per_country_year['is_bronze']

# Hold out 20% of the rows for testing. All arrays go through a single call,
# so the same seeded row partition is applied to the features and to each
# target.
(
    X_train, X_test,
    y_train_gold, y_test_gold,
    y_train_silver, y_test_silver,
    y_train_bronze, y_test_bronze,
) = train_test_split(
    features, target_gold, target_silver, target_bronze,
    test_size=0.2, random_state=42,
)

# Standardise the inputs: statistics are learnt on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert the scaled inputs and the medal-count targets to float32 PyTorch
# tensors.
def _as_column_tensor(series):
    """Return the values of `series` as a float32 tensor reshaped to (-1, 1)."""
    return torch.tensor(series.values, dtype=torch.float32).view(-1, 1)


# Inputs.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Training targets, one single-column tensor per medal colour.
y_train_gold_tensor = _as_column_tensor(y_train_gold)
y_train_silver_tensor = _as_column_tensor(y_train_silver)
y_train_bronze_tensor = _as_column_tensor(y_train_bronze)

# Test targets, same layout.
y_test_gold_tensor = _as_column_tensor(y_test_gold)
y_test_silver_tensor = _as_column_tensor(y_test_silver)
y_test_bronze_tensor = _as_column_tensor(y_test_bronze)

# Model definition
class Net(nn.Module):
    """Feed-forward network: two ReLU-activated hidden layers and a linear output.

    Args:
        input_dim: number of input features.
        hidden1_dim: width of the first hidden layer.
        hidden2_dim: width of the second hidden layer.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super().__init__()
        # Layers are created in input-to-output order.
        self.fc1 = nn.Linear(input_dim, hidden1_dim)
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)
        self.fc3 = nn.Linear(hidden2_dim, output_dim)

    def forward(self, x):
        """Return the raw (un-activated) output of the last layer for `x`."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = layer(hidden).relu()
        return self.fc3(hidden)

# Seed PyTorch's global random generator so that weight initialisation and the
# torch.randperm batch shuffling used during training are repeatable from run
# to run, in line with the fixed random_state already used for the
# train/test split.
SEED = 42
torch.manual_seed(SEED)

# Network dimensions: one input per feature column, two hidden layers and a
# single regression output.
input_dim = X_train_scaled.shape[1]
hidden1_dim = 64
hidden2_dim = 32
output_dim = 1

# One independent network per medal colour, all with the same architecture.
model_gold = Net(input_dim, hidden1_dim, hidden2_dim, output_dim)
model_silver = Net(input_dim, hidden1_dim, hidden2_dim, output_dim)
model_bronze = Net(input_dim, hidden1_dim, hidden2_dim, output_dim)

# Loss function (mean squared error, shared by the three models) and one Adam
# optimizer per model.
criterion = nn.MSELoss()
optimizer_gold = optim.Adam(model_gold.parameters(), lr=0.01)
optimizer_silver = optim.Adam(model_silver.parameters(), lr=0.01)
optimizer_bronze = optim.Adam(model_bronze.parameters(), lr=0.01)

# Training schedule
epochs = 50
batch_size = 10

def train_model(model, optimizer, X_train_tensor, y_train_tensor, epochs, batch_size, loss_fn=None):
    """Train `model` in place with shuffled mini-batches.

    Args:
        model: network to train; called as ``model(batch)``.
        optimizer: optimizer used to update the model (``zero_grad``/``step``).
        X_train_tensor: input tensor, indexed along dimension 0.
        y_train_tensor: targets, indexed with the same row indices as
            ``X_train_tensor``.
        epochs: number of full passes over the training rows.
        batch_size: number of rows per optimisation step; the last batch of an
            epoch may be smaller.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning the loss.
            When omitted, the module-level ``criterion`` is used.

    Returns:
        None. The loss over the whole training set is printed every 10th epoch.
    """
    if loss_fn is None:
        # Keep the historical behaviour: fall back to the script-level loss.
        loss_fn = criterion

    n_samples = X_train_tensor.size(0)
    for epoch in range(epochs):
        # New random row order for every epoch.
        permutation = torch.randperm(n_samples)
        for start in range(0, n_samples, batch_size):
            indices = permutation[start:start + batch_size]
            batch_x, batch_y = X_train_tensor[indices], y_train_tensor[indices]

            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = loss_fn(outputs, batch_y)
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 10 == 0:
            # Reporting only: no gradients are needed, so skip building the
            # autograd graph for the full training set.
            with torch.no_grad():
                train_loss = loss_fn(model(X_train_tensor), y_train_tensor)
            print(f"Epoch {epoch + 1}, Loss: {train_loss.item()}")

# Train the three models one after the other (gold, silver, bronze), each on
# the shared training inputs and its own target tensor.
training_runs = (
    ("Training Gold Model", model_gold, optimizer_gold, y_train_gold_tensor),
    ("Training Silver Model", model_silver, optimizer_silver, y_train_silver_tensor),
    ("Training Bronze Model", model_bronze, optimizer_bronze, y_train_bronze_tensor),
)
for banner, net, net_optimizer, targets in training_runs:
    print(banner)
    train_model(net, net_optimizer, X_train_tensor, targets, epochs, batch_size)

# Build the inputs for the Paris 2024 predictions from the rows of the 2020
# games. The explicit .copy() makes this an independent frame, so adding the
# prediction columns below does not trigger SettingWithCopyWarning.
# NOTE(review): the rows keep game_year == 2020 and their 2020 medal counts as
# inputs; confirm this is the intended way to project to 2024.
paris_2024_data = medals_per_country_year[medals_per_country_year['game_year'] == 2020].copy()
paris_2024_features = paris_2024_data[['game_year', 'is_gold', 'is_silver', 'is_bronze']]

# Apply the scaling learnt on the training set.
paris_2024_scaled = scaler.transform(paris_2024_features)

# Convert to a PyTorch tensor.
paris_2024_tensor = torch.tensor(paris_2024_scaled, dtype=torch.float32)

# Run the three models. Inference needs no gradients, so autograd is switched
# off instead of detaching each output afterwards. Each result is an array
# with one row per country and a single column.
with torch.no_grad():
    paris_2024_gold_predictions = model_gold(paris_2024_tensor).numpy()
    paris_2024_silver_predictions = model_silver(paris_2024_tensor).numpy()
    paris_2024_bronze_predictions = model_bronze(paris_2024_tensor).numpy()

# Store the predictions next to the country rows; the single-column arrays are
# flattened so that each one is assigned as a plain 1-D column.
paris_2024_data['predicted_gold'] = paris_2024_gold_predictions.ravel()
paris_2024_data['predicted_silver'] = paris_2024_silver_predictions.ravel()
paris_2024_data['predicted_bronze'] = paris_2024_bronze_predictions.ravel()

# Display the prediction table for the Paris 2024 games.
paris_2024_predictions_table = paris_2024_data[['country_name', 'predicted_gold', 'predicted_silver', 'predicted_bronze']]
print(paris_2024_predictions_table)

# Save the prediction table as CSV (without the index column).
paris_2024_predictions_table.to_csv('paris_2024_predictions3.csv', index=False)
