In [142]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
import gym
from gym import spaces
import numpy as np
from stable_baselines3 import PPO
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [95]:
df = pd.read_csv("olympics-economics.csv")

In [96]:
print(df.head())

         country country_code  gold  silver  bronze  total       gdp  \
0  United States          USA    40      44      42    126  81695.19   
1          China          CHN    40      27      24     91  12614.06   
2          Japan          JPN    20      12      13     45  33834.39   
3      Australia          AUS    18      19      16     53  64711.77   
4         France          FRA    16      26      22     64  44460.82   

   gdp_year  population  
0      2023       334.9  
1      2023      1410.7  
2      2023       124.5  
3      2023        26.6  
4      2023        68.2  


# Explore and Describe the dataset

In [97]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   country       90 non-null     object 
 1   country_code  90 non-null     object 
 2   gold          90 non-null     int64  
 3   silver        90 non-null     int64  
 4   bronze        90 non-null     int64  
 5   total         90 non-null     int64  
 6   gdp           90 non-null     float64
 7   gdp_year      90 non-null     int64  
 8   population    90 non-null     float64
dtypes: float64(2), int64(5), object(2)
memory usage: 6.5+ KB
None


In [98]:
print(df.isnull().sum())

country         0
country_code    0
gold            0
silver          0
bronze          0
total           0
gdp             0
gdp_year        0
population      0
dtype: int64


In [99]:
print(df.describe())

            gold     silver     bronze       total            gdp  \
count  90.000000  90.000000  90.000000   90.000000      90.000000   
mean    3.644444   3.633333   4.255556   11.533333   24478.053556   
std     7.018933   6.797967   6.586607   19.782071   25547.857382   
min     0.000000   0.000000   0.000000    1.000000    1014.210000   
25%     0.000000   0.000000   1.000000    2.000000    5815.180000   
50%     1.000000   1.000000   2.000000    5.000000   13061.185000   
75%     3.000000   3.000000   5.000000    9.000000   34484.677500   
max    40.000000  44.000000  42.000000  126.000000  103684.880000   

          gdp_year   population  
count    90.000000    90.000000  
mean   2022.977778    69.027778  
std       0.148231   213.286437  
min    2022.000000     0.100000  
25%    2023.000000     5.325000  
50%    2023.000000    12.150000  
75%    2023.000000    48.550000  
max    2023.000000  1428.600000  


# Data Preprocessing

Handling missing values

In [100]:
df['gdp'] = df['gdp'].fillna(df['gdp'].median())

In [101]:
df.fillna(method='ffill', inplace=True)

  df.fillna(method='ffill', inplace=True)


Normalize Numerical Columns

In [102]:
scaler = MinMaxScaler()
df[['gdp', 'population']] = scaler.fit_transform(df[['gdp', 'population']])

In [103]:
print(df.head())

         country country_code  gold  silver  bronze  total       gdp  \
0  United States          USA    40      44      42    126  0.785823   
1          China          CHN    40      27      24     91  0.112981   
2          Japan          JPN    20      12      13     45  0.319665   
3      Australia          AUS    18      19      16     53  0.620407   
4         France          FRA    16      26      22     64  0.423165   

   gdp_year  population  
0      2023    0.234372  
1      2023    0.987469  
2      2023    0.087084  
3      2023    0.018551  
4      2023    0.047672  


# Predicting Medal Counts (Supervised Learning)

Splitting data

In [104]:
X = df[['gdp', 'population']]
y = df['total']

In [105]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Apply linear regression

In [106]:
lr_model = LinearRegression()

In [107]:
# model training
lr_model.fit(X_train, y_train)

In [108]:
# prediction on test
y_pred = lr_model.predict(X_test)

# calculate accuracy
r2 = r2_score(y_test, y_pred)
print(f'R-squared for Linear Regression: {r2}')

R-squared for Linear Regression: -0.1182356507328477


Apply descision tree regression

In [109]:
dt_model = DecisionTreeRegressor(random_state=42)

In [110]:
dt_model.fit(X_train, y_train)

In [111]:
# Predict on the test data
y_pred_dt = dt_model.predict(X_test)

# calculate accuracy
r2_dt = r2_score(y_test, y_pred_dt)
print(f'R-squared for Decision Tree: {r2_dt}')

R-squared for Decision Tree: 0.0948236090179313


Comparison

In [112]:
print(f'Linear Regression R-squard: {r2}')
print(f'Decision Tree R-squard: {r2_dt}')

Linear Regression R-squard: -0.1182356507328477
Decision Tree R-squard: 0.0948236090179313


# Clustering Countries (Unsupervised Learning)

Applying KMeans clustering

In [113]:
kmeans = KMeans(n_clusters=3, random_state=42)

In [114]:
kmeans.fit(X)

In [115]:
df['Cluster'] = kmeans.labels_

In [116]:
print(df[['country', 'gdp', 'population', 'total', 'Cluster']].head())

         country       gdp  population  total  Cluster
0  United States  0.785823    0.234372    126        0
1          China  0.112981    0.987469     91        2
2          Japan  0.319665    0.087084     45        1
3      Australia  0.620407    0.018551     53        0
4         France  0.423165    0.047672     64        0


Applying Hierarchical Clustering

In [117]:
hier_cluster = AgglomerativeClustering(n_clusters=3)

In [118]:
df['HierCluster'] = hier_cluster.fit_predict(X)

In [119]:
print(df[['country', 'gdp', 'population', 'total', 'HierCluster']].head())

         country       gdp  population  total  HierCluster
0  United States  0.785823    0.234372    126            1
1          China  0.112981    0.987469     91            2
2          Japan  0.319665    0.087084     45            0
3      Australia  0.620407    0.018551     53            1
4         France  0.423165    0.047672     64            1


Comparison

In [120]:
# Calculate the silhouette score for K-Means
sil_score_kmeans = silhouette_score(X, kmeans.labels_)
print(f'Silhouette Score for K-Means: {sil_score_kmeans}')

Silhouette Score for K-Means: 0.690821383040346


In [121]:
# Calculate the silhouette score for Hierarchical Clustering
sil_score_hier = silhouette_score(X, df['HierCluster'])
print(f'Silhouette Score for Hierarchical Clustering: {sil_score_hier}')

Silhouette Score for Hierarchical Clustering: 0.690821383040346


# (Reinforcement Learning)

In [132]:
df = df[['gdp', 'population', 'total']]

In [133]:
# Normalize GDP and Population
scaler = StandardScaler()
df[['gdp', 'population']] = scaler.fit_transform(df[['gdp', 'population']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['gdp', 'population']] = scaler.fit_transform(df[['gdp', 'population']])


In [134]:
# Normalize Medals (scale between 0 and 1)
max_medals = df['total'].max()
df['Medals'] = df['total'] / max_medals

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Medals'] = df['total'] / max_medals


In [135]:
class OlympicMedalEnv(gym.Env):
    def __init__(self, data):
        super(OlympicMedalEnv, self).__init__()
        self.data = data
        self.current_step = 0
        
        # Define the action space (continuous range [0, 1] for predicting normalized medals)
        self.action_space = spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)
        
        # Define the observation space (2 features: GDP and Population)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)

    def reset(self):
        # Reset the step counter and return the first observation (GDP and Population)
        self.current_step = 0
        return self.data.iloc[self.current_step, :2].values

    def step(self, action):
        # Get actual medals for the current step (normalized)
        actual_medals = self.data.iloc[self.current_step, -1]
        
        # Reward is negative absolute error between predicted and actual medals
        reward = -abs(actual_medals - action[0])
        
        # Move to the next step
        self.current_step += 1
        
        # Check if we have reached the end of the data
        done = self.current_step >= len(self.data)
        
        # Get the next observation (GDP and Population) or return zeros if done
        if not done:
            next_state = self.data.iloc[self.current_step, :2].values
        else:
            next_state = np.zeros(2)
        
        return next_state, reward, done, {}

# Create the environment
env = OlympicMedalEnv(df)

In [136]:
# Instantiate the PPO model
model = PPO('MlpPolicy', env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [137]:
# Train the model
model.learn(total_timesteps=10000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 90       |
|    ep_rew_mean     | -33      |
| time/              |          |
|    fps             | 1372     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 90          |
|    ep_rew_mean          | -31.1       |
| time/                   |             |
|    fps                  | 888         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.016723264 |
|    clip_fraction        | 0.104       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.39       |
|    explained_variance   | 0.0117      |
|    learning_rate        | 0.

<stable_baselines3.ppo.ppo.PPO at 0x1eae3d6f110>

In [138]:
obs = env.reset()
for step in range(len(df)):
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)
    actual_medals = df.iloc[step]["Medals"] * max_medals  # Denormalize medals for comparison
    predicted_medals = action[0] * max_medals  # Denormalize predicted medals
    print(f"Step {step}: Predicted Medals: {predicted_medals}, Actual Medals: {actual_medals}, Reward: {reward}")
    
    if done:
        break

Step 0: Predicted Medals: 0.0, Actual Medals: 1.0, Reward: -1.0
Step 1: Predicted Medals: 0.0, Actual Medals: 0.6942341292952826, Reward: -0.6942341292952826
Step 2: Predicted Medals: 0.0, Actual Medals: 0.2923704135119395, Reward: -0.2923704135119395
Step 3: Predicted Medals: 0.7425689101219177, Actual Medals: 0.3622597553873035, Reward: -0.38030915473461424
Step 4: Predicted Medals: 0.0, Actual Medals: 0.458357600465929, Reward: -0.458357600465929
Step 5: Predicted Medals: 0.0, Actual Medals: 0.19627256843331395, Reward: -0.19627256843331395
Step 6: Predicted Medals: 1.0, Actual Medals: 0.4670937682003496, Reward: -0.5329062317996505
Step 7: Predicted Medals: 0.0, Actual Medals: 0.17880023296447292, Reward: -0.17880023296447292
Step 8: Predicted Medals: 0.6858879923820496, Actual Medals: 0.24868957483983695, Reward: -0.4371984175422126
Step 9: Predicted Medals: 0.6582713723182678, Actual Medals: 0.18753640069889346, Reward: -0.4707349716193744
Step 10: Predicted Medals: 0.72626811265

Calculate accuracy

In [140]:
# Reset the environment and start predicting
obs = env.reset()
predictions = []
actual_values = []

for step in range(len(df)):
    action, _ = model.predict(obs)
    obs, reward, done, info = env.step(action)

    predicted_medals = action[0] * max_medals  # De-normalize predicted medals
    actual_medals = df.iloc[step]["Medals"] * max_medals  # De-normalize actual medals

    predictions.append(predicted_medals)
    actual_values.append(actual_medals)

    if done:
        break

In [143]:
# Mean Absolute Error
mae = mean_absolute_error(actual_values, predictions)
print(f'Mean Absolute Error: {mae}')

# Mean Squared Error
mse = mean_squared_error(actual_values, predictions)
print(f'Mean Squared Error: {mse}')

# R-squared Score
r2 = r2_score(actual_values, predictions)
print(f'R-squared Score: {r2}')

Mean Absolute Error: 0.2035214844380182
Mean Squared Error: 0.12549266331533926
R-squared Score: -3.24898692217449
