In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Load the dataset from a CSV file located in the notebook's working directory.
red_df = pd.read_csv('reduced_df.csv')

In [None]:
# Draw a reproducible random sample of 30 distinct `ID` values and keep every
# row of red_df that belongs to one of them.
unique_ids = red_df['ID'].unique()

# random_state is fixed so the same IDs are selected on every run.
selected_ids = pd.Series(unique_ids).sample(n=30, random_state=42)

# Take an explicit copy: a boolean-mask selection is flagged by pandas as a
# potential view of red_df, and assigning new columns to it later raises
# SettingWithCopyWarning (and leaves it ambiguous whether red_df is affected).
testing_df = red_df[red_df['ID'].isin(selected_ids)].copy()

In [None]:
# Load the fitted model from disk.
# NOTE(review): the variable is named `rf_model` but the file name suggests an
# XGBoost model — confirm which estimator this actually is.
# joblib.load unpickles the file, which can execute arbitrary code: only load
# model files that come from a trusted source.
rf_model = joblib.load('xgb_red_model.pkl')

In [None]:
# Nationality indicator columns that are kept as individual features.
important_nationalities = [
    'Nationality_St. Kitts & Nevis',
    'Nationality_Ireland',
    'Nationality_Ukraine',
    'Nationality_Germany',
    'Nationality_Türkiye',
    'Nationality_Netherlands',
    'Nationality_Cameroon',
    'Nationality_Belgium',
    'Nationality_England',
    'Nationality_Egypt',
]

# Club indicator columns that are kept as individual features.
important_clubs = [
    'Club_Real Madrid',
    'Club_Manchester City',
    'Club_FC Barcelona',
    'Club_Paris Saint-Germain',
    'Club_Bayern Munich',
    'Club_Liverpool FC',
    'Club_Arsenal FC',
    'Club_Manchester United',
    'Club_Chelsea FC',
    'Club_Stade Rennais FC',
    'Club_Brentford FC',
    'Club_Sporting CP',
    'Club_Juventus FC',
    'Club_AC Milan',
    'Club_Tottenham Hotspur',
    'Club_SSC Napoli',
    'Club_Inter Milan',
    'Club_Bayer 04 Leverkusen',
]

In [None]:
# Collapse every club / nationality indicator column that is NOT in the
# "important" lists into a single aggregate column ('Club_Other' and
# 'Nationality_Other'), then remove the individual columns that were merged.
club_columns = [col for col in testing_df.columns if col.startswith('Club_')]
nationality_columns = [col for col in testing_df.columns if col.startswith('Nationality_')]

# Accept array-likes exposing .tolist() (e.g. a pandas Index or NumPy array)
# as well as plain Python lists.
if not isinstance(important_clubs, list):
    important_clubs = important_clubs.tolist()
if not isinstance(important_nationalities, list):
    important_nationalities = important_nationalities.tolist()

# Columns to merge. If an aggregate column already exists in the input it is
# part of this selection too, so its current value is carried into the total.
other_club_columns = [col for col in club_columns if col not in important_clubs]
other_nationality_columns = [col for col in nationality_columns if col not in important_nationalities]

# Row-wise totals are computed BEFORE anything is dropped.
club_other = testing_df[other_club_columns].sum(axis=1)
nationality_other = testing_df[other_nationality_columns].sum(axis=1)

# drop() + assign() builds a new frame instead of writing into what may be a
# slice of red_df (the cause of SettingWithCopyWarning). Assigning the totals
# AFTER the drop also guarantees the aggregate columns survive: dropping every
# non-important 'Club_*' / 'Nationality_*' column after creating the aggregate
# would delete 'Club_Other' / 'Nationality_Other' whenever they already
# existed in the input.
testing_df = (
    testing_df
    .drop(columns=other_club_columns + other_nationality_columns)
    .assign(Club_Other=club_other, Nationality_Other=nationality_other)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testing_df['Club_Other'] = testing_df[club_columns].loc[:, ~testing_df[club_columns].columns.isin(important_clubs)].sum(axis=1)


In [None]:
# Bring testing_df to exactly the column layout of red_df: same columns, same
# order. Columns testing_df lacks are created and filled with 0; columns that
# red_df does not have are discarded by the final selection.
reference_columns = red_df.columns
present_columns = set(testing_df.columns)

matching_columns = [col for col in reference_columns if col in present_columns]
missing_columns = [col for col in reference_columns if col not in present_columns]

# Add all absent columns in one step, each holding the constant 0.
testing_df = testing_df.assign(**{col: 0 for col in missing_columns})

# Select in red_df's order.
testing_df = testing_df[reference_columns]

In [None]:
# Build the feature matrix by removing the player name, the season label and
# both market-value columns; every other column is passed to the model.
# NOTE(review): this keeps columns such as 'ID' as features — confirm that
# matches the columns the model was fitted on.
non_feature_columns = ['Player', 'Season', 'Market_Value_normalizada', 'Market Value']
X_test = testing_df.drop(columns=non_feature_columns)

# Store the predictions next to the actual values.
testing_df['Predicted Market Value'] = rf_model.predict(X_test)

# Both metrics compare against 'Market_Value_normalizada', i.e. they are
# expressed on the normalized scale, not in original market-value units.
actual_normalized = testing_df['Market_Value_normalizada']
predicted_normalized = testing_df['Predicted Market Value']

rmse = np.sqrt(mean_squared_error(actual_normalized, predicted_normalized))
r2 = r2_score(actual_normalized, predicted_normalized)

print(f"RMSE: {rmse}")
print(f"R2 Score: {r2}")



RMSE: 0.10910603886155935
R2 Score: 0.4926207168067527


Aquí los valores están normalizados; hay que cambiarlos (desnormalizarlos a la escala original).


In [None]:
# Preview the first rows of testing_df as a quick visual sanity check.
testing_df.head()

Unnamed: 0,ID,Age,days_injured,injuries,games_missed,appearances,own_goals,assists,substitutions_on,substitutions_off,...,Nationality_Netherlands,Nationality_Cameroon,Nationality_Belgium,Nationality_England,Nationality_Egypt,Market_Value_normalizada,Market Value,Player,Season,Predicted Market Value
41,792380,19,50,5,10,25.0,0.0,2.0,6.0,9.0,...,0,0,0,0,0,0.09775,19550000.0,Aleksandar Pavlovic,23/24,0.178166
42,792380,20,52,2,8,9.0,0.0,1.0,0.0,6.0,...,0,0,0,0,0,0.25,50000000.0,Aleksandar Pavlovic,24/25,0.265821
58,349066,17,3,1,0,7.0,0.0,0.0,2.0,2.0,...,0,0,0,0,0,0.0195,3900000.0,Alexander Isak,16/17,0.094379
59,349066,18,32,1,8,13.0,0.0,1.0,10.0,2.0,...,0,0,0,0,0,0.03,6000000.0,Alexander Isak,17/18,0.080107
60,349066,19,61,5,15,29.0,0.0,9.0,1.0,8.0,...,0,0,0,0,0,0.019,3800000.0,Alexander Isak,18/19,0.210602


In [None]:
# Interactive figure: for each player, actual vs. predicted market value per
# season, with a dropdown that switches between players.
players = testing_df['Player'].unique()

# --- Denormalize the predictions -------------------------------------------
# Predictions are on the scale of 'Market_Value_normalizada'. The linear map
# back to 'Market Value' units is recovered from the two paired columns.
# Using only the sample's min/max of 'Market Value' would be wrong, because it
# presumes the sample's normalized values span exactly [0, 1], which a subset
# of players generally does not.
# Assumes the normalized column is an increasing linear rescaling of
# 'Market Value' (as min-max scaling is), so the extremes of both columns
# correspond to the same rows.
original_min = testing_df['Market Value'].min()
original_max = testing_df['Market Value'].max()
normalized_min = testing_df['Market_Value_normalizada'].min()
normalized_max = testing_df['Market_Value_normalizada'].max()

normalized_span = normalized_max - normalized_min
# Market-value units per one unit of the normalized scale. Falls back to 0 when
# every row has the same value, so predictions map onto that single value
# instead of dividing by zero.
value_per_normalized_unit = (original_max - original_min) / normalized_span if normalized_span else 0.0

testing_df['Denormalized_MV'] = (
    (testing_df['Predicted Market Value'] - normalized_min) * value_per_normalized_unit
    + original_min
)

# --- Traces ------------------------------------------------------------------
fig = go.Figure()

# Two traces per player (actual, then predicted), added in player order, so
# player i owns traces 2*i and 2*i + 1. Only the first player's pair starts
# visible; setting this here (rather than indexing fig.data afterwards) also
# keeps the cell from failing when there are no players.
for position, player in enumerate(players):
    player_data = testing_df[testing_df['Player'] == player]
    show_initially = position == 0

    # Actual market value
    fig.add_trace(go.Scatter(
        x=player_data['Season'],
        y=player_data['Market Value'],
        mode='lines+markers',
        name=f'{player} - Actual',
        visible=show_initially
    ))

    # Predicted market value, converted back to market-value units
    fig.add_trace(go.Scatter(
        x=player_data['Season'],
        y=player_data['Denormalized_MV'],
        mode='lines+markers',
        name=f'{player} - Predicted',
        visible=show_initially
    ))

# Chronological order for the categorical season axis.
custom_order = ['11/12', '12/13', '13/14', '14/15', '15/16', '16/17', '17/18', '18/19', '19/20', '20/21', '21/22', '22/23', '23/24', '24/25']

# --- Dropdown ----------------------------------------------------------------
# One button per player: show only that player's two traces and update the title.
trace_count = len(fig.data)
dropdown_options = []
for position, player in enumerate(players):
    visibility = [False] * trace_count
    visibility[2 * position:2 * position + 2] = [True, True]
    dropdown_options.append({
        'label': player,
        'method': 'update',
        'args': [
            {'visible': visibility},
            {'title': f"Market Value Over Seasons for {player}"}
        ]
    })

# --- Layout ------------------------------------------------------------------
fig.update_layout(
    title={
        'text': "Market Value Over Seasons",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    },
    # The x-axis title is set here only; a separate xaxis_title argument would
    # just repeat it.
    xaxis={
        'title': "Season",
        'categoryorder': 'array',
        'categoryarray': custom_order
    },
    updatemenus=[
        {
            'buttons': dropdown_options,
            'direction': 'down',
            'showactive': True,
            'x': 0.1,
            'xanchor': 'left',
            'y': 1.2,
            'yanchor': 'top'
        }
    ],
    yaxis_title="Market Value",
    legend_title="Legend"
)

fig.show()

In [None]:
# Write testing_df to an Excel workbook in the working directory;
# index=False leaves the DataFrame index out of the file.
testing_df.to_excel('result_df.xlsx', index=False)
