In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [3]:
df_u21 = pd.read_csv("../Datos/real_players_u21.csv")

In [4]:
df_u21_fifa = pd.read_csv("../Datos/fifa_players_u21.csv")

In [5]:
# Número de duplicaciones que deseas realizar (por ejemplo, duplicar 2 veces)
num_duplicaciones = 6

# Realizar duplicación de filas
merged_df_oversampled = pd.concat([df_u21] * num_duplicaciones, ignore_index=True)

# Mostrar una muestra del DataFrame resultante
merged_df_oversampled


Unnamed: 0,Player,Age,Nat.,Club,Market value,minutes_played,goals,assists,position,foot,height_in_cm
0,Jude Bellingham,21,England,Real Madrid,180.0,13820.0,47.0,38.0,Midfield,right,186.0
1,Florian Wirtz,21,Germany,Bayer 04 Leverkusen,130.0,10929.0,41.0,50.0,Midfield,right,177.0
2,Jamal Musiala,21,Germany,Bayern Munich,130.0,9175.0,43.0,31.0,Midfield,right,184.0
3,Lamine Yamal,17,Spain,FC Barcelona,120.0,2964.0,7.0,9.0,Attack,left,180.0
4,Eduardo Camavinga,21,France,Real Madrid,100.0,13733.0,4.0,13.0,Midfield,left,182.0
...,...,...,...,...,...,...,...,...,...,...,...
2095,Dexter Lembikisa,20,Jamaica,Wolverhampton Wanderers U21,3.0,1176.0,1.0,0.0,Defender,right,180.0
2096,Anton Gaaei,21,Denmark,Ajax Amsterdam,3.0,4030.0,2.0,9.0,Defender,right,183.0
2097,Nikita Saltykov,20,Russia,Lokomotiv Moscow,3.0,2556.0,4.0,5.0,Attack,right,174.0
2098,Aljoscha Kemlein,20,Germany,1.FC Union Berlin,3.0,108.0,0.0,0.0,Midfield,right,185.0


In [6]:
X = merged_df_oversampled.drop(columns = ["Player", "Nat.", "Club", "position", "foot", "Market value"])
y = merged_df_oversampled["Market value"]

In [7]:
def modelo(data, model, target):

    # Separate features and target
    X = merged_df_oversampled.drop(columns = ["Player", "Nat.", "Club", "position", "foot", "Market value"])
    y = merged_df_oversampled["Market value"]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

    # Normalize the features
    normalizer = MinMaxScaler().fit(X_train)
    X_train_norm = normalizer.transform(X_train)
    X_test_norm = normalizer.transform(X_test)

    # Convert the normalized arrays back to DataFrames
    X_train_norm = pd.DataFrame(X_train_norm, columns=X_train.columns)
    X_test_norm = pd.DataFrame(X_test_norm, columns=X_test.columns)

    # Train the provided model
    model.fit(X_train_norm, y_train)

    # Make predictions with the provided model
    pred = model.predict(X_test_norm)

    # Calculate and print evaluation metrics
    mae = mean_absolute_error(pred, y_test)
    rmse = mean_squared_error(pred, y_test, squared=False)
    r2 = model.score(X_test_norm, y_test)

    print("Model Evaluation Metrics:")
    print("MAE:", mae)
    print("RMSE:", rmse)
    print("R2 score:", r2)

    # Initialize results dictionary
    results = {
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2
    }

    # Check if the model has coefficients (for linear models)
    if hasattr(model, 'coef_'):
        lin_reg_coef = {feature: coef for feature, coef in zip(X_train_norm.columns, model.coef_)}
        results['coefficients'] = lin_reg_coef

    # Check if the model has feature importances (for tree-based models)
    if hasattr(model, 'feature_importances_'):
        tree_importance = {feature: importance for feature, importance in zip(X_train_norm.columns, model.feature_importances_)}
        results['feature_importances'] = tree_importance
    
    return results

In [8]:
target =  merged_df_oversampled["Market value"]
data = merged_df_oversampled
model = LinearRegression()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 9.652409117266295
RMSE: 16.401146467990063
R2 score: 0.2641304944890339


In [9]:
target =  merged_df_oversampled["Market value"]
data = merged_df_oversampled
model = DecisionTreeRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.0
RMSE: 0.0
R2 score: 1.0


In [10]:
target =  merged_df_oversampled["Market value"]
data = merged_df_oversampled
model = GradientBoostingRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 4.823380775997717
RMSE: 6.281028628482196
R2 score: 0.8920770177304718


In [11]:
target =  merged_df_oversampled["Market value"]
data = merged_df_oversampled
model = RandomForestRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.2538142857142857
RMSE: 0.5799531426064073
R2 score: 0.9990798932764965


In [12]:
target =  merged_df_oversampled["Market value"]
data = merged_df_oversampled
model = SVR()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 8.315872798349067
RMSE: 17.198840233248045
R2 score: 0.1908096100261355


In [13]:
target =  merged_df_oversampled["Market value"]
data = merged_df_oversampled
model = XGBRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.14290599607285998
RMSE: 0.19829285622574905
R2 score: 0.9998924361028184


In [14]:
target =  merged_df_oversampled["Market value"]
data = merged_df_oversampled
model = KNeighborsRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 2.0506666666666664
RMSE: 4.095508341027836
R2 score: 0.9541152867937706


In [15]:
df_u21_fifa

Unnamed: 0,name,nation,position,age,overall,potential,total_gk,total_stats,pace,shooting,passing,dribbling,defending,physical,team,altura,foot,cost
0,Jamal Musiala,Germany,CAM,20,86,93,42,2090,85,75,76,91,63,61,FC Bayern München,184,Right,134.5
1,Jude Bellingham,England,CAM,20,88,92,48,2295,80,83,81,87,78,83,Real Madrid,186,Right,128.5
2,Florian Wirtz,Germany,CAM,20,87,92,57,2089,80,76,86,88,50,63,Bayer 04 Leverkusen,177,Right,118.5
3,Pedro González López,Spain,CM,20,86,92,46,2138,78,69,82,88,70,74,FC Barcelona,174,Right,105.0
4,Bukayo Saka,England,RW,21,86,90,50,2196,85,82,81,88,60,70,Arsenal,178,Left,99.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,Thimothée Lo-Tutala,France,GK,20,63,75,314,918,64,62,61,67,21,60,Doncaster Rovers,186,Right,1.0
1996,Manuel Morillo León,Spain,ST,19,62,79,59,1502,76,61,51,65,21,47,Real Betis,182,Right,1.0
1997,Antonino Jastin García López,Portugal,LM,19,62,78,47,1439,77,55,53,65,23,43,Girona,180,Right,1.0
1998,Iker Almena Horcajo,Spain,RM,19,62,80,51,1450,74,54,56,63,25,46,Girona,176,Left,1.0


In [16]:
def modelo(data, model, target):

    # Separate features and target
    X = df_u21_fifa.drop(columns = ["name", "nation", "team", "position", "foot", "cost"])
    y = df_u21_fifa["cost"]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

    # Normalize the features
    normalizer = MinMaxScaler().fit(X_train)
    X_train_norm = normalizer.transform(X_train)
    X_test_norm = normalizer.transform(X_test)

    # Convert the normalized arrays back to DataFrames
    X_train_norm = pd.DataFrame(X_train_norm, columns=X_train.columns)
    X_test_norm = pd.DataFrame(X_test_norm, columns=X_test.columns)

    # Train the provided model
    model.fit(X_train_norm, y_train)

    # Make predictions with the provided model
    pred = model.predict(X_test_norm)

    # Calculate and print evaluation metrics
    mae = mean_absolute_error(pred, y_test)
    rmse = mean_squared_error(pred, y_test, squared=False)
    r2 = model.score(X_test_norm, y_test)

    print("Model Evaluation Metrics:")
    print("MAE:", mae)
    print("RMSE:", rmse)
    print("R2 score:", r2)

    # Initialize results dictionary
    results = {
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2
    }

    # Check if the model has coefficients (for linear models)
    if hasattr(model, 'coef_'):
        lin_reg_coef = {feature: coef for feature, coef in zip(X_train_norm.columns, model.coef_)}
        results['coefficients'] = lin_reg_coef

    # Check if the model has feature importances (for tree-based models)
    if hasattr(model, 'feature_importances_'):
        tree_importance = {feature: importance for feature, importance in zip(X_train_norm.columns, model.feature_importances_)}
        results['feature_importances'] = tree_importance
    
    return results

In [17]:
target =  df_u21_fifa["cost"]
data = df_u21_fifa
model = LinearRegression()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 3.2293775241349976
RMSE: 7.005598824510917
R2 score: 0.5140429102533819


In [18]:
target =  df_u21_fifa["cost"]
data = df_u21_fifa
model = DecisionTreeRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.5217500000000002
RMSE: 3.747542528110922
R2 score: 0.8609405411629258


In [19]:
target =  df_u21_fifa["cost"]
data = df_u21_fifa
model = GradientBoostingRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.37591796157400076
RMSE: 1.9775498680289996
R2 score: 0.9612775985832912


In [20]:
target =  df_u21_fifa["cost"]
data = df_u21_fifa
model = RandomForestRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.3026475000000001
RMSE: 1.5710961324820327
R2 score: 0.9755593408750524


In [21]:
target =  df_u21_fifa["cost"]
data = df_u21_fifa
model = SVR()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 1.6743072833870645
RMSE: 7.860762657298178
R2 score: 0.3881615553809147


In [22]:
target =  df_u21_fifa["cost"]
data = df_u21_fifa
model = XGBRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.2849180838465691
RMSE: 1.7221248490324879
R2 score: 0.9706345499222024


In [23]:
target =  df_u21_fifa["cost"]
data = df_u21_fifa
model = KNeighborsRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 1.2638
RMSE: 4.661886742511019
R2 score: 0.7848056662981084


## Ambos grupos unidos

In [24]:
# Renombrar la columna 'name' a 'Player' en df_players
df_u21_fifa.rename(columns={'name': 'Player'}, inplace=True)

# Seleccionamos solo las columnas que queremos agregar de df_players
df_players_selected = df_u21_fifa[['Player', 'overall', 'potential', 'total_gk', 'total_stats', 
                                  'pace', 'shooting', 'passing', 'dribbling', 
                                  'defending', 'physical', 'cost']]

# Unimos los DataFrames usando la columna 'Player' como clave
df_completo = pd.merge(df_u21, df_players_selected, on='Player', how='left')

# Mostrar el DataFrame resultante
df_completo


Unnamed: 0,Player,Age,Nat.,Club,Market value,minutes_played,goals,assists,position,foot,...,potential,total_gk,total_stats,pace,shooting,passing,dribbling,defending,physical,cost
0,Jude Bellingham,21,England,Real Madrid,180.0,13820.0,47.0,38.0,Midfield,right,...,92.0,48.0,2295.0,80.0,83.0,81.0,87.0,78.0,83.0,128.5
1,Florian Wirtz,21,Germany,Bayer 04 Leverkusen,130.0,10929.0,41.0,50.0,Midfield,right,...,92.0,57.0,2089.0,80.0,76.0,86.0,88.0,50.0,63.0,118.5
2,Jamal Musiala,21,Germany,Bayern Munich,130.0,9175.0,43.0,31.0,Midfield,right,...,93.0,42.0,2090.0,85.0,75.0,76.0,91.0,63.0,61.0,134.5
3,Lamine Yamal,17,Spain,FC Barcelona,120.0,2964.0,7.0,9.0,Attack,left,...,,,,,,,,,,
4,Eduardo Camavinga,21,France,Real Madrid,100.0,13733.0,4.0,13.0,Midfield,left,...,90.0,50.0,2197.0,80.0,67.0,81.0,84.0,78.0,80.0,71.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,Dexter Lembikisa,20,Jamaica,Wolverhampton Wanderers U21,3.0,1176.0,1.0,0.0,Defender,right,...,73.0,45.0,1614.0,83.0,43.0,54.0,60.0,59.0,59.0,1.2
346,Anton Gaaei,21,Denmark,Ajax Amsterdam,3.0,4030.0,2.0,9.0,Defender,right,...,78.0,50.0,1724.0,84.0,48.0,57.0,65.0,59.0,67.0,2.1
347,Nikita Saltykov,20,Russia,Lokomotiv Moscow,3.0,2556.0,4.0,5.0,Attack,right,...,,,,,,,,,,
348,Aljoscha Kemlein,20,Germany,1.FC Union Berlin,3.0,108.0,0.0,0.0,Midfield,right,...,78.0,53.0,1738.0,60.0,57.0,63.0,65.0,62.0,63.0,1.9


In [25]:
df_completo.isna().sum()

Player              0
Age                 0
Nat.                0
Club                0
Market value        0
minutes_played      0
goals               0
assists             0
position            0
foot                0
height_in_cm        0
overall           109
potential         109
total_gk          109
total_stats       109
pace              109
shooting          109
passing           109
dribbling         109
defending         109
physical          109
cost              109
dtype: int64

In [26]:
df_completo.dropna(inplace=True)

In [27]:
df_completo

Unnamed: 0,Player,Age,Nat.,Club,Market value,minutes_played,goals,assists,position,foot,...,potential,total_gk,total_stats,pace,shooting,passing,dribbling,defending,physical,cost
0,Jude Bellingham,21,England,Real Madrid,180.0,13820.0,47.0,38.0,Midfield,right,...,92.0,48.0,2295.0,80.0,83.0,81.0,87.0,78.0,83.0,128.5
1,Florian Wirtz,21,Germany,Bayer 04 Leverkusen,130.0,10929.0,41.0,50.0,Midfield,right,...,92.0,57.0,2089.0,80.0,76.0,86.0,88.0,50.0,63.0,118.5
2,Jamal Musiala,21,Germany,Bayern Munich,130.0,9175.0,43.0,31.0,Midfield,right,...,93.0,42.0,2090.0,85.0,75.0,76.0,91.0,63.0,61.0,134.5
4,Eduardo Camavinga,21,France,Real Madrid,100.0,13733.0,4.0,13.0,Midfield,left,...,90.0,50.0,2197.0,80.0,67.0,81.0,84.0,78.0,80.0,71.5
7,Xavi Simons,21,Netherlands,RB Leipzig,80.0,7397.0,32.0,27.0,Midfield,right,...,89.0,43.0,2139.0,87.0,77.0,78.0,85.0,61.0,74.0,60.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
342,Matias Fernandez-Pardo,19,Belgium,KAA Gent,3.0,260.0,1.0,0.0,Attack,right,...,78.0,50.0,1560.0,75.0,58.0,59.0,67.0,28.0,51.0,1.0
345,Dexter Lembikisa,20,Jamaica,Wolverhampton Wanderers U21,3.0,1176.0,1.0,0.0,Defender,right,...,73.0,45.0,1614.0,83.0,43.0,54.0,60.0,59.0,59.0,1.2
346,Anton Gaaei,21,Denmark,Ajax Amsterdam,3.0,4030.0,2.0,9.0,Defender,right,...,78.0,50.0,1724.0,84.0,48.0,57.0,65.0,59.0,67.0,2.1
348,Aljoscha Kemlein,20,Germany,1.FC Union Berlin,3.0,108.0,0.0,0.0,Midfield,right,...,78.0,53.0,1738.0,60.0,57.0,63.0,65.0,62.0,63.0,1.9


In [28]:
num_duplicaciones = 5

# Realizar duplicación de filas
df_completo_over = pd.concat([df_completo] * num_duplicaciones, ignore_index=True)

# Mostrar una muestra del DataFrame resultante
df_completo_over

Unnamed: 0,Player,Age,Nat.,Club,Market value,minutes_played,goals,assists,position,foot,...,potential,total_gk,total_stats,pace,shooting,passing,dribbling,defending,physical,cost
0,Jude Bellingham,21,England,Real Madrid,180.0,13820.0,47.0,38.0,Midfield,right,...,92.0,48.0,2295.0,80.0,83.0,81.0,87.0,78.0,83.0,128.5
1,Florian Wirtz,21,Germany,Bayer 04 Leverkusen,130.0,10929.0,41.0,50.0,Midfield,right,...,92.0,57.0,2089.0,80.0,76.0,86.0,88.0,50.0,63.0,118.5
2,Jamal Musiala,21,Germany,Bayern Munich,130.0,9175.0,43.0,31.0,Midfield,right,...,93.0,42.0,2090.0,85.0,75.0,76.0,91.0,63.0,61.0,134.5
3,Eduardo Camavinga,21,France,Real Madrid,100.0,13733.0,4.0,13.0,Midfield,left,...,90.0,50.0,2197.0,80.0,67.0,81.0,84.0,78.0,80.0,71.5
4,Xavi Simons,21,Netherlands,RB Leipzig,80.0,7397.0,32.0,27.0,Midfield,right,...,89.0,43.0,2139.0,87.0,77.0,78.0,85.0,61.0,74.0,60.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,Matias Fernandez-Pardo,19,Belgium,KAA Gent,3.0,260.0,1.0,0.0,Attack,right,...,78.0,50.0,1560.0,75.0,58.0,59.0,67.0,28.0,51.0,1.0
1201,Dexter Lembikisa,20,Jamaica,Wolverhampton Wanderers U21,3.0,1176.0,1.0,0.0,Defender,right,...,73.0,45.0,1614.0,83.0,43.0,54.0,60.0,59.0,59.0,1.2
1202,Anton Gaaei,21,Denmark,Ajax Amsterdam,3.0,4030.0,2.0,9.0,Defender,right,...,78.0,50.0,1724.0,84.0,48.0,57.0,65.0,59.0,67.0,2.1
1203,Aljoscha Kemlein,20,Germany,1.FC Union Berlin,3.0,108.0,0.0,0.0,Midfield,right,...,78.0,53.0,1738.0,60.0,57.0,63.0,65.0,62.0,63.0,1.9


In [29]:
df_completo_over

Unnamed: 0,Player,Age,Nat.,Club,Market value,minutes_played,goals,assists,position,foot,...,potential,total_gk,total_stats,pace,shooting,passing,dribbling,defending,physical,cost
0,Jude Bellingham,21,England,Real Madrid,180.0,13820.0,47.0,38.0,Midfield,right,...,92.0,48.0,2295.0,80.0,83.0,81.0,87.0,78.0,83.0,128.5
1,Florian Wirtz,21,Germany,Bayer 04 Leverkusen,130.0,10929.0,41.0,50.0,Midfield,right,...,92.0,57.0,2089.0,80.0,76.0,86.0,88.0,50.0,63.0,118.5
2,Jamal Musiala,21,Germany,Bayern Munich,130.0,9175.0,43.0,31.0,Midfield,right,...,93.0,42.0,2090.0,85.0,75.0,76.0,91.0,63.0,61.0,134.5
3,Eduardo Camavinga,21,France,Real Madrid,100.0,13733.0,4.0,13.0,Midfield,left,...,90.0,50.0,2197.0,80.0,67.0,81.0,84.0,78.0,80.0,71.5
4,Xavi Simons,21,Netherlands,RB Leipzig,80.0,7397.0,32.0,27.0,Midfield,right,...,89.0,43.0,2139.0,87.0,77.0,78.0,85.0,61.0,74.0,60.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,Matias Fernandez-Pardo,19,Belgium,KAA Gent,3.0,260.0,1.0,0.0,Attack,right,...,78.0,50.0,1560.0,75.0,58.0,59.0,67.0,28.0,51.0,1.0
1201,Dexter Lembikisa,20,Jamaica,Wolverhampton Wanderers U21,3.0,1176.0,1.0,0.0,Defender,right,...,73.0,45.0,1614.0,83.0,43.0,54.0,60.0,59.0,59.0,1.2
1202,Anton Gaaei,21,Denmark,Ajax Amsterdam,3.0,4030.0,2.0,9.0,Defender,right,...,78.0,50.0,1724.0,84.0,48.0,57.0,65.0,59.0,67.0,2.1
1203,Aljoscha Kemlein,20,Germany,1.FC Union Berlin,3.0,108.0,0.0,0.0,Midfield,right,...,78.0,53.0,1738.0,60.0,57.0,63.0,65.0,62.0,63.0,1.9


In [30]:
df_completo_over.isna().sum()

Player            0
Age               0
Nat.              0
Club              0
Market value      0
minutes_played    0
goals             0
assists           0
position          0
foot              0
height_in_cm      0
overall           0
potential         0
total_gk          0
total_stats       0
pace              0
shooting          0
passing           0
dribbling         0
defending         0
physical          0
cost              0
dtype: int64

In [31]:
df_completo_over.columns

Index(['Player', 'Age', 'Nat.', 'Club', 'Market value', 'minutes_played',
       'goals', 'assists', 'position', 'foot', 'height_in_cm', 'overall',
       'potential', 'total_gk', 'total_stats', 'pace', 'shooting', 'passing',
       'dribbling', 'defending', 'physical', 'cost'],
      dtype='object')

In [32]:
def modelo(data, model, target):

    # Separate features and target
    X = df_completo_over.drop(columns = ['Player', 'Nat.', 'Club', 'position', 'foot', 'cost'])
    y = df_completo_over["cost"]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

    # Normalize the features
    normalizer = MinMaxScaler().fit(X_train)
    X_train_norm = normalizer.transform(X_train)
    X_test_norm = normalizer.transform(X_test)

    # Convert the normalized arrays back to DataFrames
    X_train_norm = pd.DataFrame(X_train_norm, columns=X_train.columns)
    X_test_norm = pd.DataFrame(X_test_norm, columns=X_test.columns)

    # Train the provided model
    model.fit(X_train_norm, y_train)

    # Make predictions with the provided model
    pred = model.predict(X_test_norm)

    # Calculate and print evaluation metrics
    mae = mean_absolute_error(pred, y_test)
    rmse = mean_squared_error(pred, y_test, squared=False)
    r2 = model.score(X_test_norm, y_test)

    print("Model Evaluation Metrics:")
    print("MAE:", mae)
    print("RMSE:", rmse)
    print("R2 score:", r2)

    # Initialize results dictionary
    results = {
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2
    }

    # Check if the model has coefficients (for linear models)
    if hasattr(model, 'coef_'):
        lin_reg_coef = {feature: coef for feature, coef in zip(X_train_norm.columns, model.coef_)}
        results['coefficients'] = lin_reg_coef

    # Check if the model has feature importances (for tree-based models)
    if hasattr(model, 'feature_importances_'):
        tree_importance = {feature: importance for feature, importance in zip(X_train_norm.columns, model.feature_importances_)}
        results['feature_importances'] = tree_importance
    
    return results

In [52]:
target =  df_completo_over["cost"]
data = df_completo_over
model = LinearRegression()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 5.464757641836504
RMSE: 8.017963890467337
R2 score: 0.8710949155969889


In [53]:
target =  df_completo_over["cost"]
data = df_completo_over
model = DecisionTreeRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.0
RMSE: 0.0
R2 score: 1.0


In [54]:
target =  df_completo_over["cost"]
data = df_completo_over
model = GradientBoostingRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 2.4486711924373132
RMSE: 3.5113003786796884
R2 score: 0.9752783290179556


In [55]:
target =  df_completo_over["cost"]
data = df_completo_over
model = RandomForestRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.46138174273858923
RMSE: 1.5244575131972578
R2 score: 0.9953401438998639


In [37]:
target =  df_completo_over["cost"]
data = df_completo_over
model = SVR()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 3.417574193332447
RMSE: 13.356172914654111
R2 score: 0.4216905121624521


In [56]:
target =  df_completo_over["cost"]
data = df_completo_over
model = XGBRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.0014532433505869504
RMSE: 0.0022656675711346407
R2 score: 0.9999999897071834


In [39]:
target =  df_completo_over["cost"]
data = df_completo_over
model = KNeighborsRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.9346058091286309
RMSE: 2.1118751377651033
R2 score: 0.9855411841533213


In [40]:
def modelo(data, model, target):

    # Separate features and target
    X = df_completo_over.drop(columns = ['Player', 'Nat.', 'Club', 'position', 'foot', 'Market value'])
    y = df_completo_over["Market value"]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

    # Normalize the features
    normalizer = MinMaxScaler().fit(X_train)
    X_train_norm = normalizer.transform(X_train)
    X_test_norm = normalizer.transform(X_test)

    # Convert the normalized arrays back to DataFrames
    X_train_norm = pd.DataFrame(X_train_norm, columns=X_train.columns)
    X_test_norm = pd.DataFrame(X_test_norm, columns=X_test.columns)

    # Train the provided model
    model.fit(X_train_norm, y_train)

    # Make predictions with the provided model
    pred = model.predict(X_test_norm)

    # Calculate and print evaluation metrics
    mae = mean_absolute_error(pred, y_test)
    rmse = mean_squared_error(pred, y_test, squared=False)
    r2 = model.score(X_test_norm, y_test)

    print("Model Evaluation Metrics:")
    print("MAE:", mae)
    print("RMSE:", rmse)
    print("R2 score:", r2)

    # Initialize results dictionary
    results = {
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2
    }

    # Check if the model has coefficients (for linear models)
    if hasattr(model, 'coef_'):
        lin_reg_coef = {feature: coef for feature, coef in zip(X_train_norm.columns, model.coef_)}
        results['coefficients'] = lin_reg_coef

    # Check if the model has feature importances (for tree-based models)
    if hasattr(model, 'feature_importances_'):
        tree_importance = {feature: importance for feature, importance in zip(X_train_norm.columns, model.feature_importances_)}
        results['feature_importances'] = tree_importance
    
    return results

In [41]:
target =  df_completo_over["Market value"]
data = df_completo_over
model = LinearRegression()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 5.464757641836504
RMSE: 8.017963890467337
R2 score: 0.8710949155969889


In [42]:
target =  df_completo_over["Market value"]
data = df_completo_over
model = DecisionTreeRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.0
RMSE: 0.0
R2 score: 1.0


In [57]:
target =  df_completo_over["Market value"]
data = df_completo
model = GradientBoostingRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 2.4486711924373132
RMSE: 3.5113003786796875
R2 score: 0.9752783290179556


In [58]:
target =  df_completo_over["Market value"]
data = df_completo
model = RandomForestRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.5409460580912863
RMSE: 1.8394608965785308
R2 score: 0.9932154198799037


In [45]:
target =  df_completo_over["Market value"]
data = df_completo_over
model = SVR()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 7.862900102988761
RMSE: 17.44245047514149
R2 score: 0.38996205055922195


In [46]:
target =  df_completo_over["Market value"]
data = df_completo_over
model = XGBRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 0.0014532433505869504
RMSE: 0.0022656675711346407
R2 score: 0.9999999897071834


In [59]:
target =  df_completo_over["Market value"]
data = df_completo_over
model = KNeighborsRegressor()
results = modelo(data, model, target)

Model Evaluation Metrics:
MAE: 2.8441493775933613
RMSE: 4.863054723608741
R2 score: 0.9525801507665111


In [48]:
datos_prueba_jugadores = {
    'hola': input()
}

In [60]:
df_completo_over

Unnamed: 0,Player,Age,Nat.,Club,Market value,minutes_played,goals,assists,position,foot,...,potential,total_gk,total_stats,pace,shooting,passing,dribbling,defending,physical,cost
0,Jude Bellingham,21,England,Real Madrid,180.0,13820.0,47.0,38.0,Midfield,right,...,92.0,48.0,2295.0,80.0,83.0,81.0,87.0,78.0,83.0,128.5
1,Florian Wirtz,21,Germany,Bayer 04 Leverkusen,130.0,10929.0,41.0,50.0,Midfield,right,...,92.0,57.0,2089.0,80.0,76.0,86.0,88.0,50.0,63.0,118.5
2,Jamal Musiala,21,Germany,Bayern Munich,130.0,9175.0,43.0,31.0,Midfield,right,...,93.0,42.0,2090.0,85.0,75.0,76.0,91.0,63.0,61.0,134.5
3,Eduardo Camavinga,21,France,Real Madrid,100.0,13733.0,4.0,13.0,Midfield,left,...,90.0,50.0,2197.0,80.0,67.0,81.0,84.0,78.0,80.0,71.5
4,Xavi Simons,21,Netherlands,RB Leipzig,80.0,7397.0,32.0,27.0,Midfield,right,...,89.0,43.0,2139.0,87.0,77.0,78.0,85.0,61.0,74.0,60.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1200,Matias Fernandez-Pardo,19,Belgium,KAA Gent,3.0,260.0,1.0,0.0,Attack,right,...,78.0,50.0,1560.0,75.0,58.0,59.0,67.0,28.0,51.0,1.0
1201,Dexter Lembikisa,20,Jamaica,Wolverhampton Wanderers U21,3.0,1176.0,1.0,0.0,Defender,right,...,73.0,45.0,1614.0,83.0,43.0,54.0,60.0,59.0,59.0,1.2
1202,Anton Gaaei,21,Denmark,Ajax Amsterdam,3.0,4030.0,2.0,9.0,Defender,right,...,78.0,50.0,1724.0,84.0,48.0,57.0,65.0,59.0,67.0,2.1
1203,Aljoscha Kemlein,20,Germany,1.FC Union Berlin,3.0,108.0,0.0,0.0,Midfield,right,...,78.0,53.0,1738.0,60.0,57.0,63.0,65.0,62.0,63.0,1.9


In [61]:
df_completo_over.to_csv ("Tabla_total.csv", index= False)