In [6]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib

# Read the general sheet into memory
general_df = pd.read_excel('General_with_Positions.xlsx')

# Target columns are carried through unscaled
target_columns = ['Fifa Ability Overall', 'Rating']

# Split the sheet into targets (Y) and everything else (X)
Y = general_df[target_columns]
X = general_df.drop(columns=target_columns)

# Min-max scale the numeric feature columns only
scaler_X = MinMaxScaler()
numeric_features = X.select_dtypes(include=['number'])
X_normalized = pd.DataFrame(
    scaler_X.fit_transform(numeric_features),
    columns=numeric_features.columns,
)

# Put any non-numeric columns (e.g., names) back in front of the scaled ones
non_numeric = X.select_dtypes(exclude=['number'])
X_normalized = pd.concat(
    [non_numeric.reset_index(drop=True), X_normalized.reset_index(drop=True)],
    axis=1,
)

# Append the untouched targets as the last columns
df_normalized = pd.concat([X_normalized, Y.reset_index(drop=True)], axis=1)

# Write the normalized table to disk
df_normalized.to_excel('General_Normalized_Features.xlsx', index=False)

# Persist the fitted scaler so the scaling can be inverted later
joblib.dump(scaler_X, 'scaler_X_general.pkl')
df_normalized.head()

Unnamed: 0,Apps,Minutes played,Assists,Yel,Red,Aerials Won per game,Man of the match,Tackles,Interceptions per game,Fouls,...,finishing,creativity,distribution,defense,duels,DEF,MID,OFF,Fifa Ability Overall,Rating
0,0.756757,0.789567,0.866667,0.2,0.0,0.026667,1.0,0.061538,0.03125,0.15,...,1.0,0.927374,0.725552,0.010204,0.493976,0.0,1.0,1.0,94,8.48
1,0.783784,0.783343,0.533333,0.2,0.0,0.146667,0.588235,0.046154,0.0625,0.15,...,0.583333,0.530726,0.749211,0.122449,0.638554,0.0,1.0,1.0,94,7.68
2,0.405405,0.414345,0.466667,0.133333,0.0,0.066667,0.411765,0.123077,0.09375,0.175,...,0.416667,0.581006,0.717666,0.040816,0.0,0.0,1.0,1.0,92,8.26
3,0.810811,0.825133,0.4,0.333333,0.0,0.093333,0.294118,0.076923,0.0625,0.225,...,0.583333,0.418994,0.597792,0.091837,0.53012,0.0,1.0,1.0,91,7.57
4,0.810811,0.7623,0.4,0.4,0.0,0.053333,0.058824,0.215385,0.28125,0.2,...,0.083333,0.430168,0.859621,0.112245,0.722892,0.0,1.0,0.0,91,7.03


In [None]:
# Per-position feature multipliers.
# Outer keys are the position columns ('OFF', 'MID', 'DEF') used as row
# multipliers by apply_position_weighting; inner keys are feature column
# names. Features that are not listed fall back to a weight of 1 in
# weights_to_df.
POSITION_FEATURE_WEIGHTS = {
    'OFF': {'finishing': 3, 'creativity': 3, 'distribution': 1, 'defense': 1, 'duels': 2},
    'MID': {'finishing': 2, 'creativity': 3, 'distribution': 3, 'defense': 2, 'duels': 2},
    'DEF': {'finishing': 1, 'creativity': 1, 'distribution': 2, 'defense': 4, 'duels': 3},
}

def weights_to_df(weights, feature_list):
    """Build a per-feature weight vector aligned with ``feature_list``.

    Despite the name, the result is a ``pd.Series`` (not a DataFrame); callers
    use its ``.values`` for NumPy broadcasting.

    Args:
        weights: Mapping of feature name -> weight for one position.
        feature_list: Iterable of feature names, in the order the weights
            should appear.

    Returns:
        pd.Series indexed by the feature names with exactly one entry per
        element of ``feature_list``. Features missing from ``weights`` get a
        weight of 1.
    """
    features = list(feature_list)
    # Build from a list rather than a dict: a dict would collapse repeated
    # feature names and make the vector shorter than ``feature_list``, which
    # breaks broadcasting against the selected feature columns.
    return pd.Series([weights.get(f, 1) for f in features], index=features)

def apply_position_weighting(df, feature_cols):
    """Multiply each feature column by the weights of the row's position flags.

    Args:
        df: DataFrame containing the 'OFF', 'MID' and 'DEF' columns plus every
            column named in ``feature_cols``.
        feature_cols: Names of the columns to re-weight.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. Columns of ``df`` that are
        not in ``feature_cols`` come first, unchanged, followed by the weighted
        feature columns in ``feature_cols`` order.

    Note:
        The position columns are used as plain multipliers and their
        contributions are added together. A row with several non-zero flags
        therefore gets the sum of those positions' weights, and a row whose
        flags are all 0 gets a weight of 0 for every feature.
    """
    positions = ('OFF', 'MID', 'DEF')

    # Per-position weight vectors, one entry per feature column
    weight_rows = {
        pos: weights_to_df(POSITION_FEATURE_WEIGHTS[pos], feature_cols).values
        for pos in positions
    }

    # Accumulate an (n_rows, n_features) matrix: each flag column is
    # broadcast against its position's weight vector
    weight_matrix = None
    for pos in positions:
        contribution = df[pos].values[:, None] * weight_rows[pos]
        if weight_matrix is None:
            weight_matrix = contribution
        else:
            weight_matrix = weight_matrix + contribution

    # Element-wise scaling of the selected feature values
    weighted_values = df[feature_cols].values * weight_matrix
    weighted_part = pd.DataFrame(weighted_values, columns=feature_cols)

    # Everything that was not weighted (names, targets, flags, ...) is kept as-is
    untouched_part = df.drop(columns=feature_cols).reset_index(drop=True)
    return pd.concat([untouched_part, weighted_part], axis=1)

In [12]:
# Example use:
# Columns that are never re-weighted: the two targets, the position flags
# (they are the multipliers themselves), and a handful of raw stats.
excluded_cols = {
    'Rating', 'Fifa Ability Overall',
    'OFF', 'MID', 'DEF',
    'Minutes played', 'Apps', 'Yel', 'Red', 'Pass success percentage',
}

# Only numeric columns can be multiplied by a weight; the normalization step
# deliberately carries non-numeric columns (e.g., names) through, so skip
# those here instead of failing inside the element-wise multiplication.
feature_cols = [
    col
    for col in df_normalized.select_dtypes(include=['number']).columns
    if col not in excluded_cols
]
df_weighted = apply_position_weighting(df_normalized, feature_cols)

# Save the weighted DataFrame
df_weighted.to_excel('General_Weighted_Features.xlsx', index=False)

# Preview goes last: only a cell's final expression is rendered
df_weighted.head()