In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
# Root folder holding the season's CSV exports. The default is the original
# author's local folder; set the LOSC_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
path_folder = os.environ.get(
    "LOSC_DATA_DIR",
    "/Users/matteolemesre/Desktop/Data LOSC/csv/csv24_25/",
)
# Input file (cleaned per-player match data) and output file (computed ratings).
path_start = os.path.join(path_folder, "clean/data_players.csv")
path_end = os.path.join(path_folder, "ratings/data_players.csv")

In [3]:
# Load the cleaned player table; the first CSV column becomes the DataFrame
# index (it is later written out as the "Player" column of the ratings).
data = pd.read_csv(filepath_or_buffer=path_start, index_col=0)

In [4]:
def new_note(score, target, variation_factor):
    """Pull a raw score towards ``target`` and clamp the result to [0, 10].

    The gap between ``score`` and ``target`` is damped asymmetrically
    (scores below the target keep 3/4 of their gap, scores above keep 9/10),
    then scaled by ``variation_factor`` and added back to ``target``.

    Parameters
    ----------
    score : float
        Raw score to adjust.
    target : float
        Value the scores are centred on.
    variation_factor : float
        Multiplier applied to the damped gap.

    Returns
    -------
    float
        The adjusted score, limited to the 0-10 range.
    """
    gap = score - target
    if gap < 0:
        # Below target: keep three quarters of the deficit.
        gap = gap * 3 / 4
    elif gap > 0:
        # Above target: keep nine tenths of the surplus.
        gap = gap * 9 / 10

    adjusted = target + variation_factor * gap

    # Clamp to the 0-10 scale.
    if adjusted > 10:
        return 10
    if adjusted < 0:
        return 0
    return adjusted

In [5]:
def get_position_indices(position):
    """Return the stat configuration used to rate a player at ``position``.

    Parameters
    ----------
    position : str
        Position code; 'FW', 'MF' and 'DF' are supported.

    Returns
    -------
    tuple
        ``(positive_stats, negative_stats, weights)`` as three lists.
        ``weights`` holds one entry per positive stat, in the same order;
        the negative stats have no weight of their own.

    Raises
    ------
    ValueError
        If ``position`` is not one of the supported codes.
    """
    # ``weighted`` pairs every positive stat with its weight so the two stay
    # aligned; ``penalties`` lists the negative stats.
    if position == 'FW':
        weighted = [
            ("Goals", 5),
            ("Expected Goals (xG)", 4),
            ("Shots on Target", 3),
            ("Shots Total", 2),
            ("Goal-Creating Actions (GCA)", 3),
            ("Shot-Creating Actions (SCA)", 2.5),
            ("Key Passes", 2),
            ("Expected Assists (xA)", 2),
            ("Passes into Penalty Area", 1.5),
            ("Passes into Final Third", 1),
            ("Carries into Penalty Area", 1),
            ("Carries into Final Third", 1.5),
            ("Progressive Passes", 1.5),
            ("Progressive Carries", 1),
            ("Successful Take-Ons", 1.5),
            ("Passes Received", 1),
            ("Touches in Attacking Third", 1),
            ("Touches in Attacking Penalty Area", 1),
        ]
        penalties = ["Offsides", "Miscontrols", "Dispossessed", "Ball Losses"]
    elif position == 'MF':
        weighted = [
            ("Passes Completed", 2),
            ("Progressive Passes", 2),
            ("Passes into Final Third", 1.5),
            ("Passes Attempted", 1.5),
            ("Interceptions", 1.5),
            ("Tackles", 1.5),
            ("Progressive Carries", 1.5),
            ("Carries into Final Third", 1),
            ("Key Passes", 1),
            ("Blocks", 1),
            ("Switches", 1),
            ("Ball Recoveries", 1),
            ("Dribblers Tackled", 1),
            ("Successful Take-Ons", 1),
            ("Passes into Penalty Area", 1),
        ]
        penalties = [
            "Miscontrols",
            "Dispossessed",
            "Ball Losses",
            "Errors Leading to Shot",
        ]
    elif position == 'DF':
        weighted = [
            ("Tackles", 3),
            ("Interceptions", 2.5),
            ("Clearances", 2),
            ("Blocks", 2),
            ("Aerials Won", 2),
            ("Ball Recoveries", 1.5),
            ("Dribblers Tackled", 1.5),
            ("Tackles in Defensive Third", 1),
            ("Passes Completed", 1),
            ("Progressive Passes", 1),
            ("Touches in Defensive Third", 1),
            ("Progressive Carries", 1),
            ("Passes Attempted", 1),
            ("Touches in Defensive Penalty Area", 1),
            ("Passes Blocked", 1),
        ]
        penalties = [
            "Errors Leading to Shot",
            "Own Goals",
            "Fouls Committed",
            "Dispossessed",
            "Ball Losses",
        ]
    else:
        raise ValueError(f"Poste inconnu ou non géré : {position}")

    rewarded = [name for name, _ in weighted]
    weights = [weight for _, weight in weighted]
    return rewarded, penalties, weights


In [6]:
def data_by_position(data):
    """Collect, per position, the observed values of every rated stat.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table with a 'Position' column and one column per stat
        returned by ``get_position_indices`` for the positions it contains.

    Returns
    -------
    dict
        ``{position: {stat: [values...]}}`` where each list holds the
        non-null values of that stat among the rows of that position.

    Raises
    ------
    ValueError
        Propagated from ``get_position_indices`` for an unsupported position.
    """
    pools = {}
    for role in data['Position'].unique():
        rewarded, penalised, _ = get_position_indices(role)
        tracked = rewarded + penalised
        # Rows of this position, restricted to the stats that matter for it.
        subset = data.loc[data['Position'] == role, tracked]
        pools[role] = {name: subset[name].dropna().tolist() for name in tracked}
    return pools

In [7]:
def _build_scoring_plan(position, negative_weight):
    """Return ``[(stat, weight, lower_is_better), ...]`` for ``position``.

    Weights from ``get_position_indices`` are matched positionally to the
    positive stats followed by the negative stats. Negative stats that have
    no weight there receive ``negative_weight`` (and are left out entirely
    when it is 0).
    """
    positive_indices, negative_indices, weights = get_position_indices(position)
    negative_set = set(negative_indices)
    plan = []
    for i, stat in enumerate(list(positive_indices) + list(negative_indices)):
        if i < len(weights):
            weight = weights[i]
        elif stat in negative_set and negative_weight > 0:
            weight = negative_weight
        else:
            continue
        plan.append((stat, weight, stat in negative_set))
    return plan


def _zero_if_missing(value):
    """Return 0 for a missing (NaN/None) value, the value itself otherwise."""
    return 0 if pd.isnull(value) else value


def rate_players_by_position(data, target, variation_factor, negative_weight=1.0):
    """Rate every player row on a 0-10 scale relative to same-position peers.

    For each row with at least 20 minutes played, every rated stat is scaled
    to 90 minutes and turned into a centile against the values collected by
    ``data_by_position`` for the player's position (inverted for negative
    stats, where lower is better). The weighted mean of the centiles is
    scaled to 10, a bonus of ``Goals`` plus ``max(Assists, xA)`` is added, and
    the result is passed through ``new_note``.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table indexed by player, with 'Minutes', 'Game Week',
        'Position', 'Team', 'League' and the stat columns.
    target : float
        Centre value forwarded to ``new_note``.
    variation_factor : float
        Spread factor forwarded to ``new_note``.
    negative_weight : float, default 1.0
        Weight given to each negative stat. The weight table only covers the
        positive stats, so negative stats used to be dropped silently by
        ``zip`` truncation; pass 0 to reproduce that previous behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per rated player with columns Player, Game Week, Position,
        Team, League, Minutes and Rating (rounded to 2 decimals).

    Raises
    ------
    ValueError
        If ``negative_weight`` is negative, or (from ``get_position_indices``)
        if the data contains an unsupported position.
    """
    if negative_weight < 0:
        raise ValueError("negative_weight must be >= 0")

    columns = ["Player", "Game Week", "Position", "Team", "League", "Minutes", "Rating"]

    # Build the reference arrays once instead of once per row and per stat.
    # NOTE(review): these pools hold raw per-match values while each player's
    # value below is scaled to 90 minutes - confirm this asymmetry is intended.
    reference = {
        position: {stat: np.array(values) for stat, values in pools.items()}
        for position, pools in data_by_position(data).items()
    }
    plans = {}
    results = []

    for idx, row in data.iterrows():
        minutes = row["Minutes"]
        # NaN minutes would slip through a bare "< 20" comparison.
        if pd.isnull(minutes) or minutes < 20:
            continue

        position = row.get('Position')
        if position not in plans:
            plans[position] = _build_scoring_plan(position, negative_weight)
        plan = plans[position]
        pools = reference[position]
        per_90 = 90 / minutes

        weighted_sum = 0.0
        scored_any = False
        for stat, weight, lower_is_better in plan:
            value = row[stat]
            if pd.isnull(value):
                continue
            pool = pools[stat]
            # Too few reference values to form a meaningful centile.
            if len(pool) < 3:
                continue

            scaled = value * per_90
            if lower_is_better:
                centile = (pool >= scaled).sum() / len(pool)
            else:
                centile = (pool <= scaled).sum() / len(pool)

            weighted_sum += centile * weight
            scored_any = True

        if not scored_any:
            continue

        # Stats skipped above still count in the denominator, as before.
        total_weight = sum(weight for _, weight, _ in plan)
        raw_note = 10 * weighted_sum / total_weight

        # Bonus for decisive actions. A missing value counts as 0: a NaN here
        # used to propagate and end up clamped to a rating of 10.
        raw_note += _zero_if_missing(row.get('Goals', 0))
        raw_note += max(
            _zero_if_missing(row.get('Assists', 0)),
            _zero_if_missing(row.get("Expected Assists (xA)", 0)),
        )

        # new_note already clamps to [0, 10]; only rounding is left to do.
        rating = round(new_note(raw_note, target, variation_factor), 2)

        results.append({
            "Player": idx,
            "Game Week": row['Game Week'],
            "Position": position,
            "Team": row['Team'],
            "League": row['League'],
            "Minutes": minutes,
            "Rating": rating,
        })

    # Explicit columns keep the header even when no player qualifies.
    return pd.DataFrame(results, columns=columns)

In [8]:
# Calibration passed to new_note: scores are centred on `target`, and
# `variation_factor` scales how far a rating may move away from it.
target, variation_factor = 6.0, 0.75
notes = rate_players_by_position(
    data,
    target=target,
    variation_factor=variation_factor,
)

In [9]:
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before writing the ratings.
output_dir = os.path.dirname(path_end)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
notes.to_csv(path_end, index=False)