In [1]:
import numpy as np
import pandas as pd
import os
from collections import defaultdict

In [2]:
# Root folder of the season's CSV exports. It can be overridden with the
# LOSC_DATA_DIR environment variable so the notebook is not tied to one
# machine; when the variable is unset the original location is used.
path_folder = os.environ.get(
    "LOSC_DATA_DIR",
    "/Users/matteolemesre/Desktop/Data LOSC/csv/csv24_25/",
)
# Input CSV read by the loading cell, and output CSV written by the last cell.
path_start  = os.path.join(path_folder, "clean/data_goals.csv")
path_end    = os.path.join(path_folder, "ratings/data_goals.csv")

In [3]:
# Load the input table. The first CSV column becomes the index, which
# notation() later reports in its "Player" column.
data = pd.read_csv(path_start, index_col=0)

In [4]:
def new_note(score, target, variation_factor):
    """Rescale ``score`` around ``target`` and clamp the result to [0, 10].

    The gap between ``score`` and ``target`` is multiplied by
    ``variation_factor``. A negative gap (score below target) is first
    reduced to three quarters of its size, so shortfalls are penalised
    less than surpluses are rewarded.

    Args:
        score: Raw score to rescale.
        target: Pivot value; a score equal to ``target`` is returned unchanged.
        variation_factor: Multiplier applied to the (possibly softened) gap.

    Returns:
        The rescaled score, limited to the range 0..10.
    """
    gap = score - target
    # Only shortfalls are softened; a zero or positive gap is used as is.
    scaled_gap = 3 * gap / 4 if gap < 0 else gap
    adjusted = target + scaled_gap * variation_factor
    capped = min(adjusted, 10)
    return max(capped, 0)

In [5]:
def get_position_indices():
    """Return the stat names and weights used by the rating computation.

    Returns:
        tuple: ``(indices_comp, indices_neg, coeff_indices)`` as three lists.

        * ``indices_comp``: stat column names entering the rating.
        * ``indices_neg``: stat column names that ``notation`` scores with
          the reversed comparison (a lower value gives a higher centile).
        * ``coeff_indices``: weights that ``notation`` pairs positionally
          with ``indices_comp + indices_neg``.
    """
    compared_stats = [
        "Goals Against",
        "Save Efficiency",
        "Saves",
        "Shots on Target Against",
        "Completed Long Passes",
        "Crosses Stopped",
        "Defensive Actions Outside Penalty Area",
        "xG évités",
    ]

    reversed_stats = [
        "Goals Against",
        "Post-Shot Expected Goals (PSxG)",
        "Shots on Target Against",
    ]

    # NOTE(review): there are 9 weights for 8 + 3 = 11 stat names, and
    # "Goals Against" / "Shots on Target Against" appear in both lists.
    # Callers that zip the concatenated names with the weights therefore
    # never reach the last two names. Confirm this is intended.
    weights = [3, 4, 2, 1, 2, 3, 1, 2, 2]

    return compared_stats, reversed_stats, weights

In [6]:
def data_by_position(data):
    """Collect the non-null values of every rated stat column.

    Despite the name, no filtering by position is done here: every row of
    ``data`` contributes to the returned lists.

    Args:
        data: DataFrame containing every column named by
            ``get_position_indices()`` (a missing one raises ``KeyError``).

    Returns:
        dict: stat name -> list of that column's non-null values.
    """
    compared_stats, reversed_stats, _ = get_position_indices()
    stat_names = compared_stats + reversed_stats

    # Selecting with a list that repeats a name yields repeated columns, so
    # looking such a name up below returns a DataFrame rather than a Series.
    subset = data[stat_names]

    values_by_stat = {}
    for name in stat_names:
        column = subset[name]
        if isinstance(column, pd.DataFrame):
            # Repeated columns: drop rows with any null, keep the first copy.
            column = column.dropna().iloc[:, 0]
        else:
            column = column.dropna()
        values_by_stat[name] = column.tolist()

    return values_by_stat

In [7]:
def notation(data, target, variation_factor):
    """Compute a 0-10 rating for every row of ``data`` with at least 30 minutes.

    For each rated stat, the row's value is scaled to 90 minutes and turned
    into a centile against the values of that stat over all rows. The
    centiles are combined into a weighted average on a 0-10 scale, the row's
    ``Clean Sheets`` value is added, and the result is rescaled around
    ``target`` by ``new_note`` and rounded to two decimals.

    Args:
        data: DataFrame indexed by player, with the columns ``Minutes``,
            ``Game Week``, ``Team``, ``League``, ``Goals Against``,
            ``Post-Shot Expected Goals (PSxG)`` and the stat columns named by
            ``get_position_indices()``. ``Position`` (default ``'GK'``) and
            ``Clean Sheets`` (default 0) are optional. ``data`` is not modified.
        target: Pivot score forwarded to ``new_note``.
        variation_factor: Spread factor forwarded to ``new_note``.

    Returns:
        pandas.DataFrame with one row per rated input row and the columns
        ``Player``, ``Game Week``, ``Position``, ``Team``, ``League``,
        ``Minutes`` and ``Rating``. It has no columns when nothing is rated.
    """
    data = data.copy()
    data["xG évités"] = data["Post-Shot Expected Goals (PSxG)"] - data["Goals Against"]

    positive_indices, negative_indices, weights = get_position_indices()
    indices = positive_indices + negative_indices

    # zip() stops at the shorter sequence: stat names beyond len(weights) are
    # never scored, and a name listed twice contributes once per weight.
    weighted_stats = list(zip(indices, weights))
    # Every weight stays in the denominator, even for stats skipped on a row.
    total_weight = sum(weights)
    negative_stats = set(negative_indices)

    # Build each reference distribution once instead of once per row and stat.
    distributions = {
        stat: np.array(values) for stat, values in data_by_position(data).items()
    }

    results = []
    for idx, row in data.iterrows():
        minutes = row['Minutes']
        # Null check first so a missing value is never compared to a number.
        if pd.isnull(minutes) or minutes < 30:
            continue

        # NOTE(review): player values are scaled to 90 minutes but compared
        # with the raw, unscaled distribution. Confirm this is intended.
        factor = 90 / minutes

        centiles = {}
        for stat, _ in weighted_stats:
            value = row[stat]
            if pd.isnull(value):
                continue

            all_values = distributions[stat]
            if len(all_values) < 3:
                # Too few reference values for a meaningful centile.
                continue

            player_value = value * factor
            if stat in negative_stats:
                # Lower is better: share of values at or above the player's.
                matching = (all_values >= player_value).sum()
            else:
                matching = (all_values <= player_value).sum()
            centiles[stat] = matching / len(all_values)

        if not centiles:
            continue

        raw_note = 10 * sum(
            centiles[stat] * coeff for stat, coeff in weighted_stats if stat in centiles
        ) / total_weight

        # A missing clean-sheet value counts as 0. Left as NaN it would make
        # the score NaN, which the old final clamp max(0, min(10, nan))
        # silently turned into a perfect 10.
        clean_sheets = row.get('Clean Sheets', 0)
        if pd.isnull(clean_sheets):
            clean_sheets = 0
        raw_note += clean_sheets

        # new_note already limits its result to [0, 10].
        rating = round(new_note(raw_note, target, variation_factor), 2)

        results.append({
            "Player": idx,
            "Game Week": row['Game Week'],
            "Position": row.get('Position', 'GK'),
            "Team": row['Team'],
            "League": row['League'],
            "Minutes": minutes,
            "Rating": rating
        })

    return pd.DataFrame(results)


In [8]:
# Rating calibration passed on to new_note(): `target` is the pivot score a
# raw note is rescaled around, and `variation_factor` multiplies the distance
# from that pivot.
target = 6.0
variation_factor = 0.75
notes = notation(data, target=target, variation_factor=variation_factor)

In [9]:
# Create the output folder first: to_csv does not create missing directories,
# and failing here would lose the ratings computed above.
os.makedirs(os.path.dirname(path_end), exist_ok=True)
notes.to_csv(path_end, index=False)