In [1]:
import pandas as pd
import numpy as np

# Read the three raw CSV inputs, parsing each file's date column on load.
match = pd.read_csv(
    "../data/raw/match_data.csv",
    parse_dates=["match_date"],
)
train = pd.read_csv(
    "../data/raw/training_load.csv",
    parse_dates=["date"],
)
inj = pd.read_csv(
    "../data/raw/injury_history.csv",
    parse_dates=["injury_date"],
)

# Preview the first rows of each frame; the cell output is a single tuple.
(match.head(), train.head(), inj.head())


(      season       team match_id match_date player_id player_name position  \
 0  2025-2026  TR_Club_A      M01 2025-08-11       P01   Player_01       AM   
 1  2025-2026  TR_Club_A      M01 2025-08-11       P02   Player_02        W   
 2  2025-2026  TR_Club_A      M01 2025-08-11       P03   Player_03       CM   
 3  2025-2026  TR_Club_A      M01 2025-08-11       P04   Player_04       ST   
 4  2025-2026  TR_Club_A      M01 2025-08-11       P05   Player_05       AM   
 
    minutes  distance_km  sprints  high_intensity_runs     xg     xa  \
 0       45         4.07        5                   15  0.063  0.109   
 1       75         6.39       21                   15  0.180  0.000   
 2       89         9.17        3                   25  0.000  0.118   
 3       27         1.71        2                    6  0.032  0.036   
 4       65         4.32        1                   10  0.000  0.137   
 
    passes_attempted  pass_accuracy  duels  duels_won  yellow  red  
 0                18 

In [2]:
# Recent training load: mean `training_load` over each player's 14 most
# recent rows (ordered by `date`).
# NOTE(review): this selects the last 14 *rows* per player, which matches
# "the last 14 days" only if there is exactly one row per player per day --
# confirm against the training_load data.
train_by_date = train.sort_values("date")
latest_sessions = train_by_date.groupby("player_id").tail(14)
recent_load = (
    latest_sessions.groupby("player_id")["training_load"]
    .mean()
    .reset_index(name="recent_load")
)

# Both injury summaries are derived from the same per-player grouping.
injuries_by_player = inj.groupby("player_id")

# Total days out through injury, per player.
injury_days = (
    injuries_by_player["days_out"]
    .sum()
    .reset_index(name="total_injury_days")
)

# Number of injury records, per player.
injury_count = injuries_by_player.size().reset_index(name="injury_count")


In [3]:
# Start from the players that have a recent-load value and left-join the two
# injury summaries; the missing values produced by the joins (players with no
# injury rows) are then filled with 0.
risk = (
    recent_load
    .merge(injury_days, on="player_id", how="left")
    .merge(injury_count, on="player_id", how="left")
    .fillna(0)
)

# Min-max scale each input; the 1e-6 in the denominator avoids a division by
# zero when every player has the same value.
for source_col, norm_col in [
    ("recent_load", "load_norm"),
    ("total_injury_days", "injury_days_norm"),
    ("injury_count", "injury_count_norm"),
]:
    low = risk[source_col].min()
    high = risk[source_col].max()
    risk[norm_col] = (risk[source_col] - low) / (high - low + 1e-6)

# Baseline injury risk score: weighted sum of the three scaled inputs
# (load 0.5, days out 0.3, injury count 0.2).
risk["injury_risk_score"] = (
    risk["load_norm"] * 0.5
    + risk["injury_days_norm"] * 0.3
    + risk["injury_count_norm"] * 0.2
)

# Ten highest-scoring players.
risk.sort_values(by="injury_risk_score", ascending=False).head(10)


Unnamed: 0,player_id,recent_load,total_injury_days,injury_count,load_norm,injury_days_norm,injury_count_norm,injury_risk_score
13,P14,469.385714,27.0,2.0,0.710853,0.490909,1.0,0.702699
8,P09,512.2,14.0,1.0,0.968493,0.254545,0.5,0.66061
16,P17,467.3,21.0,1.0,0.698302,0.381818,0.5,0.563696
11,P12,427.385714,22.0,2.0,0.458113,0.4,1.0,0.549056
10,P11,357.185714,55.0,2.0,0.035676,1.0,1.0,0.517838
15,P16,454.535714,19.0,1.0,0.621492,0.345455,0.5,0.514382
2,P03,440.507143,25.0,1.0,0.537073,0.454545,0.5,0.5049
5,P06,517.435714,0.0,0.0,1.0,0.0,0.0,0.5
12,P13,507.078571,0.0,0.0,0.937675,0.0,0.0,0.468837
4,P05,442.928571,11.0,1.0,0.551644,0.2,0.5,0.435822


In [4]:
# Bucket the injury risk score into three labelled bands. With pd.cut's
# default right-closed intervals the bands are
# (-1, 0.33] -> LOW, (0.33, 0.66] -> MEDIUM, (0.66, 1.1] -> HIGH.
risk["risk_label"] = pd.cut(
    x=risk["injury_risk_score"],
    bins=[-1, 0.33, 0.66, 1.1],
    labels=["LOW", "MEDIUM", "HIGH"],
)

# Show score and label per player, highest score first.
risk.sort_values(by="injury_risk_score", ascending=False)[
    ["player_id", "injury_risk_score", "risk_label"]
]


Unnamed: 0,player_id,injury_risk_score,risk_label
13,P14,0.702699,HIGH
8,P09,0.66061,HIGH
16,P17,0.563696,MEDIUM
11,P12,0.549056,MEDIUM
10,P11,0.517838,MEDIUM
15,P16,0.514382,MEDIUM
2,P03,0.5049,MEDIUM
5,P06,0.5,MEDIUM
12,P13,0.468837,MEDIUM
4,P05,0.435822,MEDIUM


In [5]:
# Load the transfer-candidate table from the raw data directory.
transfer = pd.read_csv("../data/raw/transfer_candidates.csv")

# Preview the first five rows.
transfer.head(5)


Unnamed: 0,player_id,age,market_value_m,contract_months_left,salary_k,interest_level,agent_risk
0,P01,29,0.25,6,30,3,0
1,P02,25,2.16,16,458,4,0
2,P03,21,2.8,3,30,4,0
3,P04,19,2.05,16,210,1,1
4,P05,30,2.57,7,30,5,1


In [6]:
# Build the scaled inputs on a new frame so `transfer` itself is not modified.
# - contract_norm: 1 - (months left / max months left), so fewer months
#   remaining gives a higher value.
# - interest_norm and age_norm: value divided by the column maximum.
tr = transfer.assign(
    contract_norm=lambda df: 1 - (df["contract_months_left"] / df["contract_months_left"].max()),
    interest_norm=lambda df: df["interest_level"] / df["interest_level"].max(),
    age_norm=lambda df: df["age"] / df["age"].max(),
)

# Transfer risk score: weighted sum of the scaled inputs plus the raw
# `agent_risk` column (weights 0.4 / 0.3 / 0.2 / 0.1).
tr = tr.assign(
    transfer_risk_score=lambda df: (
        0.4 * df["contract_norm"]
        + 0.3 * df["interest_norm"]
        + 0.2 * df["agent_risk"]
        + 0.1 * df["age_norm"]
    )
)

# Five highest transfer-risk rows.
tr.sort_values(by="transfer_risk_score", ascending=False).head()


Unnamed: 0,player_id,age,market_value_m,contract_months_left,salary_k,interest_level,agent_risk,contract_norm,interest_norm,age_norm,transfer_risk_score
14,P15,19,4.03,1,188,5,1,0.974359,1.0,0.558824,0.945626
4,P05,30,2.57,7,30,5,1,0.820513,1.0,0.882353,0.91644
2,P03,21,2.8,3,30,4,0,0.923077,0.8,0.617647,0.670995
6,P07,26,5.82,12,261,5,0,0.692308,1.0,0.764706,0.653394
15,P16,34,6.22,5,134,3,0,0.871795,0.6,1.0,0.628718


In [7]:
# Attach the transfer risk score to the injury-risk table. This is an inner
# join (merge's default), so only players present in both frames are kept.
transfer_scores = tr[["player_id", "transfer_risk_score"]]
decision = pd.merge(risk, transfer_scores, on="player_id", how="inner")

decision.head()


Unnamed: 0,player_id,recent_load,total_injury_days,injury_count,load_norm,injury_days_norm,injury_count_norm,injury_risk_score,risk_label,transfer_risk_score
0,P01,475.0,0.0,0.0,0.744638,0.0,0.0,0.372319,MEDIUM,0.603756
1,P02,430.257143,14.0,1.0,0.475392,0.254545,0.5,0.41406,MEDIUM,0.549427
2,P03,440.507143,25.0,1.0,0.537073,0.454545,0.5,0.5049,MEDIUM,0.670995
3,P04,464.521429,0.0,0.0,0.681582,0.0,0.0,0.340791,MEDIUM,0.55178
4,P05,442.928571,11.0,1.0,0.551644,0.2,0.5,0.435822,MEDIUM,0.91644


In [8]:
# Per-player mean of four match metrics over each player's 5 most recent
# match rows (ordered by `match_date`).
perf_metrics = ["xg", "xa", "pass_accuracy", "distance_km"]
recent_perf = (
    match.sort_values("match_date")
    .groupby("player_id")
    .tail(5)
    .groupby("player_id")[perf_metrics]
    .mean()
    .reset_index()
)

# Min-max scale each metric; the 1e-6 in the denominator avoids a division by
# zero when every player has the same value.
for metric in perf_metrics:
    low = recent_perf[metric].min()
    high = recent_perf[metric].max()
    recent_perf[metric + "_norm"] = (recent_perf[metric] - low) / (high - low + 1e-6)

# Expected contribution: weighted sum of the scaled metrics
# (xg 0.4, xa 0.3, pass accuracy 0.2, distance 0.1).
recent_perf["expected_contribution"] = (
    recent_perf["xg_norm"] * 0.4
    + recent_perf["xa_norm"] * 0.3
    + recent_perf["pass_accuracy_norm"] * 0.2
    + recent_perf["distance_km_norm"] * 0.1
)

# Five highest expected contributions.
recent_perf.sort_values(by="expected_contribution", ascending=False).head()


Unnamed: 0,player_id,xg,xa,pass_accuracy,distance_km,xg_norm,xa_norm,pass_accuracy_norm,distance_km_norm,expected_contribution
7,P08,0.1596,0.0846,0.8062,4.17,0.999994,0.941954,0.862553,0.673202,0.922414
14,P15,0.0934,0.072,0.8024,4.776,0.582067,0.80133,0.855698,0.82554,0.72692
15,P16,0.0964,0.0612,0.8166,4.18,0.601006,0.680796,0.881312,0.675716,0.688475
1,P02,0.0432,0.0826,0.8344,4.606,0.26515,0.919633,0.913418,0.782805,0.642914
0,P01,0.0976,0.0596,0.642,4.156,0.608582,0.662939,0.566377,0.669683,0.622558


In [9]:
# Add the expected-contribution score to the decision table. This is an inner
# join (merge's default), so players without recent match rows are dropped.
contribution = recent_perf[["player_id", "expected_contribution"]]
final = pd.merge(decision, contribution, on="player_id", how="inner")

final.head()


Unnamed: 0,player_id,recent_load,total_injury_days,injury_count,load_norm,injury_days_norm,injury_count_norm,injury_risk_score,risk_label,transfer_risk_score,expected_contribution
0,P01,475.0,0.0,0.0,0.744638,0.0,0.0,0.372319,MEDIUM,0.603756,0.622558
1,P02,430.257143,14.0,1.0,0.475392,0.254545,0.5,0.41406,MEDIUM,0.549427,0.642914
2,P03,440.507143,25.0,1.0,0.537073,0.454545,0.5,0.5049,MEDIUM,0.670995,0.357406
3,P04,464.521429,0.0,0.0,0.681582,0.0,0.0,0.340791,MEDIUM,0.55178,0.553925
4,P05,442.928571,11.0,1.0,0.551644,0.2,0.5,0.435822,MEDIUM,0.91644,0.389673


In [10]:
def decision_label(
    row,
    *,
    min_contribution=0.6,
    max_injury_risk_for_buy=0.4,
    injury_risk_veto=0.7,
    transfer_risk_veto=0.7,
):
    """Map one player's scores to a recommendation label.

    Rules are evaluated in order and the first match wins:

    1. ``"AL"`` (Turkish: "buy") when ``expected_contribution`` is strictly
       above ``min_contribution`` and ``injury_risk_score`` is strictly below
       ``max_injury_risk_for_buy``. Because this rule runs first, a high
       transfer risk does not override it.
    2. ``"ALMA"`` (Turkish: "don't buy") when ``injury_risk_score`` is strictly
       above ``injury_risk_veto`` or ``transfer_risk_score`` is strictly above
       ``transfer_risk_veto``.
    3. ``"IZLE"`` (Turkish: "watch") otherwise.

    All comparisons are strict, so a score exactly equal to a threshold does
    not trigger that rule.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or a dict)
        Must provide ``"expected_contribution"`` and ``"injury_risk_score"``;
        ``"transfer_risk_score"`` is read only when rule 1 does not match and
        the injury-risk veto has not already decided rule 2.
    min_contribution, max_injury_risk_for_buy, injury_risk_veto, transfer_risk_veto : float
        Keyword-only thresholds. The defaults reproduce the previously
        hard-coded values 0.6, 0.4, 0.7 and 0.7.

    Returns
    -------
    str
        One of ``"AL"``, ``"ALMA"`` or ``"IZLE"``.
    """
    if (
        row["expected_contribution"] > min_contribution
        and row["injury_risk_score"] < max_injury_risk_for_buy
    ):
        return "AL"
    if (
        row["injury_risk_score"] > injury_risk_veto
        or row["transfer_risk_score"] > transfer_risk_veto
    ):
        return "ALMA"
    return "IZLE"

# Label every player by applying the decision rules row by row.
final["decision"] = final.apply(decision_label, axis="columns")

# Show the full table, highest expected contribution first.
final.sort_values(by="expected_contribution", ascending=False)


Unnamed: 0,player_id,recent_load,total_injury_days,injury_count,load_norm,injury_days_norm,injury_count_norm,injury_risk_score,risk_label,transfer_risk_score,expected_contribution,decision
7,P08,477.114286,0.0,0.0,0.757361,0.0,0.0,0.37868,MEDIUM,0.405973,0.922414,AL
14,P15,413.064286,24.0,1.0,0.371932,0.436364,0.5,0.416875,MEDIUM,0.945626,0.72692,ALMA
15,P16,454.535714,19.0,1.0,0.621492,0.345455,0.5,0.514382,MEDIUM,0.628718,0.688475,IZLE
1,P02,430.257143,14.0,1.0,0.475392,0.254545,0.5,0.41406,MEDIUM,0.549427,0.642914,IZLE
0,P01,475.0,0.0,0.0,0.744638,0.0,0.0,0.372319,MEDIUM,0.603756,0.622558,AL
8,P09,512.2,14.0,1.0,0.968493,0.254545,0.5,0.66061,HIGH,0.294691,0.62045,IZLE
13,P14,469.385714,27.0,2.0,0.710853,0.490909,1.0,0.702699,HIGH,0.52902,0.580027,ALMA
3,P04,464.521429,0.0,0.0,0.681582,0.0,0.0,0.340791,MEDIUM,0.55178,0.553925,IZLE
12,P13,507.078571,0.0,0.0,0.937675,0.0,0.0,0.468837,MEDIUM,0.316501,0.553778,IZLE
11,P12,427.385714,22.0,2.0,0.458113,0.4,1.0,0.549056,MEDIUM,0.558386,0.544595,IZLE
