In [16]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score

from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import joblib

In [17]:
df = pd.read_csv(r"C:\ML_projects\AttackScoreAI\Data\futbolcular_final.csv")

In [18]:
numeric_cols = [
    "Mac","Dakika","Gol","Asist","xG","Gol/90","Asist/90",
    "Sut/90","Isabetli_Sut/90","Gol/Sut_Orani","Skor_Katkisi"
]
for c in numeric_cols:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

In [19]:
df["attack_score_raw"] = (
    5.0 * df.get("Gol/90",np.nan)+
    3.0 * df.get("Asist/90",np.nan)+
    0.03 * df.get("xG",np.nan)+
    0.8 * df.get("Sut/90",np.nan)+
    1.2 * df.get("Isabetli_Sut/90",np.nan)+
    2.0 * df.get("Gol/Sut_Orani",np.nan)
)

valid_raw = df["attack_score_raw"].dropna()
if len(valid_raw) < 10:
    raise ValueError("attack_score_raw için yeterli satır yok")

low = valid_raw.quantile(0.05)
high = valid_raw.quantile(0.95)

df["attack_score"] = (
    (df["attack_score_raw"] - low) / (high - low)
).clip(0, 1) * 100


print("attack_score üretildi")
cols_preview = [c for c in ["Oyuncu","Gol/90","Asist/90", "xG", "attack_score"] if c in df.columns]
print(df[cols_preview].head())

attack_score üretildi
             Oyuncu  Gol/90  Asist/90    xG  attack_score
0  Brenden Aaronson    0.17      0.16  27.1     29.358591
1     Simon Adingra    0.35      0.17  12.3     48.400800
2        Amine Adli    0.28      0.22  16.3     45.190052
3    Carlos Alcaraz    0.21      0.15  16.7     39.954062
4    Jaidon Anthony    0.22      0.15  23.7     34.619279


In [20]:
feature_candidates = [
    "Mac","Dakika","xG",
    "Sut/90","Isabetli_Sut/90"
]
features = [c for c in feature_candidates if c in df.columns]
print("Kullanılan feature'lar",features)
data = df[features + ["attack_score"]].dropna(subset=["attack_score"]).copy()
X = data[features]
y = data["attack_score"]

Kullanılan feature'lar ['Mac', 'Dakika', 'xG', 'Sut/90', 'Isabetli_Sut/90']


In [21]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [22]:
preprocess= ColumnTransformer(
    transformers=[
        ("num", Pipeline(steps=[
            ("imputer",SimpleImputer(strategy="median")),
            ("scaler",StandardScaler())
        ]),features)
    ],
    remainder='drop'
)

In [23]:
models = {
    "Ridge": Ridge(alpha=10, random_state=42),
    "Lasso": Lasso(alpha=0.1, random_state=42),
    "ElasticNet": ElasticNet(alpha=0.1, l1_ratio=0.7, random_state=42),
    "RandomForest": RandomForestRegressor(n_estimators=300, max_depth=12, random_state=42, n_jobs=-1),
    "GradientBoosting": GradientBoostingRegressor(n_estimators=300, learning_rate=0.05, max_depth=3, random_state=42)
}


In [24]:
print("\n Cross-validation sonuçları:")

for name,model in models.items():
    pipe_cv = Pipeline(steps=[
        ("prep",preprocess),
         ("model", model)
    ]
    )

    cv_scores=cross_val_score(
        pipe_cv,
        X,y,
        cv=5,
        scoring="r2"
    )

    print(f"{name}: CV R2= {cv_scores.mean():.3f}±{cv_scores.std():.3f}")


 Cross-validation sonuçları:
Ridge: CV R2= 0.785±0.141
Lasso: CV R2= 0.764±0.161
ElasticNet: CV R2= 0.776±0.154
RandomForest: CV R2= 0.918±0.036
GradientBoosting: CV R2= 0.909±0.033


In [25]:
if "Gol/90" in X_train.columns:
    sample_weight_train = 1.0 + 0.25 * X_train ["Gol/90"].fillna(0).clip(lower=0)
else:
    sample_weight_train = None

In [26]:
results = []
pipelines = {}

for name, model in models.items():
    pipe = Pipeline(steps=[
        ("prep",preprocess),
        ("model", model)
    ])

    if sample_weight_train is not None: 
        pipe.fit(X_train, y_train, model__sample_weight=sample_weight_train)
    else:
        pipe.fit(X_train, y_train)

    preds = pipe.predict(X_test)

    rmse= np.sqrt(mean_squared_error(y_test, preds))

    results.append({
        "Model": name,
        "MAE": mean_absolute_error(y_test, preds),
        "RMSE": rmse,
        "R2": r2_score(y_test, preds)
    })
    pipelines[name]=pipe

results_df = pd.DataFrame(results).sort_values(["RMSE","MAE"],ascending=True)

print("\n Model Karşılaştırma:")
display(results_df)


 Model Karşılaştırma:


Unnamed: 0,Model,MAE,RMSE,R2
3,RandomForest,3.375095,4.039091,0.973798
4,GradientBoosting,3.597246,4.268191,0.970742
0,Ridge,4.940537,6.747005,0.926889
2,ElasticNet,4.553968,7.479108,0.910162
1,Lasso,4.489708,8.300601,0.889343


In [27]:
from sklearn.model_selection import GridSearchCV

final_lasso_pipe = Pipeline(
    steps=[
        ("prep",preprocess),
        ("model",Lasso(max_iter=20000, random_state=42))
    ]
)

param_grid = {
    "model__alpha": np.logspace(-4,1,25)
}

grid = GridSearchCV(
    estimator=final_lasso_pipe,
    param_grid=param_grid,
    scoring="r2",
    cv=5,
    n_jobs=-1
)

grid.fit(X_train, y_train)
best_lasso = grid.best_estimator_

print("En iyi alpha:",grid.best_params_["model__alpha"])
print("CV en iyi R2",grid.best_score_)

En iyi alpha: 2.371373705661655
CV en iyi R2 0.7545473179351038


In [28]:
preds = best_lasso.predict(X_test)
mae= mean_absolute_error(y_test, preds)
rmse = np.sqrt(mean_squared_error(y_test,preds))
r2=r2_score(y_test,preds)

print("\n Sonuçlar")
print(f"MAE : {mae:.3f}")
print(f"RMSE : {rmse:.3f}")
print(f"R2 : {r2:.3f}")


 Sonuçlar
MAE : 5.623
RMSE : 7.015
R2 : 0.921


In [29]:
feature_names=features
coefs=best_lasso.named_steps["model"].coef_
coef_df=pd.DataFrame(
    {
        "Feature": feature_names,
        "Coefficient":coefs,
        "Abs_Coefficient": np.abs(coefs)
    }
).sort_values("Abs_Coefficient",ascending=False)
print("Feature importance")
display(coef_df)
print("Sıfırlanan feature sayısı:",(coef_df["Coefficient"]==0).sum(),
     len(coef_df))

Feature importance


Unnamed: 0,Feature,Coefficient,Abs_Coefficient
4,Isabetli_Sut/90,13.845157,13.845157
2,xG,10.512138,10.512138
3,Sut/90,3.699781,3.699781
0,Mac,0.0,0.0
1,Dakika,-0.0,0.0


Sıfırlanan feature sayısı: 2 5


In [30]:
joblib.dump(best_lasso,r"C:\ML_projects\AttackScoreAI\attack-score-api\final_lasso_attack_score_model.pkl")


['C:\\ML_projects\\AttackScoreAI\\attack-score-api\\final_lasso_attack_score_model.pkl']