In [55]:
# Gerekli kütüphaneler
import pandas as pd
import numpy as np

In [56]:
# Path of the CSV file that the notebook loads.
CSV_PATH = "match_odds_wide.csv"

# Columns treated as keys rather than features: the column filters elsewhere
# in this notebook skip every name that appears in this list.
KEY_COLS = (
    ["match_date", "match_time", "tournament", "match_id"]
    + ["homeTeam", "awayTeam"]
    + ["firstHalfHomeGoal", "firstHalfAwayGoal"]
    + ["totalHomeGoal", "totalAwayGoal"]
    + ["homeCorner", "awayCorner"]
)

In [57]:
# Load the whole CSV at CSV_PATH into a DataFrame using pandas' default parsing.
df = pd.read_csv(CSV_PATH)

In [58]:
# Every column whose name is not listed in KEY_COLS, in the original column order.
# NOTE(review): despite the name, no dtype check is applied here.
is_key_col = df.columns.isin(KEY_COLS)
numeric_cols = list(df.columns[~is_key_col])

In [59]:
# Training rows: matches where both final-score columns are filled in.
score_known = df[["totalHomeGoal", "totalAwayGoal"]].notna().all(axis=1)
train_set = df.loc[score_known]
train_set.shape

(776, 736)

In [60]:
# Test rows: matches missing at least one of the two final-score columns.
home_goal_missing = df["totalHomeGoal"].isna()
away_goal_missing = df["totalAwayGoal"].isna()
test_set = df.loc[home_goal_missing | away_goal_missing]
test_set.shape

(106, 736)

In [61]:
# 3. Determine the feature columns: a column qualifies when it is not a key
# column and has a numeric dtype (non-numeric columns are left out on purpose).
feature_cols = []
for col in df.columns:
    if col in KEY_COLS:
        continue
    if pd.api.types.is_numeric_dtype(df[col]):
        feature_cols.append(col)


In [62]:
# Train: rows that have a home-goal value; Test: rows that do not.
# Only totalHomeGoal is inspected for this split. Both halves are copies.
home_goal_known = df["totalHomeGoal"].notna()
train_df = df.loc[home_goal_known].copy()
test_df = df.loc[~home_goal_known].copy()

# Odds columns only: every column that is not listed in KEY_COLS.
ODDS_COLS = list(df.columns[~df.columns.isin(KEY_COLS)])

train_odds = train_df.loc[:, ODDS_COLS]
test_odds = test_df.loc[:, ODDS_COLS]

In [63]:
# -- 2. Draw one match at random from the test rows
import random

last_position = len(test_df) - 1
random_idx = random.randint(0, last_position)  # both bounds are inclusive
test_row = test_df.iloc[random_idx]

In [64]:
# Show the sampled test match's value in the "Maç Sonucu :: MS 2" column.
test_row.loc["Maç Sonucu :: MS 2"]

1.69

In [None]:
import pandas as pd
import numpy as np
import random

CSV_PATH = "match_odds_wide.csv"
TOP_K = 100         # how many nearest training matches are used for the estimate
RANDOM_SEED = None  # set to an int to make the sampled test match reproducible

# -- 1. Read the data and split it
df = pd.read_csv(CSV_PATH)

# Columns that are never used as distance features.
meta_cols = [
    'match_date', 'match_time', 'tournament', 'match_id',
    'homeTeam', 'awayTeam',
    'firstHalfHomeGoal', 'firstHalfAwayGoal',
    'totalHomeGoal', 'totalAwayGoal',
    'homeCorner', 'awayCorner'
]

# Distance features: non-meta columns with a numeric dtype. The dtype filter
# keeps a text column from breaking the arithmetic below.
numeric_cols = [
    col for col in df.columns
    if col not in meta_cols and pd.api.types.is_numeric_dtype(df[col])
]

# Train = both final-score columns present; test = at least one missing.
# Requiring both columns guarantees every training row maps to exactly one of
# the three outcomes counted in step 4.
has_full_score = df[["totalHomeGoal", "totalAwayGoal"]].notna().all(axis=1)
train_df = df[has_full_score].reset_index(drop=True)
test_df = df[~has_full_score].reset_index(drop=True)

if train_df.empty:
    raise ValueError("No rows with a final score: nothing to compare against.")
if test_df.empty:
    raise ValueError("No rows without a final score: nothing to predict.")

# -- 2. Pick a random match from the test rows
if RANDOM_SEED is not None:
    random.seed(RANDOM_SEED)
random_idx = random.randint(0, len(test_df) - 1)
test_row = test_df.iloc[random_idx]

# -- 3. Find the TOP_K closest training matches
# Euclidean distance over the features that are present in BOTH rows; a
# training row that shares no feature with the test row gets distance inf.
# NOTE(review): the distance is not normalised by the number of shared
# features, so rows sharing fewer features tend to look closer.
train_values = train_df[numeric_cols].to_numpy(dtype=float)
test_values = test_row[numeric_cols].to_numpy(dtype=float)

shared = ~(np.isnan(train_values) | np.isnan(test_values))
squared_diff = np.where(shared, (train_values - test_values) ** 2, 0.0)
distance_values = np.where(
    shared.any(axis=1),
    np.sqrt(squared_diff.sum(axis=1)),
    np.inf,
)

distance_df = pd.DataFrame({"index": train_df.index, "distance": distance_values})

# Rows at infinite distance are not comparable and must not count as neighbours.
comparable = distance_df[np.isfinite(distance_df["distance"])]
top_100_indices = comparable.sort_values(by="distance", kind="stable").head(TOP_K)["index"]

top_100_matches = train_df.loc[top_100_indices]

# -- 4. Count the full-time results (home win / draw / away win)
home_goals = top_100_matches["totalHomeGoal"]
away_goals = top_100_matches["totalAwayGoal"]

ms1 = (home_goals > away_goals).sum()
msx = (home_goals == away_goals).sum()
ms2 = (home_goals < away_goals).sum()

total = ms1 + msx + ms2
if total == 0:
    raise ValueError("No comparable training match found for the selected test match.")

ms1_prob = ms1 / total
msx_prob = msx / total
ms2_prob = ms2 / total

# -- 5. Print the results
print(f"Test Maçı: {test_row['homeTeam']} vs {test_row['awayTeam']}")
print(f"\nTop {len(top_100_matches)} en yakın maç bazında MS olasılıkları:")
print(f"  MS 1: %{ms1_prob * 100:.2f}")
print(f"  MS X: %{msx_prob * 100:.2f}")
print(f"  MS 2: %{ms2_prob * 100:.2f}")


Test Maçı: Sportivo Ameliano vs Cerro Porteno

Top 100 en yakın maç bazında MS olasılıkları:
  MS 1: %31.00
  MS X: %23.00
  MS 2: %46.00
