In [1]:
# STEP 1: IMPORT LIBRARY
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# 📌 CATATAN:
# Kita pakai pandas buat handle CSV dan eksplorasi data
# MinMaxScaler buat normalisasi nanti

# STEP 2: BACA KETIGA CSV
df1 = pd.read_csv('/content/Badminton_Match_Result_Dataset_1.csv')
df2 = pd.read_csv('/content/Badminton_Match_Result_Dataset_2.csv')
df3 = pd.read_csv('/content/Badminton_Match_Result_Dataset_3.csv')

# 📌 CATATAN:
# Baca ketiga dataset dan simpan masing-masing ke variabel

# STEP 3: CEK ISI DATASET
print("Dataset 1:")
print(df1.head())
print("\nDataset 2:")
print(df2.head())
print("\nDataset 3:")
print(df3.head())

# 📌 CATATAN:
# Kita print 5 baris pertama buat lihat isi dan struktur kolom

# STEP 4: GABUNGKAN DATASET
df = pd.concat([df1, df2, df3], ignore_index=True)

# 📌 CATATAN:
# Gabungin semua dataset jadi satu dataframe

# STEP 5: CEK MISSING VALUE
print("\nMissing Values:")
print(df.isnull().sum())

# 📌 CATATAN:
# Cek apakah ada data yang kosong (missing/null)

# STEP 6: DATA CLEANING (contoh: drop NA atau isi nilai kosong)
# Misalnya kita drop baris yang ada nilai kosong
df_cleaned = df.dropna()

# 📌 CATATAN:
# Atau bisa juga pake df.fillna() kalau mau isi nilai kosong

# STEP 7: NORMALISASI (fitur numerik aja)
# Misalnya kita normalisasi semua kolom numerik
numerical_cols = df_cleaned.select_dtypes(include=['int64', 'float64']).columns
scaler = MinMaxScaler()
df_cleaned[numerical_cols] = scaler.fit_transform(df_cleaned[numerical_cols])

# 📌 CATATAN:
# MinMaxScaler ngeskalain data numerik ke range 0-1
# Hati-hati: Ini harusnya dilakukan setelah cleaning

# STEP 8: CEK DATASET HASILNYA
print("\nData setelah gabung, dibersihkan, dan dinormalisasi:")
print(df_cleaned.head())


Dataset 1:
  team_1;team_2;t1_p1_condition;t1_p2_condition;t2_p1_condition;t2_p2_condition;score
0  David/Moel;Wawo/Angga;Recovery;Injured;Injured...                                 
1  Arya/David;Dennis/Eka;Injured;Fit;Fit;Injured;...                                 
2  Eka/David;Wawo/Arya;Fit;Recovery;Recovery;Fit;...                                 
3  David/Eka;Wirawan/Arya;Sick;Fit;Injured;Recove...                                 
4  Wawo/Arya;Eka/Dennis;Injured;Sick;Fit;Injured;...                                 

Dataset 2:
  tim_1;tim_2;kondisi_team1_player1;kondisi_team1_player2;kondisi_team2_player1;kondisi_team2_player2;skor
0  David/Moel;Wawo/Angga;Recovery;Injured;Injured...                                                      
1  Arya/David;Dennis/Eka;Injured;Fit;Fit;Injured;...                                                      
2  Eka/David;Wawo/Arya;Fit;Recovery;Recovery;Fit;...                                                      
3  David/Eka;Wirawan/Arya;Sick;Fi

ValueError: at least one array or dtype is required

💥 Masalah yang Muncul:
1. CSV pakai delimiter ; (titik koma)
Makanya waktu read_csv(), datanya dibaca sebagai 1 kolom besar, bukan kolom-kolom terpisah. Itu kenapa waktu df.head() keluar, kolomnya masih kayak gini:

Copy
Edit
team_1;team_2;t1_p1_condition;t1_p2_condition;t2_p1_condition;t2_p2_condition;score
Semua masih nyatu.

2. Jumlah kolom beda dan nama beda di tiap dataset
Dataset 2 & 3 pake bahasa Indonesia dan beda naming.
Dataset 3 bahkan pake istilah kayak "Cedera", "Pemulihan" dll — jadi ada perlu translasi kondisi juga nanti.

In [5]:
# Baca ulang dengan delimiter yang benar
df1 = pd.read_csv('/content/Badminton_Match_Result_Dataset_1.csv', delimiter=';')
df2 = pd.read_csv('/content/Badminton_Match_Result_Dataset_2.csv', delimiter=';')
df3 = pd.read_csv('/content/Badminton_Match_Result_Dataset_3.csv', delimiter=';')

In [6]:
# Step 2: Rename kolom biar konsisten (bisa pakai bahasa Inggris semua)
# Rename dataset 2
df2.columns = ['team_1', 'team_2', 't1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition', 'score']

# Rename dataset 3
df3.columns = ['team_1', 'team_2', 't1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition', 'score']

In [7]:
# Step 3: Translate nilai kondisi dari Bahasa Indonesia ke Inggris (khusus dataset 3)
# Buat kamus translasi
translation_map = {
    'Sehat': 'Fit',
    'Cedera': 'Injured',
    'Pemulihan': 'Recovery',
    'Sakit': 'Sick'
}

# Replace di kolom kondisi
condition_cols = ['t1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition']
for col in condition_cols:
    df3[col] = df3[col].replace(translation_map)


In [8]:
# 🔧 Step 4: Gabung semua dataset

df = pd.concat([df1, df2, df3], ignore_index=True)

In [9]:
print(df.isnull().sum())  # Cek missing values
df_cleaned = df.dropna()  # Drop baris kosong

team_1             4
team_2             1
t1_p1_condition    3
t1_p2_condition    2
t2_p1_condition    1
t2_p2_condition    1
score              3
dtype: int64


In [10]:
# Cek 5 baris pertama
print(df_cleaned.head())

# Cek jumlah baris & kolom
print("\nShape of dataset:", df_cleaned.shape)

# Cek tipe data tiap kolom
print("\nData types:")
print(df_cleaned.dtypes)

# Cek jumlah data unik per kolom (buat tau kolom kategori)
print("\nUnique values per column:")
for col in df_cleaned.columns:
    print(f"{col}: {df_cleaned[col].nunique()} unique values")


       team_1        team_2 t1_p1_condition t1_p2_condition t2_p1_condition  \
0  David/Moel    Wawo/Angga        Recovery         Injured         Injured   
1  Arya/David    Dennis/Eka         Injured             Fit             Fit   
2   Eka/David     Wawo/Arya             Fit        Recovery        Recovery   
3   David/Eka  Wirawan/Arya            Sick             Fit         Injured   
4   Wawo/Arya    Eka/Dennis         Injured            Sick             Fit   

  t2_p2_condition score  
0            Sick  '2-0  
1         Injured  '2-1  
2             Fit  '1-2  
3        Recovery  '2-1  
4         Injured  '1-2  

Shape of dataset: (5985, 7)

Data types:
team_1             object
team_2             object
t1_p1_condition    object
t1_p2_condition    object
t2_p1_condition    object
t2_p2_condition    object
score              object
dtype: object

Unique values per column:
team_1: 56 unique values
team_2: 56 unique values
t1_p1_condition: 4 unique values
t1_p2_condition: 4 un

🧠 ANALISIS DATASET LO SEKARANG:
✅ Kondisi Data:
5985 baris, lumayan banyak! Cukup untuk bikin model ML belajar.

Semua kolom condition udah bersih dan punya 4 kategori (Fit, Sick, Injured, Recovery).

Kolom score kayaknya nunjukin hasil pertandingan, contoh: '2-0, '1-2, dll. Tapi masih ada ' di depannya → kita bersihin nanti.

Kolom team_1 dan team_2 itu kombinasi pemain, mungkin bisa diproses lebih jauh kalau mau analisis performa pemain tertentu (optional sih).

🎯 NEXT: Feature Engineering
🎯 Tujuan:
Nyiapin fitur-fitur (X) yang bisa dipakai buat memprediksi hasil pertandingan (y). Kita bisa mulai dari:

1. 🧼 Bersihin kolom score
Kita buang tanda ' dan ekstrak siapa yang menang:


In [11]:
# Bersihin tanda '
df_cleaned['score'] = df_cleaned['score'].str.replace("'", "")

# Tambahin kolom 'winner' berdasarkan skor
def determine_winner(score):
    try:
        team1, team2 = map(int, score.split('-'))
        if team1 > team2:
            return 'team_1'
        elif team2 > team1:
            return 'team_2'
        else:
            return 'draw'
    except:
        return 'unknown'

df_cleaned['winner'] = df_cleaned['score'].apply(determine_winner)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['score'] = df_cleaned['score'].str.replace("'", "")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['winner'] = df_cleaned['score'].apply(determine_winner)


In [12]:
# 2. 🔢 Encode kategori condition jadi angka
# Mapping kondisi ke angka
condition_map = {
    'Fit': 3,
    'Recovery': 2,
    'Injured': 1,
    'Sick': 0
}

for col in ['t1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition']:
    df_cleaned[col] = df_cleaned[col].map(condition_map)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned[col] = df_cleaned[col].map(condition_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned[col] = df_cleaned[col].map(condition_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned[col] = df_cleaned[col].map(condition_map)
A value is trying to be set on a copy of a sli

3. ➕ Bikin fitur baru (optional tapi powerful)
Misalnya:

Total kondisi pemain per tim

Apakah ada pemain yang sakit? (binary)

In [13]:
# Jumlah skor kondisi per tim (semakin tinggi = makin fit)
df_cleaned['team_1_total_condition'] = df_cleaned['t1_p1_condition'] + df_cleaned['t1_p2_condition']
df_cleaned['team_2_total_condition'] = df_cleaned['t2_p1_condition'] + df_cleaned['t2_p2_condition']

# Apakah ada pemain yang sakit di tiap tim?
df_cleaned['team_1_has_sick'] = ((df_cleaned['t1_p1_condition'] == 0) | (df_cleaned['t1_p2_condition'] == 0)).astype(int)
df_cleaned['team_2_has_sick'] = ((df_cleaned['t2_p1_condition'] == 0) | (df_cleaned['t2_p2_condition'] == 0)).astype(int)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['team_1_total_condition'] = df_cleaned['t1_p1_condition'] + df_cleaned['t1_p2_condition']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['team_2_total_condition'] = df_cleaned['t2_p1_condition'] + df_cleaned['t2_p2_condition']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_

In [14]:
# Fitur dan target
features = ['t1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition',
            'team_1_total_condition', 'team_2_total_condition',
            'team_1_has_sick', 'team_2_has_sick']

X = df_cleaned[features]
y = df_cleaned['winner']


In [15]:
# 📌 CATATAN: Ini asumsi lo udah jalanin step 1 dan 2 ya

# --- HEAD (5 data pertama)
print("🔹 Head (5 baris pertama):")
print(df_cleaned.head())

# --- TAIL (5 data terakhir)
print("\n🔹 Tail (5 baris terakhir):")
print(df_cleaned.tail())

# --- SHAPE (jumlah baris dan kolom)
print("\n🔹 Shape of dataset:")
print(df_cleaned.shape)

# --- NAMA KOLOM
print("\n🔹 Kolom-kolom:")
print(df_cleaned.columns.tolist())


🔹 Head (5 baris pertama):
       team_1        team_2  t1_p1_condition  t1_p2_condition  \
0  David/Moel    Wawo/Angga                2                1   
1  Arya/David    Dennis/Eka                1                3   
2   Eka/David     Wawo/Arya                3                2   
3   David/Eka  Wirawan/Arya                0                3   
4   Wawo/Arya    Eka/Dennis                1                0   

   t2_p1_condition  t2_p2_condition score  winner  team_1_total_condition  \
0                1                0   2-0  team_1                       3   
1                3                1   2-1  team_1                       4   
2                2                3   1-2  team_2                       5   
3                1                2   2-1  team_1                       3   
4                3                1   1-2  team_2                       1   

   team_2_total_condition  team_1_has_sick  team_2_has_sick  
0                       1                0                

In [16]:
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5985 entries, 0 to 5999
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   team_1                  5985 non-null   object
 1   team_2                  5985 non-null   object
 2   t1_p1_condition         5985 non-null   int64 
 3   t1_p2_condition         5985 non-null   int64 
 4   t2_p1_condition         5985 non-null   int64 
 5   t2_p2_condition         5985 non-null   int64 
 6   score                   5985 non-null   object
 7   winner                  5985 non-null   object
 8   team_1_total_condition  5985 non-null   int64 
 9   team_2_total_condition  5985 non-null   int64 
 10  team_1_has_sick         5985 non-null   int64 
 11  team_2_has_sick         5985 non-null   int64 
dtypes: int64(8), object(4)
memory usage: 607.9+ KB


Mantap, ini request analisis head-to-head antara dua pemain, Arya vs Dennis, dengan aturan penting:

❗ Kalau Arya dan Dennis ada di tim yang sama, match itu gak dihitung.

✅ Step-by-step logic:
Cek baris data yang mengandung Arya dan Dennis.

Pastikan mereka ada di tim yang berbeda (bukan satu tim).

Lihat tim mana yang menang, apakah timnya Arya atau Dennis.

Hitung total kemenangan Arya dan Dennis.

In [17]:
# Ambil match yang melibatkan Arya DAN Dennis
h2h_matches = df_cleaned[
    df_cleaned['team_1'].str.contains('Arya') & df_cleaned['team_2'].str.contains('Dennis') |
    df_cleaned['team_2'].str.contains('Arya') & df_cleaned['team_1'].str.contains('Dennis')
]

# Filter match yang TIDAK satu tim
# Jadi exclude kalau dalam 1 team ada Arya & Dennis
h2h_filtered = h2h_matches[~(
    h2h_matches['team_1'].str.contains('Arya') & h2h_matches['team_1'].str.contains('Dennis') |
    h2h_matches['team_2'].str.contains('Arya') & h2h_matches['team_2'].str.contains('Dennis')
)]

# Hitung menang siapa
arya_win = ((h2h_filtered['winner'] == 'team_1') & (h2h_filtered['team_1'].str.contains('Arya')) |
            (h2h_filtered['winner'] == 'team_2') & (h2h_filtered['team_2'].str.contains('Arya'))).sum()

dennis_win = ((h2h_filtered['winner'] == 'team_1') & (h2h_filtered['team_1'].str.contains('Dennis')) |
              (h2h_filtered['winner'] == 'team_2') & (h2h_filtered['team_2'].str.contains('Dennis'))).sum()

# Tampilkan hasil
print(f"🔹 Total Match Arya vs Dennis (bukan rekanan): {len(h2h_filtered)}")
print(f"✅ Arya menang: {arya_win}")
print(f"✅ Dennis menang: {dennis_win}")


🔹 Total Match Arya vs Dennis (bukan rekanan): 886
✅ Arya menang: 447
✅ Dennis menang: 439


Oke, bro. Kita lanjut ke pertanyaan kelima:

Hitung berapa kali David menang dengan skor 2-0

✅ Logika langkahnya:
Cari baris di mana David ada di team_1 atau team_2.

Cek tim mana yang menang (winner == team_1 atau team_2).

Pastikan skornya '2-0'.

Hitung jumlah baris yang memenuhi semua kondisi itu.

In [18]:
# Cari match yang dimenangkan oleh David dengan skor 2-0
david_win_2_0 = df_cleaned[
    ((df_cleaned['winner'] == 'team_1') & df_cleaned['team_1'].str.contains('David')) |
    ((df_cleaned['winner'] == 'team_2') & df_cleaned['team_2'].str.contains('David'))
]

# Filter lagi yang skornya 2-0
david_win_2_0 = david_win_2_0[david_win_2_0['score'] == '2-0']

# Tampilkan hasil
print(f"🏸 David menang dengan skor 2-0 sebanyak: {len(david_win_2_0)} kali")


🏸 David menang dengan skor 2-0 sebanyak: 375 kali


📝 Catatan:
Ini ngasumsikan kolom winner dan score udah bener dari feature engineering sebelumnya.

'score' harus '2-0' dalam bentuk string — pastiin gak ada spasi atau kutip aneh (kalau sebelumnya ada '2-0, pastikan udah dibersihin).

Kalau sebelumnya belum dibersihin, bisa tambahin ini di preprocessing step:

df_cleaned['score'] = df_cleaned['score'].str.replace("'", "").str.strip()


Oke lanjut, bro! Kali ini pertanyaannya:

Hitung berapa kali Moel kalah dengan skor 1-2

✅ Step-by-step logic:
Cari match di mana Moel ada di team_1 atau team_2.

Pastikan Moel ada di tim yang kalah.

Skornya harus '1-2'.

Hitung totalnya.

In [19]:
# Cari semua match yang skor akhirnya 1-2
moel_match = df_cleaned[df_cleaned['score'] == '1-2']

# Cari match di mana Moel kalah (Moel ada di tim yang bukan 'winner')
moel_kalah_1_2 = moel_match[
    ((moel_match['team_1'].str.contains('Moel')) & (moel_match['winner'] == 'team_2')) |
    ((moel_match['team_2'].str.contains('Moel')) & (moel_match['winner'] == 'team_1'))
]

# Tampilkan hasil
print(f"😓 Moel kalah dengan skor 1-2 sebanyak: {len(moel_kalah_1_2)} kali")


😓 Moel kalah dengan skor 1-2 sebanyak: 365 kali


📝 Notes:
Ini udah otomatis cek siapa yang kalah berdasarkan kolom winner.

Skor '1-2' harus sudah dibersihin juga ya (kayak hilangin ' dan spasi).

Kalau score sebelumnya masih ada ' (contoh: '1-2), pastiin kamu udah pakai:

df_cleaned['score'] = df_cleaned['score'].str.replace("'", "").str.strip()




---



**1. Split data untuk training dan testing, persentasenya silahkan ditentukan sendiri **

🎯 Tujuan Split:
Split data itu tujuannya buat melatih model (training) dan mengevaluasi performa model (testing) secara adil dan objektif. Jadi model lo gak cuma jago di data latih doang (overfitting), tapi bisa generalisasi ke data baru.

✅ Yang Perlu Dipersiapkan:
# 1. Feature & Target (X & y)
Lo harus tahu:

Fitur-fitur apa yang mau dipake buat prediksi? (contoh: kondisi pemain)

Target-nya apa? (contoh: winner, atau score, tergantung apa yang mau diprediksi). contoh



```
X = df_cleaned[['t1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition']]
y = df_cleaned['winner']
```



Tapi inget:

Kalau kolom masih kategorikal (kayak Fit, Sick, dll), harus di-encode dulu.

Kalau mau prediksi score atau winner, pastiin targetnya masuk akal untuk classification atau regression.


# 2. Encoding Kategorikal
Model machine learning gak bisa baca teks. Jadi lo harus ubah semua kategori ke angka:



```
from sklearn.preprocessing import LabelEncoder

# Label encode semua fitur
label_enc = LabelEncoder()
X_encoded = X.apply(label_enc.fit_transform)

# Encode target juga
y_encoded = label_enc.fit_transform(y)
```



# 3. Split Data


```
Pakai train_test_split dari scikit-learn:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y_encoded, test_size=0.2, random_state=42
)
```



test_size=0.2 artinya 20% buat test, 80% buat train (umum banget).

random_state biar hasilnya konsisten.

# 4. (Optional) Imbalance Check
Kalau target-nya (misal winner) sangat tidak seimbang (misal team_1 menang 90%), lo bisa pertimbangin:

pakai stratify=y saat split

atau pakai teknik balancing (misal SMOTE, undersampling, dll).


```
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

```



🎯 Target yang Bisa Diprediksi dari Dataset:
# 1. Winner (siapa yang menang?) 🏆
Kita bisa bikin kolom baru winner dari kolom score, misalnya:

Kalau score == '2-0' atau '2-1' → berarti team_1 menang.

Kalau score == '0-2' atau '1-2' → berarti team_2 menang.

📌 Ini cocok banget buat Classification Task:

Kelas: team_1_win vs team_2_win

# 2. Score (nilai akhir: 2-0, 2-1, dst) 🔢
Kita bisa prediksi langsung nilai score, tapi ini tricky:

Bentuk skornya bukan angka asli (bukan 1, 2, 3...) tapi string kayak '2-1'.

Bisa diubah ke kategori, jadi ini juga bisa jadi Classification Task:

Kelas: '2-0', '2-1', '1-2', '0-2', dll.

# 3. Point Gap (selisih skor) ➖
Misal dari 2-1 → gap = +1 buat team_1

Ini bisa dibuat jadi Regression Task (output berupa angka)

# 4. Apakah menang dengan skor telak? (Misal 2-0) 💥
Ini bisa jadi Binary Classification:

True kalau menang 2-0, False kalau 2-1, atau kalah.

# 5. Prediksi peluang menang berdasarkan kondisi pemain 🤕💪
Kita bisa fokus di kondisi pemain sebagai fitur, lalu lihat siapa yang menang.

Ini mirip dengan prediksi winner, tapi bisa digabung dengan feature engineering lebih lanjut kayak rating pemain (kalau ada), atau performa sebelumnya.

🚨 Yang Gak Bisa Diprediksi (dari data yang sekarang):
Siapa pemain individu yang paling berpengaruh (butuh data tambahan)

Skor per game/set (gak tersedia)

Lokasi pertandingan, wasit, cuaca, dll (gak tersedia di dataset)

In [21]:
X = df.drop(columns=['score'])
y = df['score']


In [22]:
df_final = df


In [23]:
# 🧪 Step 1: Split Data untuk Training dan Testing
from sklearn.model_selection import train_test_split

# Asumsikan target yang mau kita prediksi adalah skor
X = df_final.drop(columns=['score'])  # fitur
y = df_final['score']  # label

# Encoding (wajib kalau ada kategori)
X_encoded = pd.get_dummies(X)

# Split 80% train, 20% test
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# 💡 Catatan: Di sini kita pakai score sebagai label, tapi nanti lo bisa ganti ke winner kalo mau prediksi menang/kalah.