In [37]:
import zipfile
import pandas as pd
import os

In [55]:

zip_path = 'C:/Users/Thelissa/Downloads/Badminton_Match_Result_Dataset.zip'
extract_path = 'C:/Users/Thelissa/Documents/Badminton_Match_Result_Dataset'


with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# List the extracted files
extracted_files = os.listdir(extract_path)
print("Extracted files:", extracted_files)



Extracted files: ['Badminton_Match_Result_Dataset_1.csv', 'Badminton_Match_Result_Dataset_2.csv', 'Badminton_Match_Result_Dataset_3.csv']


In [56]:
# Read the extracted CSV files into DataFrames
file1 = pd.read_csv(os.path.join(extract_path, 'Badminton_Match_Result_Dataset_1.csv'), delimiter=';')
file2 = pd.read_csv(os.path.join(extract_path, 'Badminton_Match_Result_Dataset_2.csv'), delimiter=';')
file3 = pd.read_csv(os.path.join(extract_path, 'Badminton_Match_Result_Dataset_3.csv'), delimiter=';')

# Optionally, you can print the first few rows to check
print(file1.head())
print(file2.head())
print(file3.head())

       team_1        team_2 t1_p1_condition t1_p2_condition t2_p1_condition  \
0  David/Moel    Wawo/Angga        Recovery         Injured         Injured   
1  Arya/David    Dennis/Eka         Injured             Fit             Fit   
2   Eka/David     Wawo/Arya             Fit        Recovery        Recovery   
3   David/Eka  Wirawan/Arya            Sick             Fit         Injured   
4   Wawo/Arya    Eka/Dennis         Injured            Sick             Fit   

  t2_p2_condition score  
0            Sick  '2-0  
1         Injured  '2-1  
2             Fit  '1-2  
3        Recovery  '2-1  
4         Injured  '1-2  
        tim_1         tim_2 kondisi_team1_player1 kondisi_team1_player2  \
0  David/Moel    Wawo/Angga              Recovery               Injured   
1  Arya/David    Dennis/Eka               Injured                   Fit   
2   Eka/David     Wawo/Arya                   Fit              Recovery   
3   David/Eka  Wirawan/Arya                  Sick                   F

In [57]:
# Rename columns of the second and third datasets to match the first dataset's column names
file2.rename(columns={
    'tim_1': 'team_1',
    'tim_2': 'team_2',
    'kondisi_team1_player1': 't1_p1_condition',
    'kondisi_team1_player2': 't1_p2_condition',
    'kondisi_team2_player1': 't2_p1_condition',
    'kondisi_team2_player2': 't2_p2_condition',
    'skor': 'score'
}, inplace=True)

file3.rename(columns={
    'tim_1': 'team_1',
    'tim_2': 'team_2',
    'kondisi_team1_player1': 't1_p1_condition',
    'kondisi_team1_player2': 't1_p2_condition',
    'kondisi_team2_player1': 't2_p1_condition',
    'kondisi_team2_player2': 't2_p2_condition',
    'score': 'score'
}, inplace=True)

# Check column names after renaming
# print(file1.columns)
# print(file2.columns)
# print(file3.columns)

# Concatenate the DataFrames
combined_df = pd.concat([file1, file2, file3], ignore_index=True)

# Remove the leading single quote (') from the 'score' column
combined_df['score'] = combined_df['score'].str.replace("'", "", regex=False)

# Check the cleaned data
print(combined_df.head())

       team_1        team_2 t1_p1_condition t1_p2_condition t2_p1_condition  \
0  David/Moel    Wawo/Angga        Recovery         Injured         Injured   
1  Arya/David    Dennis/Eka         Injured             Fit             Fit   
2   Eka/David     Wawo/Arya             Fit        Recovery        Recovery   
3   David/Eka  Wirawan/Arya            Sick             Fit         Injured   
4   Wawo/Arya    Eka/Dennis         Injured            Sick             Fit   

  t2_p2_condition score  
0            Sick   2-0  
1         Injured   2-1  
2             Fit   1-2  
3        Recovery   2-1  
4         Injured   1-2  


In [59]:
# Cek jumlah missing values per kolom
print("Missing data per column:\n", combined_df.isnull().sum())

Missing data per column:
 team_1             4
team_2             1
t1_p1_condition    3
t1_p2_condition    2
t2_p1_condition    1
t2_p2_condition    1
score              3
dtype: int64


In [58]:
# Normalize text data (lowercase, strip whitespace)
for col in ['t1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition']:
    combined_df[col] = combined_df[col].str.lower().str.strip()

# Terjemahkan semua kondisi ke bahasa Inggris untuk konsistensi
translate_map = {
    'sehat': 'fit',
    'cedera': 'injured',
    'sakit': 'sick',
    'pemulihan': 'recovery'
}

for col in ['t1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition']:
    combined_df[col] = combined_df[col].replace(translate_map)

In [61]:
# Buang baris yang tidak punya skor
combined_df = combined_df.dropna(subset=['score'])

# Pastikan skor benar-benar string (kalau masih ada yang float karena NaN sebelumnya)
combined_df['score'] = combined_df['score'].astype(str)

# Split skor jadi dua kolom numerik
combined_df[['team_1_score', 'team_2_score']] = combined_df['score'].str.split('-', expand=True).astype(int)

# Split nama pemain
combined_df['team_1_players'] = combined_df['team_1'].str.split('/')
combined_df['team_2_players'] = combined_df['team_2'].str.split('/')


In [62]:
print("Head:\n", combined_df.head())
print("\nTail:\n", combined_df.tail())
print(f"\nJumlah data: {len(combined_df)}")
print(f"Kolom: {combined_df.columns.tolist()}")


Head:
        team_1        team_2 t1_p1_condition t1_p2_condition t2_p1_condition  \
0  David/Moel    Wawo/Angga        recovery         injured         injured   
1  Arya/David    Dennis/Eka         injured             fit             fit   
2   Eka/David     Wawo/Arya             fit        recovery        recovery   
3   David/Eka  Wirawan/Arya            sick             fit         injured   
4   Wawo/Arya    Eka/Dennis         injured            sick             fit   

  t2_p2_condition score  team_1_score  team_2_score team_1_players  \
0            sick   2-0             2             0  [David, Moel]   
1         injured   2-1             2             1  [Arya, David]   
2             fit   1-2             1             2   [Eka, David]   
3        recovery   2-1             2             1   [David, Eka]   
4         injured   1-2             1             2   [Wawo, Arya]   

    team_2_players  
0    [Wawo, Angga]  
1    [Dennis, Eka]  
2     [Wawo, Arya]  
3  [Wirawan, 

In [65]:
# Pastikan kolom list pemain tidak mengandung NaN
combined_df['team_1_players'] = combined_df['team_1_players'].apply(lambda x: x if isinstance(x, list) else [])
combined_df['team_2_players'] = combined_df['team_2_players'].apply(lambda x: x if isinstance(x, list) else [])

# Fungsi head-to-head
def is_head_to_head(row, p1, p2):
    return (p1 in row['team_1_players'] and p2 in row['team_2_players']) or \
           (p2 in row['team_1_players'] and p1 in row['team_2_players'])

# Filter match head-to-head Arya vs Dennis
arya_vs_dennis = combined_df[combined_df.apply(lambda row: is_head_to_head(row, 'Arya', 'Dennis'), axis=1)]

# Hitung kemenangan
arya_win = 0
dennis_win = 0

for _, row in arya_vs_dennis.iterrows():
    if 'Arya' in row['team_1_players'] and row['team_1_score'] > row['team_2_score']:
        arya_win += 1
    elif 'Arya' in row['team_2_players'] and row['team_2_score'] > row['team_1_score']:
        arya_win += 1
    elif 'Dennis' in row['team_1_players'] and row['team_1_score'] > row['team_2_score']:
        dennis_win += 1
    elif 'Dennis' in row['team_2_players'] and row['team_2_score'] > row['team_1_score']:
        dennis_win += 1

print(f"Arya menang: {arya_win} kali")
print(f"Dennis menang: {dennis_win} kali")


Arya menang: 448 kali
Dennis menang: 441 kali


In [66]:
david_wins_2_0 = combined_df[
    ((combined_df['team_1_score'] == 2) & (combined_df['team_2_score'] == 0) & (combined_df['team_1'].str.contains('David'))) |
    ((combined_df['team_2_score'] == 0) & (combined_df['team_1_score'] == 2) & (combined_df['team_2'].str.contains('David')))
]

print(f"David menang dengan skor 2-0 sebanyak {len(david_wins_2_0)} kali")


David menang dengan skor 2-0 sebanyak 760 kali


In [67]:
moel_losses_1_2 = combined_df[
    ((combined_df['team_1_score'] == 1) & (combined_df['team_2_score'] == 2) & (combined_df['team_1'].str.contains('Moel'))) |
    ((combined_df['team_2_score'] == 1) & (combined_df['team_1_score'] == 2) & (combined_df['team_2'].str.contains('Moel')))
]

print(f"Moel kalah dengan skor 1-2 sebanyak {len(moel_losses_1_2)} kali")


Moel kalah dengan skor 1-2 sebanyak 714 kali


In [69]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Fitur dan target
X = combined_df[['t1_p1_condition', 't1_p2_condition', 't2_p1_condition', 't2_p2_condition']]
y = combined_df[['team_1_score', 'team_2_score']]

# One-hot encoding untuk fitur
categorical_features = X.columns.tolist()
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_features)
    ])

# Split train-test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Buat pipeline model regresi
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

# Train model
model.fit(X_train, y_train)

# Evaluate model
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))


MAE: 0.7122809288417968
MSE: 0.6692868549390287


In [70]:
# Asumsi kondisi semua pemain: "fit"
match_data = pd.DataFrame({
    't1_p1_condition': ['fit']*10,
    't1_p2_condition': ['fit']*10,
    't2_p1_condition': ['fit']*10,
    't2_p2_condition': ['fit']*10
})

# Prediksi skor untuk 10 match
pred_scores = model.predict(match_data)

for i, score in enumerate(pred_scores, 1):
    print(f"Match {i}: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: {score}")


Match 1: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 2: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 3: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 4: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 5: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 6: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 7: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 8: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 9: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]
Match 10: Team1 (Arya/Dennis) vs Team2 (David/Moel) -> Predicted Score: [1.29826754 0.99010707]


In [71]:
# 5 pertandingan random
sakit_vs_sehat_data = pd.DataFrame({
    't1_p1_condition': ['sick']*5,
    't1_p2_condition': ['recovery']*5,
    't2_p1_condition': ['fit']*5,
    't2_p2_condition': ['fit']*5
})

pred_scores_sakit = model.predict(sakit_vs_sehat_data)

for i, score in enumerate(pred_scores_sakit, 1):
    print(f"Match {i}: Arya (sakit) + Moel (recovery) vs random sehat -> Predicted Score: {score}")


Match 1: Arya (sakit) + Moel (recovery) vs random sehat -> Predicted Score: [0.95330571 1.47521691]
Match 2: Arya (sakit) + Moel (recovery) vs random sehat -> Predicted Score: [0.95330571 1.47521691]
Match 3: Arya (sakit) + Moel (recovery) vs random sehat -> Predicted Score: [0.95330571 1.47521691]
Match 4: Arya (sakit) + Moel (recovery) vs random sehat -> Predicted Score: [0.95330571 1.47521691]
Match 5: Arya (sakit) + Moel (recovery) vs random sehat -> Predicted Score: [0.95330571 1.47521691]


In [73]:
from itertools import combinations
import numpy as np

# Daftar semua pemain (disesuaikan dari datasetmu ya)
all_players = ['Arya', 'Dennis', 'David', 'Moel', 'Eka', 'Wawo', 'Angga', 'Wirawan']

# Simulasi lawan sehat (tetap)
opponent_data = {
    't2_p1_condition': 'fit',
    't2_p2_condition': 'fit'
}

pair_scores = []

# Loop semua kombinasi pasangan
for p1, p2 in combinations(all_players, 2):
    # Buat data simulasi 3x match per pasangan
    match_input = pd.DataFrame({
        't1_p1_condition': ['fit']*3,
        't1_p2_condition': ['fit']*3,
        't2_p1_condition': ['fit']*3,
        't2_p2_condition': ['fit']*3
    })

    # Prediksi skor
    pred = model.predict(match_input)
    avg_score = np.mean([p[0] for p in pred])  # Ambil skor tim 1

    pair_scores.append({
        'pair': f"{p1}/{p2}",
        'avg_team1_score': avg_score
    })

# Urutkan berdasarkan skor tertinggi
top_pairs = sorted(pair_scores, key=lambda x: x['avg_team1_score'], reverse=True)[:3]

# Tampilkan hasil
print("3 Pasangan Terbaik Berdasarkan Prediksi Skor:")
for i, pair in enumerate(top_pairs, 1):
    print(f"{i}. {pair['pair']} → Rata-rata skor: {pair['avg_team1_score']:.2f}")


3 Pasangan Terbaik Berdasarkan Prediksi Skor:
1. Arya/Dennis → Rata-rata skor: 1.30
2. Arya/David → Rata-rata skor: 1.30
3. Arya/Moel → Rata-rata skor: 1.30
