In [26]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

In [27]:
train_df = pd.read_csv('data/ddakji_level_2_train.csv')
test_df = pd.read_csv('data/ddakji_level_2_test.csv')

In [28]:
train_df['Flip_Result'] = train_df['Flip_Result'].map({'Yes': 1, 'No': 0})

# Step 4: Combine datasets to handle categorical encoding consistently
combined_df = pd.concat([train_df, test_df], keys=['train', 'test'])

# Step 5: Perform One-Hot Encoding on categorical variable 'Throw_Technique'
combined_df_encoded = pd.get_dummies(combined_df, columns=['Throw_Technique_Style'], drop_first=True)


In [29]:
# Step 6: Handle NaNs by mean imputation
imputer = SimpleImputer(strategy='mean')
combined_df_encoded.iloc[:, :] = imputer.fit_transform(combined_df_encoded)


 0. 0. 1. 0. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 1.
 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 1. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0.
 0. 0. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0.
 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0. 1.

In [30]:
train_encoded = combined_df_encoded.loc['train']
test_encoded = combined_df_encoded.loc['test']

# Step 8: Define features and target
X_train = train_encoded.drop(['Throw_IDs', 'Flip_Result'], axis=1)
y_train = train_encoded['Flip_Result']
X_test = test_encoded.drop(['Throw_IDs', 'Flip_Result'], axis=1)

In [34]:
model = RandomForestClassifier(n_estimators=150, random_state=42)
model.fit(X_train, y_train)

In [35]:
train_preds = model.predict(X_train)
print(f'Training Accuracy: {accuracy_score(y_train, train_preds):.2%}')

# Step 10: Make predictions
test_preds = model.predict(X_test)
test_preds_labels = ['Yes' if pred == 1 else 'No' for pred in test_preds]

# Step 11: Export submission CSV
submission = pd.DataFrame({
    'Throw_IDs': test_df['Throw_IDs'],
    'Flip_Result': test_preds_labels
})

submission.to_csv('submission.csv', index=False)

print("✅ Submission CSV created successfully!")

Training Accuracy: 100.00%
✅ Submission CSV created successfully!
