# Hackathon ML Template

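A template for quick experiments: set the run configuration, load and split the data, define the preprocessing pipelines, then train a model and report its accuracy.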

In [None]:
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
from src.models import RandomForest
from src.data import DataHandler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [5]:
# Quick configuration
# Single source of truth for the seed: both `random_state` entries below read
# from it, so they cannot drift apart when someone edits only one of them.
SEED = 42

# This dict is handed unchanged to DataHandler and RandomForest; how each key
# is consumed is defined in src/ and is not visible from this notebook.
config = {
    'target_column': 'species',
    'test_size': 0.2,
    'random_state': SEED,      # presumably seeds the train/test split -- confirm in DataHandler
    'model_params': {
        'n_estimators': 100,
        'random_state': SEED,  # presumably seeds the estimator -- confirm in RandomForest
    },
}

In [None]:
# Build the project's data helper from the run configuration, read the CSV into
# a feature/target pair, then split that pair into train and test parts.
data_handler = DataHandler(config)
X, y = data_handler.load_data('../data/iris.csv')

# split_data's result is unpacked into exactly four names, in this order.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = data_handler.split_data(X, y)

In [None]:
# Numeric branch of the preprocessing: mean-impute missing values first, then
# standardise with StandardScaler. Step names are kept because they are the
# keys used to address the steps (e.g. in `named_steps` or parameter grids).
num_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('std_scaler', StandardScaler()),
    ],
)

In [None]:
# Categorical branch of the preprocessing: one-hot encode each column.
# handle_unknown='ignore' makes the encoder emit an all-zero row for a category
# it did not see during fit, instead of raising at transform time. Without it,
# a level that appears only in the test split aborts the whole evaluation.
cat_pipeline = Pipeline([
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

In [None]:
# Route each column group to its own sub-pipeline. The column lists are read
# from attributes on `data_handler`; how they are populated is defined in
# src/data and is not visible here.
full_pipeline = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, data_handler.numerical_features),
        ('cat', cat_pipeline, data_handler.categorical_features),
    ],
)

In [7]:
# Train and evaluate model
# Apply the preprocessing defined above; previously `full_pipeline` was built
# but never used, so the model was trained on the raw features.
# Fit on the training split only and reuse the fitted transformer on the test
# split, so no test-set statistics leak into training.
# NOTE(review): ColumnTransformer drops columns not listed in
# numerical_features / categorical_features (remainder='drop' is the default)
# and returns an array rather than a DataFrame -- confirm that the project's
# RandomForest wrapper accepts that.
X_train_prepared = full_pipeline.fit_transform(X_train)
X_test_prepared = full_pipeline.transform(X_test)

model = RandomForest(config)
model.fit(X_train_prepared, y_train)

# Make predictions
predictions = model.predict(X_test_prepared)

# Print results
print(f"Accuracy: {accuracy_score(y_test, predictions):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, predictions))

Accuracy: 1.0000

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00         9
   virginica       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

