In [None]:
# Import Libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Load Selected Features
# Read the processed dataset and separate the label column from the predictors.
data_path = '../data/processed_data.csv'
df = pd.read_csv(data_path)
target_column = 'Bankrupt?'

X = df.drop(columns=[target_column])
y = df[target_column]

# Use only the final selected features
# The file holds one column name per line. Blank lines (e.g. an extra trailing
# newline) are skipped so they are not looked up as a column named ''. Names
# are otherwise kept verbatim, including any surrounding whitespace, because
# they must match the CSV header exactly.
final_features_path = '../results/selected_features.txt'
with open(final_features_path, 'r') as f:
    selected_features = [line for line in f.read().splitlines() if line.strip()]
if not selected_features:
    # Fail early with a clear message instead of fitting on a zero-column frame.
    raise ValueError(f"No feature names found in {final_features_path}.")
# Indexing X (not df) means a feature list that contains the target column
# raises a KeyError rather than leaking the label into the predictors.
X_selected = X[selected_features]

# Split Dataset
# Hold out 20% for evaluation. Stratifying on the label keeps the class
# proportions the same in both splits, which matters when one class is rare.
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y, test_size=0.2, random_state=42, stratify=y
)

# Initialize Models
# All models use library defaults; only the random forest takes a seed.
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Support Vector Machine": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

# Train and Evaluate Models
# Maps model name -> accuracy on the held-out test split.
model_results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # zero_division=0 reports 0.0, without a warning, for any class the model
    # never predicts (the same value the default would report while warning).
    print(
        f"\n{name} Classification Report:\n",
        classification_report(y_test, y_pred, zero_division=0),
    )
    model_results[name] = accuracy

# Select the Best Model
# Highest test accuracy wins; on a tie, max() keeps the first model in
# insertion order.
# NOTE(review): selection uses the same test split that is reported, and
# accuracy can be dominated by the majority class -- consider a validation
# split and a class-aware metric if the label is imbalanced.
best_model_name = max(model_results, key=model_results.get)
best_model = models[best_model_name]
print(f"Best model: {best_model_name} with accuracy: {model_results[best_model_name]:.2f}")

# Save the Best Model
# The dict holds the fitted estimator instances, so the object persisted here
# is the already-trained model.
best_model_path = '../results/best_model.pkl'
joblib.dump(best_model, best_model_path)
print(f"Best model saved to {best_model_path}.")
