In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

engine = create_engine('postgresql://postgres:postgres@localhost:5432/bank_churners')

query = "SELECT * FROM new_bank_churners_data"
df_from_sql = pd.read_sql(query, engine)

In [2]:
# Separar las características (X) y la variable objetivo (y)
X = df_from_sql.drop(columns=['attrition_flag_existing_customer'])
y = df_from_sql['attrition_flag_existing_customer']


In [3]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [4]:
# Parámetros para iterar
n_estimators_options = [50, 100, 200]
max_depth_options = [None, 10, 20, 30]


In [10]:
results = []


In [11]:
# Iterar sobre diferentes combinaciones de hiperparámetros
for n_estimators in n_estimators_options:
    for max_depth in max_depth_options:
        clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
        clf.fit(X_train, y_train)
        
        # Evaluar la precisión en el conjunto de entrenamiento
        y_train_pred = clf.predict(X_train)
        train_accuracy = accuracy_score(y_train, y_train_pred)
        
        # Evaluar la precisión en el conjunto de prueba
        y_test_pred = clf.predict(X_test)
        test_accuracy = accuracy_score(y_test, y_test_pred)
        
        # Guardar resultados en la lista
        results.append({
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'train_accuracy': train_accuracy,
            'test_accuracy': test_accuracy
        })


In [12]:
# Convertir la lista de resultados a DataFrame
results_df = pd.DataFrame(results)

In [13]:
# Exportar resultados a CSV
results_df.to_csv('model_optimization_results.csv', index=False)

In [14]:
# Mostrar el rendimiento del mejor modelo
best_model = results_df.loc[results_df['test_accuracy'].idxmax()]
print(f"Best Model: n_estimators={best_model['n_estimators']}, max_depth={best_model['max_depth']}")
print(f"Train Accuracy: {best_model['train_accuracy']}")
print(f"Test Accuracy: {best_model['test_accuracy']}")


Best Model: n_estimators=50.0, max_depth=nan
Train Accuracy: 1.0
Test Accuracy: 1.0
