## TODO: Include XGBoost in the model list and show its dump function (just describing the possibilities).
## TODO: Include Shapley values.
## OPTIONAL (future): Create a Shiny app that lets a user input values and get a score.


In [None]:
# pandas is imported here so this cell runs on its own, before the main
# import cell further down.
import pandas as pd

# Load the raw churn data set.
df = pd.read_csv('ecom-user-churn-data.csv')
# DataFrame.drop returns a new frame rather than modifying df in place, so the
# result must be assigned back for the column to actually be removed.
df = df.drop(columns=['int_cat15_n'])

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

# Load your dataset
# df = pd.read_csv('your_dataset.csv')
# X = df.drop('target_column', axis=1)
# y = df['target_column']

# Split data into train and test sets (80% / 20%).
# NOTE(review): X and y are not defined in the visible code; they are expected
# to be created beforehand (e.g. from the commented-out template above).
# stratify=y keeps the class ratio the same in both partitions, consistent
# with the StratifiedKFold used for cross-validation below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize the features: the scaling statistics are learned from the
# training partition only and then applied unchanged to the test partition.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the candidate models, keyed by the display name used in the results.
# random_state is fixed on the estimators that draw random numbers so repeated
# runs are comparable, matching the seeded train/test split and CV folds.
# The neural network is not seeded here.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'GBM': GradientBoostingClassifier(random_state=42),
    # The lambda defers the lookup of create_nn until the network is actually
    # built, so create_nn may be defined after this dictionary.
    'Neural Network': KerasClassifier(
        build_fn=lambda: create_nn(X_train_scaled.shape[1]),
        epochs=100,
        batch_size=10,
        verbose=0,
    ),
}

# Function to create a neural network model
def create_nn(feature_dim):
    """Build and compile a small feed-forward binary classifier.

    Args:
        feature_dim: Number of input features; passed as ``input_dim`` of the
            first dense layer.

    Returns:
        A compiled ``Sequential`` model with dense layers of 12, 8 and 1
        units (ReLU, ReLU, sigmoid), using binary cross-entropy loss, the
        Adam optimizer and accuracy as the tracked metric.
    """
    layers = [
        Dense(12, input_dim=feature_dim, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Cross-validation and performance metrics.
# For every model: estimate ROC AUC with stratified 5-fold CV on the training
# partition, refit on the whole training partition, then score the hold-out
# test partition.
results = {}
kf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

for name, model in models.items():
    # NOTE(review): X_train_scaled was standardized with statistics from the
    # whole training partition, so each CV validation fold has influenced the
    # scaler. Wrapping scaler + model in a Pipeline would avoid that leakage.
    cv_results = cross_val_score(model, X_train_scaled, y_train, cv=kf, scoring='roc_auc')

    # Refit on the full training partition for the hold-out evaluation.
    model.fit(X_train_scaled, y_train)

    # Score for the positive class; fall back to the raw predictions for
    # models that do not expose predict_proba.
    if hasattr(model, 'predict_proba'):
        y_probs = model.predict_proba(X_test_scaled)[:, 1]
    else:
        y_probs = model.predict(X_test_scaled).ravel()

    # Threshold the scores once and reuse the labels for every label-based
    # metric (previously recomputed per metric, alongside an extra unused
    # model.predict call).
    y_pred = np.round(y_probs)

    results[name] = {
        'CV AUC': np.mean(cv_results),
        'AUC': roc_auc_score(y_test, y_probs),
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred),
    }

# Display results: one row per model, one column per metric.
results_df = pd.DataFrame(results).transpose()
print(results_df)

# You might also want to save or plot these results
# results_df.to_csv('model_comparison_results.csv')
