1. Data Preprocessing:

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset.
# The path is a raw string: in a normal string literal the "\U" in "C:\Users"
# starts a \UXXXXXXXX unicode escape and the cell fails with a SyntaxError
# before anything runs.
# NOTE(review): this is still an absolute, machine-specific path — consider a
# path relative to the notebook so others can re-run it.
DATA_PATH = r'C:\Users\ST\Desktop\codsoft\task_3.csv'
data = pd.read_csv(DATA_PATH)

# Check for missing values (prints the per-column count of nulls)
print(data.isnull().sum())

# Integer-encode the categorical feature columns.
# One fitted LabelEncoder is kept per column in `label_encoders` so the same
# mapping can be reused (or inverted) later.
categorical_columns = ['Geography', 'Gender']  # Example categorical columns

# Fit first: learn each column's set of classes ...
label_encoders = {
    name: LabelEncoder().fit(data[name]) for name in categorical_columns
}

# ... then overwrite each column in `data` with its integer codes.
for col, le in label_encoders.items():
    data[col] = le.transform(data[col])

# Separate the label column ('Exited') from the remaining feature columns.
y = data['Exited']
X = data.drop('Exited', axis=1)
# NOTE(review): every column other than 'Exited' is used as a feature. If the
# CSV still holds non-numeric columns besides the two encoded above, the
# downstream model fit would reject them — confirm against the file.

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


2. Choose a Classifier and Train the Model:

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Build a random forest with a fixed seed and fit it on the training split in
# one step (`fit` returns the estimator itself). Other classifiers could be
# substituted here.
classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Leave the fitted estimator as the cell's last expression so it is displayed.
classifier


3. Evaluate the Model:

In [None]:
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Predict hard class labels for the test set; these feed the threshold-based
# metrics (accuracy and the classification report).
y_pred = classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Generate classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Calculate ROC AUC score.
# ROC AUC measures how well the model *ranks* samples, so it must be given a
# continuous score. Passing the 0/1 predictions collapses the ROC curve to a
# single operating point and under-reports the AUC. Column 1 of predict_proba
# is the probability of the class with the greater label (classifier.classes_[1]),
# which is what roc_auc_score expects in the binary case.
y_proba = classifier.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_proba)
print("ROC AUC Score:", roc_auc)


4. Feature Importance (Optional):

In [None]:
import matplotlib.pyplot as plt

# Pair each feature column name with the importance the fitted model assigned it.
feature_names = X.columns
feature_importances = classifier.feature_importances_

# Tabulate the pairs and order them from most to least important.
feature_importance_df = (
    pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
    .sort_values(by='Importance', ascending=False)
)

# Draw a horizontal bar chart of the importances, one bar per feature.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(feature_importance_df['Feature'], feature_importance_df['Importance'])
ax.set_xlabel('Feature Importance')
ax.set_ylabel('Feature')
ax.set_title('Feature Importance')
plt.show()
