In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Read the campaign-response CSV into a DataFrame.
# NOTE(review): absolute path; presumably a Colab runtime location - confirm
# before running elsewhere.
csv_path = "/content/campaign_responses.csv"
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows (head() defaults to 5) to sanity-check the load.
data.head()

Unnamed: 0,customer_id,age,gender,annual_income,credit_score,employed,marital_status,no_of_children,responded
0,1,35,Male,65000,720,Yes,Married,2,Yes
1,2,28,Female,45000,680,No,Single,0,No
2,3,42,Male,85000,750,Yes,Married,3,Yes
3,4,31,Female,55000,710,Yes,Single,1,No
4,5,47,Male,95000,790,Yes,Married,2,Yes


In [None]:
# Integer-encode every object-dtype column of `data` in place.
# One LabelEncoder is kept per column (keyed by column name) so the integer
# codes can be mapped back to the original labels later.
categorical_columns = data.select_dtypes(include='object').columns
label_encoders = {column: LabelEncoder() for column in categorical_columns}
for column, le in label_encoders.items():
    # fit_transform learns the column's labels and returns their integer codes.
    data[column] = le.fit_transform(data[column])

In [None]:
# Define features (X) and target variable (y).
# Besides the target itself, customer_id is excluded from the features:
# it is presumably a per-row identifier (1, 2, 3, ... in the preview) rather
# than a customer attribute, so a model could only memorise row order from it.
# NOTE(review): confirm customer_id carries no real signal before relying on this.
X = data.drop(columns=['responded', 'customer_id'])
y = data['responded']

In [None]:
# Split the dataset into training (80%) and testing (20%) sets.
# stratify=y keeps the proportion of each `responded` class the same in both
# splits; on a small dataset an unstratified split can leave the test set with
# a noticeably different class balance than the training set.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Standardise the features.
# The scaler's statistics are learned from the training split only and then
# applied to both splits, so the test split does not influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Candidate classifiers to compare, keyed by the name used in the printed reports.
# Every estimator given a random_state shares the same seed so runs are
# repeatable; KNeighborsClassifier is constructed with its defaults.
seed = 42
models = dict([
    ('Logistic Regression', LogisticRegression(random_state=seed)),
    ('Decision Tree', DecisionTreeClassifier(random_state=seed)),
    ('Random Forest', RandomForestClassifier(random_state=seed)),
    ('Gradient Boosting', GradientBoostingClassifier(random_state=seed)),
    ('Support Vector Machine', SVC(random_state=seed)),
    ('K-Nearest Neighbors', KNeighborsClassifier()),
])

In [None]:
# Fit every candidate model, report its held-out metrics and its 5-fold
# cross-validation accuracy, and collect the held-out accuracy per model name.
results = {}
for name, model in models.items():
    print(f"\n{name}")

    # Fit on the training split and predict the held-out split in one step
    # (scikit-learn's fit() returns the fitted estimator).
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Held-out metrics, computed first and then printed in a fixed order.
    acc = accuracy_score(y_test, y_pred)
    conf_mat = confusion_matrix(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    print("Accuracy:", acc)
    print("Confusion Matrix:\n", conf_mat)
    print("Classification Report:\n", report)

    # 5-fold cross-validation on the training split only.
    cv_scores = cross_val_score(model, X_train, y_train, cv=5)
    print("Cross-validation accuracy:", cv_scores.mean())

    # Only the held-out accuracy is kept for the final comparison.
    results[name] = acc



Logistic Regression
Accuracy: 1.0
Confusion Matrix:
 [[5 0]
 [0 7]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      1.00      1.00         7

    accuracy                           1.00        12
   macro avg       1.00      1.00      1.00        12
weighted avg       1.00      1.00      1.00        12

Cross-validation accuracy: 1.0

Decision Tree
Accuracy: 0.8333333333333334
Confusion Matrix:
 [[5 0]
 [2 5]]
Classification Report:
               precision    recall  f1-score   support

           0       0.71      1.00      0.83         5
           1       1.00      0.71      0.83         7

    accuracy                           0.83        12
   macro avg       0.86      0.86      0.83        12
weighted avg       0.88      0.83      0.83        12

Cross-validation accuracy: 1.0

Random Forest
Accuracy: 0.9166666666666666
Confusion Matrix:
 [[5 0]
 [1 6]]
Classificat

In [None]:
# Compare model performances: print each model's held-out accuracy,
# in the order the models were evaluated, to four decimal places.
# The loop variables are deliberately not called `model` / `acc`, so this cell
# does not rebind the notebook-level `model` (an estimator object after the
# training loop) to a plain string.
print("\nModel Performances:")
for model_name, model_acc in results.items():
    print(f"{model_name}: {model_acc:.4f}")


Model Performances:
Logistic Regression: 1.0000
Decision Tree: 0.8333
Random Forest: 0.9167
Gradient Boosting: 1.0000
Support Vector Machine: 1.0000
K-Nearest Neighbors: 1.0000
