In [None]:
# Import libraries
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Load the raw customer-churn table and drop the row identifier, which is
# removed before any features are built.
DATA_PATH = '/content/customer_churn_dataset-testing-master.csv'
data = pd.read_csv(DATA_PATH)

data_reduced = data.drop(columns=['CustomerID'])

# Report the distinct values of every text-valued column so the categorical
# features that need encoding are visible. Both the dtype check and the values
# come from `data_reduced`, and the check accepts object columns as well as
# columns stored with a dedicated string dtype (a plain `== 'object'`
# comparison would skip the latter).
for column in data_reduced.columns:
    values = data_reduced[column]
    if pd.api.types.is_object_dtype(values) or pd.api.types.is_string_dtype(values):
        unique_values = values.unique()
        print(f"Feature '{column}' has {len(unique_values)} unique values: {unique_values}")
print(data_reduced.head())


Feature 'Gender' has 2 unique values: ['Female' 'Male']
Feature 'Subscription Type' has 3 unique values: ['Basic' 'Standard' 'Premium']
Feature 'Contract Length' has 3 unique values: ['Monthly' 'Annual' 'Quarterly']
   Age  Gender  Tenure  Usage Frequency  Support Calls  Payment Delay  \
0   22  Female      25               14              4             27   
1   41  Female      28               28              7             13   
2   47    Male      27               10              2             29   
3   35    Male       9               12              5             17   
4   53  Female      58               24              9              2   

  Subscription Type Contract Length  Total Spend  Last Interaction  Churn  
0             Basic         Monthly          598                 9      1  
1          Standard         Monthly          584                20      0  
2           Premium          Annual          757                21      0  
3           Premium       Quarterly      

In [None]:
# One-hot encode the three categorical features (subscription type, contract
# length and gender). drop_first=False keeps one indicator column per level.
categorical_columns = ['Subscription Type', 'Contract Length', 'Gender']
data_encoded = pd.get_dummies(
    data_reduced,
    columns=categorical_columns,
    drop_first=False,
)

# Preview the encoded frame
print(data_encoded.head())


   Age  Tenure  Usage Frequency  Support Calls  Payment Delay  Total Spend  \
0   22      25               14              4             27          598   
1   41      28               28              7             13          584   
2   47      27               10              2             29          757   
3   35       9               12              5             17          232   
4   53      58               24              9              2          533   

   Last Interaction  Churn  Subscription Type_Basic  \
0                 9      1                     True   
1                20      0                    False   
2                21      0                    False   
3                18      0                    False   
4                18      0                    False   

   Subscription Type_Premium  Subscription Type_Standard  \
0                      False                       False   
1                      False                        True   
2                   

In [None]:
# Separate the encoded table into the label column and the feature matrix.
target_column = 'Churn'
y = data_encoded[target_column]                 # Target variable
X = data_encoded.drop(columns=[target_column])  # Features: every other column

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Standardize features.
# The scaler's statistics are learned from the training rows only and then
# applied unchanged to both partitions, so nothing about the test set leaks
# into the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Baseline classifier: RBF-kernel support vector machine.
# probability=True turns on probability estimates for the fitted model; per
# the scikit-learn docs this makes fit() slower.
svm_model = SVC(
    kernel='rbf',
    probability=True,
    random_state=42,
)

In [None]:
# Fit the baseline SVM on the standardized training partition.
svm_model.fit(X=X_train, y=y_train)



In [None]:
# Score the held-out rows with the baseline model
y_pred = svm_model.predict(X_test)

# Compute both summaries first, then print them
baseline_confusion = confusion_matrix(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred)
print("Confusion Matrix:\n", baseline_confusion)
print("\nClassification Report:\n", baseline_report)

Confusion Matrix:
 [[6308  485]
 [ 322 5760]]

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.93      0.94      6793
           1       0.92      0.95      0.93      6082

    accuracy                           0.94     12875
   macro avg       0.94      0.94      0.94     12875
weighted avg       0.94      0.94      0.94     12875



In [None]:
# Overall fraction of test rows the current predictions get right
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9373203883495146


In [None]:

# Hyperparameter search for the SVM.
#
# The grid is a list of dicts so each kernel is only crossed with parameters
# it actually uses: `gamma` is the kernel coefficient for 'rbf' and 'poly' but
# is ignored by 'linear'. A single flat grid would therefore refit the same
# linear model once per gamma value (12 redundant candidates).
param_grid = [
    {
        'kernel': ['rbf', 'poly'],
        'C': [0.1, 1, 10],
        'gamma': [1, 0.1, 0.01, 0.001],
    },
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10],
    },
]

# Grid search. n_jobs=-1 runs the candidate fits in parallel on all available
# cores; individual fits are slow on this data, so a serial search is
# impractical. refit=True retrains the best candidate on the full training set.
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=2, n_jobs=-1)
grid.fit(X_train, y_train)

# Best parameters
print("Best Parameters:", grid.best_params_)

# Evaluate the best model on the held-out test set
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Fitting 2 folds for each of 36 candidates, totalling 72 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time= 1.2min
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time= 1.2min
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time= 4.4min
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time= 4.5min
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=  21.7s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=  21.7s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=  28.6s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=  29.3s
[CV] END ......................C=0.1, gamma=0.1, kernel=poly; total time=  20.7s
[CV] END ......................C=0.1, gamma=0.1, kernel=poly; total time=  19.7s
[CV] END ....................C=0.1, gamma=0.1, kernel=linear; total time=  21.6s
[CV] END ....................C=0.1, gamma=0.1, k

KeyboardInterrupt: 

In [None]:
# Evaluate the tuned model selected by the grid search.
# Predictions are taken directly from `best_model` instead of the shared
# `y_pred` variable: if the grid-search cell did not run to completion,
# `y_pred` still holds the baseline model's predictions and this cell would
# silently report baseline metrics as the tuned result. Failing with a
# NameError in that situation is the safer outcome.
y_pred_tuned = best_model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_tuned))
print("\nClassification Report:\n", classification_report(y_test, y_pred_tuned))