In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
# NOTE(review): with no category or module given, this filter hides every
# warning for the rest of the session, so deprecation notices and other
# diagnostics from later cells never surface. Consider narrowing or removing it.
warnings.filterwarnings("ignore")
# Read the Telco customer-churn CSV from the Kaggle input directory into the
# DataFrame `data`, which the following cells inspect and transform.
data=pd.read_csv('/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [None]:
# Display the column labels of the freshly loaded frame.
data.columns

In [None]:
# Print a per-column summary (dtype and non-null count) plus memory usage.
data.info()

In [None]:
# Display the frame's dimensions as a (rows, columns) tuple.
data.shape

In [None]:
# Summary statistics for the numeric columns. `describe` must be *called*;
# the bare attribute only displays the bound-method repr, not the statistics.
data.describe()

In [None]:
# Remove the customerID column so it is not used as a feature.
# Rebinding instead of `inplace=True`, together with errors='ignore', keeps
# this cell safe to re-run: a second execution is a no-op rather than a
# KeyError for the already-removed column.
data = data.drop(columns=['customerID'], errors='ignore')

In [None]:
# Preview the first rows of the frame after the customerID column was dropped.
data.head()

In [None]:
# Names of the object-dtype columns; these are label-encoded later on.
cat_var = data.select_dtypes(include=[object]).columns.tolist()

# Names of the float / int columns.
numeric_var = data.select_dtypes(include=["float", 'int']).columns.tolist()

# Sanity check shown as the cell output: True when the two lists together
# account for every column of the frame.
len(cat_var) + len(numeric_var) == len(data.columns)

In [None]:
from sklearn import preprocessing

# Kept bound even when `cat_var` is empty, as in the original cell.
label_encoder = preprocessing.LabelEncoder()

# One fitted encoder per column, keyed by column name, so every integer code
# can be translated back with ``encoders[col].inverse_transform(codes)``.
encoders = {}

# NOTE(review): LabelEncoder numbers the sorted unique values of a column, so
# an object column that really holds numbers stored as text would be coded in
# string order -- check the data.info() output for such columns before
# trusting the encoded feature.
for var in cat_var:
    # A fresh encoder per column: refitting one shared instance would throw
    # away the classes learned for the previous column.
    label_encoder = preprocessing.LabelEncoder()
    data[var] = label_encoder.fit_transform(data[var])
    encoders[var] = label_encoder

In [None]:
from sklearn.model_selection import train_test_split

# Features: every column except the last one.
X = data.iloc[:, :-1]
# Target: the last column, selected with `-1` (not `-1:`) so `y` is a 1-D
# Series. The slice form yields an (n, 1) DataFrame, which scikit-learn
# estimators accept only with a column-vector conversion warning.
y = data.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [None]:
from sklearn.metrics import accuracy_score, log_loss
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from xgboost import XGBClassifier
import xgboost

# Candidate models, all trained on the same split and scored on the held-out
# test set.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="rbf", C=0.025, probability=True),
    NuSVC(probability=True),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    XGBClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis()]

log_cols=["Classifier", "Accuracy", "Log Loss","precision","recall","f1"]

# One row per classifier is collected here and the frame is built once after
# the loop. DataFrame.append no longer exists in pandas 2.x, and growing a
# frame row by row is quadratic anyway.
log_rows = []

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__

    print("="*30)
    print(name)

    print('****Results****')
    # Both prediction forms are computed once on the *test* set: hard labels
    # for accuracy / precision / recall / f1, class probabilities for log loss.
    test_predictions = clf.predict(X_test)
    test_probabilities = clf.predict_proba(X_test)

    acc = accuracy_score(y_test, test_predictions)
    print("Accuracy: {:.4%}".format(acc))

    ll = log_loss(y_test, test_probabilities)
    print("Log Loss: {}".format(ll))

    precision = precision_score(y_test, test_predictions)
    print("precision: {}".format(precision))

    # Each metric prints its own value (the recall and f1 lines previously
    # echoed the precision figure).
    recall = recall_score(y_test, test_predictions)
    print("recall: {}".format(recall))

    f1 = f1_score(y_test, test_predictions)
    print("f1: {}".format(f1))

    # Accuracy is stored as a percentage; the other metrics keep their raw scale.
    log_rows.append([name, acc*100, ll, precision, recall, f1])

print("="*30)

# Summary table consumed by the plotting and display cells below.
log = pd.DataFrame(log_rows, columns=log_cols)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Map matplotlib's single-letter colour codes onto seaborn's "muted" palette,
# so "y" below resolves to the muted yellow.
sns.set_color_codes("muted")

# Horizontal bars: one per classifier, bar length = accuracy in percent.
# barplot returns the Axes it drew on, which is then labelled directly.
accuracy_ax = sns.barplot(data=log, x='Accuracy', y='Classifier', color="y")
accuracy_ax.set_xlabel('Accuracy %')
accuracy_ax.set_title('Classifier Accuracy')

plt.show()

In [None]:
# Display the per-classifier metrics table built by the evaluation loop.
log