<a href="https://colab.research.google.com/github/Keerthana8888/Term-Deposit-Prediction/blob/main/Code%20File/Term_Deposit_Predicting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Load The Data And Libraries

In [None]:
#Standard Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Models Selection
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split,RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.ensemble import RandomForestClassifier , AdaBoostClassifier , GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
# Evaluators
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix

In [None]:
# Load the semicolon-delimited bank marketing CSV into a DataFrame.
dataset = pd.read_csv(
    '/content/bank-additional-full.csv',
    delimiter=';',  # `delimiter` is pandas' alias for `sep`
)

In [None]:
# Print a concise summary of the DataFrame: column names, non-null counts, dtypes and memory usage.
dataset.info()

In [None]:
# Preview the first five rows (the DataFrame.head default) to sanity-check the parsed columns.
dataset.head()

# Exploratory Data Analysis

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
dataset.describe()

## Univariate Analysis

In [None]:
# Skewness of each numeric column.
# numeric_only=True is required because the frame also holds string (object)
# columns; pandas >= 2.0 raises a TypeError on them instead of silently skipping.
dataset.skew(numeric_only=True)

In [None]:
# Plot the distribution of every column, one figure per column.
# displot is a figure-level function that creates its own figure, so the size is
# controlled through height/aspect (10in tall x 2.0 aspect = 20x10in) rather than
# through plt.figure(), which would only open an additional empty figure.
for column in dataset:
  # A KDE curve is only meaningful for numeric data, so it is skipped for
  # categorical (string) columns.
  sns.displot(
      dataset[column],
      kde=pd.api.types.is_numeric_dtype(dataset[column]),
      height=10,
      aspect=2,
  )
  plt.show()

## Bivariate Analysis

In [None]:
# Annotated heatmap of the pairwise correlations between the numeric columns.
# numeric_only=True is required because the frame also holds string (object)
# columns, which DataFrame.corr cannot convert to float under pandas >= 2.0.
plt.figure(figsize=(12, 10))
sns.heatmap(dataset.corr(numeric_only=True), annot=True, cmap='BuPu')

# Data Preparation

## Encoding Categorical Features (One-Hot)

In [None]:
# One-hot encode the categorical columns of every column except the last one
# (presumably the target `y` -- confirm against the CSV layout).
data_transformed = pd.get_dummies(data=dataset[dataset.columns[:-1]])

In [None]:
# Preview the first five rows of the encoded feature frame.
data_transformed.head()

In [None]:
# Print column names, non-null counts and dtypes of the encoded feature frame.
data_transformed.info()

In [None]:
# Features: the encoded predictor frame; target: the raw `y` column of the dataset.
x, y = data_transformed, dataset['y']

In [None]:
# Show the feature and target shapes, one per line, to confirm the row counts match.
print(x.shape, y.shape, sep='\n')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
(x_train, x_test,
 y_train, y_test) = train_test_split(x, y, random_state=42, test_size=0.2)

# Multivariate Modelling

## Logistic Regression

In [None]:
# Fit a logistic regression with default hyper-parameters on the training split.
lr = LogisticRegression().fit(x_train, y_train)
lr  # display the fitted estimator as the cell output

In [None]:
# Logistic-regression predictions for the training and the test split.
y_hat_train_lr, y_hat_test_lr = lr.predict(x_train), lr.predict(x_test)

In [None]:
def model_eval(actual, predicted):
  """Print accuracy, confusion matrix and classification report for predictions.

  Args:
    actual: Ground-truth labels.
    predicted: Labels predicted by a model, compared against ``actual``.

  Returns:
    None. The accuracy (rounded to two decimals), the confusion matrix and the
    classification report are written to standard output, in that order.
  """
  # Compute every metric first so nothing is printed if one of them fails.
  matrix = confusion_matrix(actual, predicted)
  accuracy = accuracy_score(actual, predicted)
  report = classification_report(actual, predicted)

  print('The Accuracy of the model is: ', round(accuracy, 2))
  for section in (matrix, report):
    print(section)

In [None]:
# Logistic regression: performance on the training split.
model_eval(actual=y_train, predicted=y_hat_train_lr)

In [None]:
# Logistic regression: performance on the held-out test split.
model_eval(actual=y_test, predicted=y_hat_test_lr)

## Decision Tree

In [None]:
# Fit an unconstrained decision tree (default hyper-parameters) on the training split.
dtree = DecisionTreeClassifier()
dtree.fit(X=x_train, y=y_train)

In [None]:
# Decision-tree predictions for the training split first, then the test split.
y_hat_train_dtree, y_hat_test_dtree = (
    dtree.predict(features) for features in (x_train, x_test)
)

In [None]:
# Decision tree: performance on the training split.
model_eval(actual=y_train, predicted=y_hat_train_dtree)

In [None]:
# Decision tree: performance on the held-out test split.
model_eval(actual=y_test, predicted=y_hat_test_dtree)

In [None]:
# Train a shallow tree (depth capped at 4) purely so that it is small enough to draw.
clf = DecisionTreeClassifier(max_depth=4).fit(x_train, y_train)

# Draw the tree on a 15x10in figure, colouring the nodes.
plt.figure(figsize=(15, 10))
plot_tree(clf, filled=True)
plt.title("Decision tree trained on Bank D")
plt.show()

## Random Forest

In [None]:
# Fit a random forest with default hyper-parameters on the training split.
rf = RandomForestClassifier().fit(x_train, y_train)
rf  # display the fitted estimator as the cell output

In [None]:
# Random-forest predictions for the training and the test split.
y_hat_train_rf, y_hat_test_rf = rf.predict(x_train), rf.predict(x_test)

In [None]:
# Random forest: performance on the training split, so that it can be compared
# with the test-split evaluation to spot over-fitting.
model_eval(y_train, y_hat_train_rf)

In [None]:
# Random forest: performance on the held-out test split.
model_eval(actual=y_test, predicted=y_hat_test_rf)

## AdaBoost

In [None]:
# Fit an AdaBoost ensemble with default hyper-parameters on the training split.
ada = AdaBoostClassifier()
ada.fit(X=x_train, y=y_train)

In [None]:
# AdaBoost predictions for the training split first, then the test split.
y_hat_train_ada, y_hat_test_ada = (
    ada.predict(features) for features in (x_train, x_test)
)

In [None]:
# AdaBoost: performance on the training split.
model_eval(actual=y_train, predicted=y_hat_train_ada)

In [None]:
# AdaBoost: performance on the held-out test split.
model_eval(actual=y_test, predicted=y_hat_test_ada)

## Gradient Boost

In [None]:
# Fit a gradient-boosting ensemble with default hyper-parameters on the training split.
gb = GradientBoostingClassifier().fit(x_train, y_train)
gb  # display the fitted estimator as the cell output

In [None]:
# Gradient-boosting predictions for the training and the test split.
y_hat_train_gb, y_hat_test_gb = gb.predict(x_train), gb.predict(x_test)

In [None]:
# Gradient boosting: performance on the training split.
model_eval(actual=y_train, predicted=y_hat_train_gb)

In [None]:
# Gradient boosting: performance on the held-out test split.
model_eval(actual=y_test, predicted=y_hat_test_gb)

## XGBoost

In [None]:
# Encode the target labels as integers for the XGBoost model.
le = LabelEncoder()
# Learn the label -> integer mapping from the training labels only.
y_train_enc = le.fit_transform(y_train)
# Apply the already-fitted mapping to the test labels. Re-fitting on the test
# split could assign different integers if its set of labels differed, and it
# would overwrite the mapping learned from the training data.
y_test_enc = le.transform(y_test)

In [None]:
# Fit an XGBoost classifier with default hyper-parameters on the integer-encoded target.
xgb = XGBClassifier()
xgb.fit(X=x_train, y=y_train_enc)

In [None]:
# XGBoost predictions (integer-encoded labels) for the training split, then the test split.
y_hat_train_xgb, y_hat_test_xgb = (
    xgb.predict(features) for features in (x_train, x_test)
)

In [None]:
# XGBoost: performance on the training split (compared against the encoded labels).
model_eval(actual=y_train_enc, predicted=y_hat_train_xgb)

In [None]:
# XGBoost: performance on the held-out test split (compared against the encoded labels).
model_eval(actual=y_test_enc, predicted=y_hat_test_xgb)

## K-Nearest Neighbors

In [None]:
# Sweep the neighbour count from 1 to 99 and record the test accuracy
# (rounded to two decimals) obtained for each value.
acc_list = []
for k in range(1, 100):
  knn2 = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
  y_hat_test_knn2 = knn2.predict(x_test)
  acc_list += [round(accuracy_score(y_test, y_hat_test_knn2), 2)]
print(acc_list)

In [None]:
# Plot the test accuracy recorded in `acc_list` against the neighbour count.
# Dedicated names are used for the plot data so that the feature matrix `x` and
# the target `y` are not overwritten and the earlier cells stay re-runnable.
k_values = np.arange(1, len(acc_list) + 1)  # acc_list[0] belongs to k = 1
plt.figure(figsize=(15, 10))
plt.plot(k_values, acc_list, marker='o', linestyle='-')
# Label every point with its (k, accuracy) pair, rotated to limit overlap.
for k, accuracy in zip(k_values, acc_list):
    plt.annotate(f'({k}, {accuracy})', (k, accuracy), textcoords="offset points",
                 xytext=(0, 10), ha='center', rotation=90)
plt.xlabel("No of K")
plt.ylabel("Accuracy Score")
plt.show()

In [None]:
# Fit the final k-nearest-neighbours model with k = 8 on the training split.
knn = KNeighborsClassifier(n_neighbors=8).fit(x_train, y_train)
knn  # display the fitted estimator as the cell output

In [None]:
# KNN predictions for the training and the test split.
y_hat_train_knn, y_hat_test_knn = knn.predict(x_train), knn.predict(x_test)

In [None]:
# KNN: performance on the training split.
model_eval(actual=y_train, predicted=y_hat_train_knn)

In [None]:
# KNN: performance on the held-out test split.
model_eval(actual=y_test, predicted=y_hat_test_knn)

## Support Vector Machine Model

In [None]:
# Support vector classifier with a sigmoid kernel, fitted on the training split.
# NOTE: per the scikit-learn docs `degree` only affects the 'poly' kernel and is
# ignored by 'sigmoid'; it is kept so the estimator's parameters stay unchanged.
svm = SVC(kernel='sigmoid', degree=4)
svm.fit(x_train, y_train)

In [None]:
# SVM predictions for the training split first, then the test split.
y_hat_train_svm, y_hat_test_svm = (
    svm.predict(features) for features in (x_train, x_test)
)

In [None]:
# SVM: performance on the training split.
model_eval(actual=y_train, predicted=y_hat_train_svm)

In [None]:
# SVM: performance on the held-out test split.
model_eval(actual=y_test, predicted=y_hat_test_svm)

## SVM with Randomized Search CV

In [None]:
# Hyper-parameter search space for the SVC randomized search.
# The keys must match SVC's constructor argument names exactly (they are
# case-sensitive), otherwise the search fails when it sets them on the estimator.
params = {
    "C": np.arange(2, 10, 2),  # regularisation strengths 2, 4, 6, 8
    "kernel": ['linear', 'poly', 'rbf', 'sigmoid'],
    "degree": (3, 4, 5, 6),  # only used by the 'poly' kernel
}

In [None]:
# Randomized hyper-parameter search over `params`, using the `svm` estimator as
# the template; progress messages are enabled.
random_search_cv = RandomizedSearchCV(
    svm,
    params,
    verbose=True,
)

In [None]:
# Run the randomized search on the training split.
random_search_cv.fit(X=x_train, y=y_train)

# Model Selection

In [None]:
# Collect the test accuracy of every fitted model into one comparison table.
# Each entry is (model label, true test labels, predicted test labels); XGBoost
# is scored against the integer-encoded labels it was trained on.
test_predictions = (
    ('Logistic Regression', y_test, y_hat_test_lr),
    ('Decision Tree', y_test, y_hat_test_dtree),
    ('Random Forest', y_test, y_hat_test_rf),
    ('Ada Boosting', y_test, y_hat_test_ada),
    ('Gradient Boosting', y_test, y_hat_test_gb),
    ('XGBoosting', y_test_enc, y_hat_test_xgb),
    ('KNN', y_test, y_hat_test_knn),
    ('SVM', y_test, y_hat_test_svm),
)
accuracy_table = [
    [label, round(accuracy_score(truth, guesses), 2)]
    for label, truth, guesses in test_predictions
]
df1 = pd.DataFrame(accuracy_table, columns=['Model', 'Test_Accuracy'])
print(df1)