# Import Libraries

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Data Preprocessing

In [None]:
# Read the train and test CSV files; both are parsed with ';' as the field separator.
train, test = (
    pd.read_csv(csv_name, sep=';') for csv_name in ("train.csv", "test.csv")
)

In [None]:
# Stack the train rows on top of the test rows and renumber the index from 0.
Banking_data = pd.concat(objs=(train, test), axis=0, ignore_index=True)

In [None]:
# Get column names of the merged frame
Banking_data.columns

In [None]:
# Missing-value count per column, listed from the most to the fewest
null_counts = Banking_data.isna().sum()
null_counts.sort_values(ascending=False)

In [None]:
# Save the merged dataframe to a new CSV file (the row index is not written)
Banking_data.to_csv("Banking_data.csv", index=False)

In [None]:
# Load the merged CSV into the working dataframe used by the rest of the notebook
df = pd.read_csv("Banking_data.csv")

In [None]:
# Preview the working dataframe as loaded
df

In [None]:
# Drop columns with too many unknown values
df = df.drop(columns=['contact', 'poutcome'])

In [None]:
# Drop the columns that are not used as model features
unwanted_columns = ['marital', 'education', 'default', 'day', 'campaign']
df = df.drop(columns=unwanted_columns)

In [None]:
# Preview the dataframe after the column drops
df

In [None]:
# Give the prediction (target) column a descriptive name
df = df.rename(columns={'y': 'FDcreated'})

In [None]:
# Replace the ambiguous loan column names with explicit ones
clearer_names = {'housing': 'housingLoan', 'loan': 'personalLoan'}
df = df.rename(columns=clearer_names)

In [None]:
# Preview the dataframe after the renames
df

# Encoding

In [None]:
# Encode the binary yes/no columns as 1/0
yes_no_codes = {'yes': 1, 'no': 0}
for binary_col in ('housingLoan', 'personalLoan', 'FDcreated'):
    df[binary_col] = df[binary_col].map(yes_no_codes)

# Give an integer code to each job and each month.
# Series.map() returns NaN for any value that is not a key of the mapping, and
# rows holding NaN are removed later by dropna(). The job map therefore accepts
# both 'admin.' and 'admin': the raw data presumably spells the category
# 'admin.' (as the UCI Bank Marketing files do) -- verify against train.csv.
# With only the 'admin' key, every such row would be dropped without warning.
job_codes = {
    'admin.': 1, 'admin': 1, 'unknown': 2, 'unemployed': 3, 'management': 4,
    'housemaid': 5, 'entrepreneur': 6, 'student': 7, 'blue-collar': 8,
    'self-employed': 9, 'retired': 10, 'technician': 11, 'services': 12,
}
month_codes = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}
df['job'] = df['job'].map(job_codes)
df['month'] = df['month'].map(month_codes)

df

In [None]:
# Remove every row that contains at least one NaN value
df = df.dropna(axis=0, how='any')

# Data Visualizing

(1) Count of customers who created an FD account vs. those who did not

In [None]:
import matplotlib.pyplot as plt

# Tally how many rows fall into each 'FDcreated' class (1 or 0)
fd_counts = df['FDcreated'].value_counts()

# Draw one bar per class on an explicit Axes object
fig, ax = plt.subplots()
ax.bar(fd_counts.index, fd_counts.values)

# Axis labels and title so the figure can be read on its own
ax.set_xlabel('FD Created (1) vs Not Created (0)')
ax.set_ylabel('Count')
ax.set_title('Distribution of FD Creation')

# Render the figure
plt.show()

(2) Compare the ages of customers who took a fixed deposit with those who didn't

In [None]:
import matplotlib.pyplot as plt

# Model Training

In [None]:
# Divide the dataset into independent (X) and dependent (y) variables.
# The target is selected by its column name instead of by position, so a change
# in column order cannot silently put the wrong column into y.
X = df.drop(columns='FDcreated').values
y = df['FDcreated'].values

In [None]:
# Write the current working dataframe to 'df.csv' (the row index is not written)
df.to_csv('df.csv', index=False)

In [None]:
# Split the data into training (75%) and test (25%) sets.
# random_state pins the shuffle so that the split -- and therefore every metric
# reported below -- is the same on each Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

Decision Tree classifier

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pickle

# Create the Decision Tree Classifier model.
# random_state is fixed so that repeated runs build the same tree.
dt = DecisionTreeClassifier(random_state=42)

# Train the model
dt.fit(X_train, y_train)

# Make predictions on the test set
dt_pred = dt.predict(X_test)

# Evaluate the model: fraction of test rows predicted correctly
accuracy = accuracy_score(y_test, dt_pred)
print("Accuracy of Decision Tree classifier :", accuracy)

# Create a pickle file for the model.
# The context manager closes the file even if pickle.dump raises.
with open('dt_model.pkl', 'wb') as dt_pickle:
    pickle.dump(dt, dt_pickle)

KNeighbors Classifier

In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pickle

# Create the KNeighborsClassifier model (votes among the 5 nearest neighbours)
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn.fit(X_train, y_train)

# Make predictions on the test set
knn_pred = knn.predict(X_test)

# Evaluate the model: fraction of test rows predicted correctly
knn_accuracy = accuracy_score(y_test, knn_pred)
print("Accuracy of KNeighbors Classifier:", knn_accuracy)

# Create a pickle file for the model.
# The context manager closes the file even if pickle.dump raises.
with open('knn_model.pkl', 'wb') as knn_pickle:
    pickle.dump(knn, knn_pickle)

Linear regression Model

In [None]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error
import pickle

# Create the LinearRegression model.
# NOTE(review): y is presumably the 0/1 'FDcreated' label, so this fits a
# regression line to a classification target and predicts continuous scores.
reg = LinearRegression()

# Train the model
reg.fit(X_train, y_train)

# Make predictions on the test set (continuous values, not class labels)
reg_pred = reg.predict(X_test)

# Mean Squared Error between the continuous predictions and the true labels
mse = mean_squared_error(y_test, reg_pred)
print("Mean Squared Error (MSE) of Linear Regression Model:", mse)

# 1 - MSE is not a classification accuracy. To get a figure comparable with the
# other models, threshold the continuous predictions at 0.5 to obtain 0/1
# labels and score them with accuracy_score.
reg_labels = (reg_pred >= 0.5).astype(y_test.dtype)
lr_acc = accuracy_score(y_test, reg_labels)
print("Accuracy of Linear regression Model classifier :", lr_acc)

# Create a pickle file for the model.
# The context manager closes the file even if pickle.dump raises.
with open('linear_regression_model.pkl', 'wb') as linear_regression_pickle:
    pickle.dump(reg, linear_regression_pickle)


Support Vector Machine

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pickle

# Create the SVC model with scikit-learn's default settings
svc = SVC()

# Train the model
svc.fit(X_train, y_train)

# Make predictions on the test set
svc_pred = svc.predict(X_test)

# Evaluate the model: fraction of test rows predicted correctly
svc_accuracy = accuracy_score(y_test, svc_pred)
print("Accuracy of Support Vector Machine:", svc_accuracy)

# Misclassification rate is the complement of accuracy
svc_error = 1 - svc_accuracy
print("Error of Support Vector Machine:", svc_error)

# Create a pickle file for the model.
# The context manager closes the file even if pickle.dump raises.
with open('support_vector_machine.pkl', 'wb') as svc_pickle:
    pickle.dump(svc, svc_pickle)
