<a href="https://colab.research.google.com/github/Fazle-Rakib/machine-learning-basics/blob/main/Final%20Assignment/BankingSVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset archive stored there can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import numbers

In [2]:
from sklearn.preprocessing import LabelEncoder
# Single module-level encoder; data_conversion() calls fit_transform on it
# for each categorical column it is given.
label_encoder_instance = LabelEncoder()

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [4]:
!unzip '/content/drive/MyDrive/ML Assignment Dataset/bank-data.zip'

Archive:  /content/drive/MyDrive/ML Assignment Dataset/bank-data.zip
   creating: bank-data/
  inflating: __MACOSX/._bank-data    
  inflating: bank-data/.DS_Store     
  inflating: __MACOSX/bank-data/._.DS_Store  
  inflating: bank-data/bank-full.csv  
  inflating: __MACOSX/bank-data/._bank-full.csv  


In [5]:
# Read the extracted bank dataset; the file uses ';' as its field separator.
csv_path = '/content/bank-data/bank-full.csv'
df = pd.read_csv(csv_path, sep=';')
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


## Determining the features

In [7]:
# List the distinct labels that occur in the 'poutcome' column
df['poutcome'].unique()

array(['unknown', 'failure', 'other', 'success'], dtype=object)

In [10]:
# Number of rows carrying each 'poutcome' label
column_counts = df['poutcome'].value_counts()
column_counts

unknown    36959
failure     4901
other       1840
success     1511
Name: poutcome, dtype: int64

In [12]:
# Keep only the counts for the 'success' and 'other' labels, in that order
relevant_counts = column_counts.loc[['success', 'other']]
relevant_counts

success    1511
other      1840
Name: poutcome, dtype: int64

In [None]:
# Print each column's name followed by the frequency of each of its values
for col_name, col_values in df.items():
  print(col_name)
  print(col_values.value_counts())

In [6]:
# Remove the columns that are left out of the feature set, then show the
# columns that remain.
dropped_columns = ['default', 'contact', 'day', 'month', 'duration']
df = df.drop(columns=dropped_columns)
df.columns

Index(['age', 'job', 'marital', 'education', 'balance', 'housing', 'loan',
       'campaign', 'pdays', 'previous', 'poutcome', 'y'],
      dtype='object')

## Dataset Preprocessing

In [7]:
# Map categorical string values to integer codes
def data_conversion(column):
  """Label-encode `column` using the shared `label_encoder_instance`.

  `fit_transform` is called on the shared encoder every time, so the encoder
  is refit on each column passed in.

  Args:
    column: the values to encode (a DataFrame column in this notebook).

  Returns:
    The integer codes produced by `fit_transform` for `column`.
  """
  encoded = label_encoder_instance.fit_transform(column)
  return encoded

In [8]:
# Min-max scaling of a numeric feature to the [0, 1] range
def mean_scaling(ara):
  """Rescale `ara` linearly so its minimum maps to 0 and its maximum to 1.

  Despite the name, this is min-max normalisation: (ara - min) / (max - min).

  Args:
    ara: numeric pandas Series or NumPy array.

  Returns:
    The element-wise rescaled values. When every value is identical the
    range is zero, so all zeros are returned instead of dividing by zero.
  """
  lowest = ara.min()
  value_range = ara.max() - lowest
  if value_range == 0:
    # Constant input: avoid the 0/0 that would fill the column with NaN.
    return (ara - lowest).astype(float)
  return (ara - lowest) / value_range

In [9]:
# Encode every non-numeric column as integer codes and min-max scale every
# numeric column, printing the resulting range of each column.
# NOTE(review): the scaling statistics come from the full dataset, before the
# train/test split, and re-running this cell would also scale the columns that
# were already encoded to integers. Run it once on a freshly loaded df.
for column in df.columns:
  # Decide by dtype instead of inspecting the first unique value, which
  # raises IndexError when a column is empty.
  if not pd.api.types.is_numeric_dtype(df[column]):
    df[column] = data_conversion(df[column])
  else:
    df[column] = mean_scaling(df[column])
  print(f'{column} - min:{df[column].min()} max:{df[column].max()}')

age - min:0.0 max:1.0
job - min:0 max:11
marital - min:0 max:2
education - min:0 max:3
balance - min:0.0 max:1.0
housing - min:0 max:1
loan - min:0 max:1
campaign - min:0.0 max:1.0
pdays - min:0.0 max:1.0
previous - min:0.0 max:1.0
poutcome - min:0 max:3
y - min:0 max:1


### Train-test Split

In [10]:
# Separate the target column 'y' from the remaining feature columns
y = df['y']
X = df.drop(columns=['y'])
X.head()

Unnamed: 0,age,job,marital,education,balance,housing,loan,campaign,pdays,previous,poutcome
0,0.519481,4,1,2,0.092259,1,0,0.0,0.0,0.0,3
1,0.337662,9,2,1,0.073067,1,0,0.0,0.0,0.0,3
2,0.194805,2,1,1,0.072822,1,1,0.0,0.0,0.0,3
3,0.376623,1,1,3,0.086476,1,0,0.0,0.0,0.0,3
4,0.194805,11,2,3,0.072812,0,0,0.0,0.0,0.0,3


In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Shapes of the four splits, in the order: train X, test X, train y, test y
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((36168, 11), (9043, 11), (36168,), (9043,))

## Training the SVM model on the training set and evaluating it

In [27]:
# SVM classifier with an RBF kernel and gamma='auto'; this is the
# configuration with the highest scores among the results recorded below.
svm_model = SVC(kernel='rbf', gamma='auto')

In [28]:
# Fit on the training split, then predict labels for the held-out split.
# fit() returns the fitted estimator, so the two calls can be chained.
y_pred = svm_model.fit(X_train, y_train).predict(X_test)

In [29]:
# Score the test-set predictions: plain accuracy, plus macro-averaged
# precision, recall and F1.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
    scorer(y_test, y_pred, average='macro')
    for scorer in (precision_score, recall_score, f1_score)
)

# Report each metric on its own line
for metric_label, metric_value in (
    ('Accuracy:', accuracy),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1 score:', f1),
):
  print(metric_label, metric_value)

Accuracy: 0.8857679973460135
Precision: 0.8073081709197981
Recall: 0.5404207439872968
F1 score: 0.5456701181900204



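Linear Kernel (macro precision is exactly half the accuracy and macro recall is 0.5, so the model predicts the majority class for every test sample)
```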
Accuracy: 0.8793541966161672
Precision: 0.4396770983080836
Recall: 0.5
F1 score: 0.4679023242130039
```

RBF with gamma set to auto
```
Accuracy: 0.8857679973460135
Precision: 0.8073081709197981
Recall: 0.5404207439872968
F1 score: 0.5456701181900204
```
RBF with gamma set to scale
```
Accuracy: 0.8793541966161672
Precision: 0.4396770983080836
Recall: 0.5
F1 score: 0.4679023242130039
```

Polynomial Kernel
```
Accuracy: 0.8793541966161672
Precision: 0.4396770983080836
Recall: 0.5
F1 score: 0.4679023242130039
```





