**SVM - Support Vector Machine**

In machine learning, support vector machines are supervised learning models with associated learning algorithms that analyze data for classification and regression analysis.

**1. PREPARING THE DATA**

In [1]:
# Importing LIBRARIES


import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [2]:
# Importing datasets needed for this activity from google colab
from google.colab import drive

# Google Drive share link of the CSV dataset
url = 'https://drive.google.com/file/d/16JoNZGR2rFfZn-ptstpSDK3XxLVqaq6j/view?usp=sharing'

# The file id is the second-to-last path segment of the share link
file_id = url.split('/')[-2]

# Direct-download URL. It gets its own name so the imported `drive` module
# is not overwritten by a plain string.
download_url = 'https://drive.google.com/uc?id=' + file_id
df = pd.read_csv(download_url)
df

Unnamed: 0,ID,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online,CreditCard
0,1,25,1,49,91107,4,1.6,1,0.0,0.0,1.0,0,0,0
1,2,45,19,34,90089,3,1.5,1,0.0,0.0,1.0,0,0,0
2,3,39,15,11,94720,1,1.0,1,0.0,0.0,0.0,0,0,0
3,4,35,9,100,94112,1,2.7,2,0.0,0.0,0.0,0,0,0
4,5,35,8,45,91330,4,1.0,2,0.0,0.0,0.0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,4996,29,3,40,92697,1,1.9,3,0.0,0.0,0.0,0,1,0
4996,4997,30,4,15,92037,4,0.4,1,85.0,0.0,0.0,0,1,0
4997,4998,63,39,24,93023,2,0.3,3,0.0,0.0,0.0,0,0,0
4998,4999,65,40,49,90034,3,0.5,2,0.0,0.0,0.0,0,1,0


**2. DATA PROCESSING**

In [3]:
# Count the missing entries in every column
df.isnull().sum()

ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              2
Personal Loan         1
Securities Account    1
CD Account            0
Online                0
CreditCard            0
dtype: int64

In [4]:
# TAKING CARE of MISSING DATA


# .mean fills the column with the mean of that column (continuous values).
# .mode fills the column with the most frequent value. This is used for the
# binary 0/1 indicator columns, where a mean would insert a fractional value
# that is not a valid 0/1 label.

# Continuous column -> mean
df['Mortgage'] = df['Mortgage'].fillna(df['Mortgage'].mean())

# Binary indicator columns -> most frequent value
for binary_col in ['Personal Loan', 'Securities Account']:
    df[binary_col] = df[binary_col].fillna(df[binary_col].mode()[0])


# CHECKING FOR NULL again.
df.isna().sum()


ID                    0
Age                   0
Experience            0
Income                0
ZIP Code              0
Family                0
CCAvg                 0
Education             0
Mortgage              0
Personal Loan         0
Securities Account    0
CD Account            0
Online                0
CreditCard            0
dtype: int64

In [5]:
# Separating the features from the target.
# X is independent (every column except 'CreditCard').
# y is dependent (the 'CreditCard' column).

# .drop DROPS the given column.
# errors='ignore' keeps this cell re-runnable: once 'ID' is already gone,
# a plain drop would raise KeyError on the second run.
df = df.drop(columns='ID', errors='ignore')

X = df.drop('CreditCard', axis =1)
y = df['CreditCard']

print("The values of X are",X)

print("The values of Y are",y)

The values of X are       Age  Experience  Income  ZIP Code  Family  CCAvg  Education  Mortgage  \
0      25           1      49     91107       4    1.6          1       0.0   
1      45          19      34     90089       3    1.5          1       0.0   
2      39          15      11     94720       1    1.0          1       0.0   
3      35           9     100     94112       1    2.7          2       0.0   
4      35           8      45     91330       4    1.0          2       0.0   
...   ...         ...     ...       ...     ...    ...        ...       ...   
4995   29           3      40     92697       1    1.9          3       0.0   
4996   30           4      15     92037       4    0.4          1      85.0   
4997   63          39      24     93023       2    0.3          3       0.0   
4998   65          40      49     90034       3    0.5          2       0.0   
4999   28           4      83     92612       3    0.8          1       0.0   

      Personal Loan  Securities

In [6]:
# Preview the first five rows of the feature matrix X.

X.head(n=5)

Unnamed: 0,Age,Experience,Income,ZIP Code,Family,CCAvg,Education,Mortgage,Personal Loan,Securities Account,CD Account,Online
0,25,1,49,91107,4,1.6,1,0.0,0.0,1.0,0,0
1,45,19,34,90089,3,1.5,1,0.0,0.0,1.0,0,0
2,39,15,11,94720,1,1.0,1,0.0,0.0,0.0,0,0
3,35,9,100,94112,1,2.7,2,0.0,0.0,0.0,0,0
4,35,8,45,91330,4,1.0,2,0.0,0.0,0.0,0,0


In [7]:
# Preview the first five entries of the target y.
y.head(n=5)

0    0
1    0
2    0
3    0
4    1
Name: CreditCard, dtype: int64

In [8]:
# Splitting Train and Test values (70% train / 30% test)
from sklearn.model_selection import train_test_split

# A fixed random_state makes the split identical on every
# "Restart & Run All", so the metrics and the saved model are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [9]:
# FEATURE SCALING
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same
# fitted scaler to both the training and the test split.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

**3. SVM - Support Vector Machine**

In [11]:
# Support Vector Classifier from scikit-learn
from sklearn.svm import SVC

# Estimator settings collected in one place
svc_settings = {'kernel': 'rbf', 'random_state': None}

# Build the classifier from those settings
classifier = SVC(**svc_settings)

# Train on the scaled training split
classifier.fit(X_train, y_train)

In [12]:
# Intercept (constant term) of the fitted SVC decision function
classifier.intercept_

array([-0.4590496])

In [13]:
# Number of support vectors the fitted model kept for each class
classifier.n_support_

array([1268,  941], dtype=int32)

In [14]:
# The support vectors themselves; values are in the standardized feature
# space because the model was fitted on the scaled X_train
classifier.support_vectors_

array([[-0.10791319, -0.17389229, -0.46193197, ..., -0.32598126,
        -0.24943031,  0.8321397 ],
       [ 0.41690777,  0.52437016, -0.76525222, ..., -0.32598126,
        -0.24943031,  0.8321397 ],
       [ 0.85425857,  0.96078419, -1.17690113, ..., -0.32598126,
        -0.24943031,  0.8321397 ],
       ...,
       [-0.63273414, -0.69758912, -0.98190954, ..., -0.32598126,
        -0.24943031, -1.2017213 ],
       [-1.59490589, -1.65769999, -0.0069516 , ..., -0.32598126,
        -0.24943031,  0.8321397 ],
       [ 0.76678841,  0.69893577,  1.96463   , ..., -0.32598126,
         4.00913591,  0.8321397 ]])

In [16]:
# Run the fitted classifier over both splits: training first, then test
y_pred_train, y_pred_test = map(classifier.predict, (X_train, X_test))

In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score

# CONFUSION MATRIX for each split (rows: actual class, columns: predicted class)
cm_train = confusion_matrix(y_true=y_train, y_pred=y_pred_train)
cm_test = confusion_matrix(y_true=y_test, y_pred=y_pred_test)

# Training matrix is shown first, test matrix second
for matrix in (cm_train, cm_test):
    print(matrix)

[[2461   20]
 [ 864  155]]
[[1043    6]
 [ 377   74]]


In [18]:
# ACCURACY on the training split, reported as a percentage
accuracy_train = accuracy_score(y_true=y_train, y_pred=y_pred_train)
train_accuracy_pct = accuracy_train * 100
print(train_accuracy_pct, '%')

74.74285714285715 %


In [19]:
# Accuracy on the held-out test split, reported as a percentage
accuracy_test = accuracy_score(y_true=y_test, y_pred=y_pred_test)
test_accuracy_pct = accuracy_test * 100
print(test_accuracy_pct, '%')

74.46666666666667 %


In [20]:
from sklearn.metrics import classification_report

# Per-class precision / recall / f1 for each split
report_train = classification_report(y_train, y_pred_train)
report_test = classification_report(y_test, y_pred_test)

# Print each report under its heading, training split first
for heading, report in (
    ("Training Set Metrics:", report_train),
    ("Testing Set Metrics:", report_test),
):
    print(heading)
    print(report)

# Only shown when the classifier was built with a linear kernel
if classifier.kernel == 'linear':
    feature_weights = classifier.coef_
    print("Feature Weights:", feature_weights, sep="\n")


Training Set Metrics:
              precision    recall  f1-score   support

           0       0.74      0.99      0.85      2481
           1       0.89      0.15      0.26      1019

    accuracy                           0.75      3500
   macro avg       0.81      0.57      0.55      3500
weighted avg       0.78      0.75      0.68      3500

Testing Set Metrics:
              precision    recall  f1-score   support

           0       0.73      0.99      0.84      1049
           1       0.93      0.16      0.28       451

    accuracy                           0.74      1500
   macro avg       0.83      0.58      0.56      1500
weighted avg       0.79      0.74      0.67      1500



In [21]:
# Prompt shown for each of the 12 features, keyed by the column name the
# model was trained on. The dict keeps insertion order, so the prompts
# appear in the order listed here.
feature_prompts = {
    'Age': "Enter Age: ",
    'Experience': "Enter Experience: ",
    'Income': "Enter Income: ",
    'ZIP Code': "Enter ZIP Code: ",
    'Family': "Enter Family: ",
    'CCAvg': "Enter CCAvg: ",
    'Education': "Enter Education (1, 2, 3): ",
    'Mortgage': "Enter Mortgage: ",
    'Personal Loan': "Personal Loan (0 for No, 1 for Yes): ",
    'Securities Account': "Securities Account (0 for No, 1 for Yes): ",
    'CD Account': "CD Account (0 for No, 1 for Yes): ",
    'Online': "Online (0 for No, 1 for Yes): ",
}

# Collect user input for every feature (each answer must parse as a number)
user_values = {column: float(input(prompt)) for column, prompt in feature_prompts.items()}

# Build a one-row DataFrame and select the columns in the training order of X.
# Selecting by name ties the input to the training columns (a missing column
# raises KeyError instead of silently shifting values), and passing named
# columns matches how the scaler was fitted.
user_input = pd.DataFrame([user_values])[list(X.columns)]

# Scale the user input using the same StandardScaler used for the training data
user_input_scaled = sc.transform(user_input)

# Make a prediction using the trained SVM model
prediction = classifier.predict(user_input_scaled)

# predict returns an array with one entry per input row; read the single
# entry explicitly instead of truth-testing the whole array
if prediction[0] == 1:
    print("The user is eligible for a Credit Card.")
else:
    print("The user is not eligible for a Credit Card.")


Enter Age: 14


KeyboardInterrupt: ignored

In [23]:
import joblib

# Persist the fitted SVM classifier
joblib.dump(value=classifier, filename='svm_modelN.pkl')

# Persist the fitted StandardScaler alongside it
joblib.dump(value=sc, filename='scalerN.pkl')


['scalerN.pkl']