# Support Vector Machine and Naive Bayes Algorithms

### Loading the required libraries

In [3]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
%matplotlib inline

### Loading the Dataset named DrugData

In [4]:
# Read the drug records from the CSV file in the working directory into a
# DataFrame, then preview the first five rows to check the columns.
D = pd.read_csv(filepath_or_buffer='DrugData.csv')
D.head(n=5)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,1,2,1,25.355,drugY
1,47,0,1,1,13.093,drugC
2,47,0,1,1,10.114,drugC
3,28,1,0,1,7.798,drugX
4,61,1,1,1,18.043,drugY


### Assigning x and y variables

In [12]:
# Features are every column except the target. 'Unnamed: 0' is excluded as
# well: in the D.head() preview its values simply repeat the row index
# (0, 1, 2, ...), so it appears to be a leftover index column from when the
# CSV was written, not a patient attribute. Leaving it in would feed an
# arbitrary row counter to the models.
# errors='ignore' keeps this cell working if that column is not present.
feature_frame = D.drop(columns=['Drug', 'Unnamed: 0'], errors='ignore')

x = feature_frame.to_numpy()  # feature matrix (Age, Sex, BP, Cholesterol, Na_to_K)
y = D['Drug'].to_numpy()      # target: the prescribed drug label for each row

### Creating Training and Test Datasets

In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Stratifying on y keeps the drug-class
# proportions the same in both splits; the fixed random_state makes the split
# repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=100,
)

### Scaling the Dataset

In [14]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transform to both splits, so that no statistics
# from the test set are used during fitting.
sc = StandardScaler()
sc.fit(x_train)
x_train2 = sc.transform(x_train)
x_test2 = sc.transform(x_test)

### Identifying the unique classes in the Drug column

In [8]:
# List the distinct target labels, in order of first appearance in the data.
D['Drug'].unique()

array(['drugY', 'drugC', 'drugX', 'drugA', 'drugB'], dtype=object)

# Conducting Analysis Using the SVM Model

In [10]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# Fit a linear-kernel SVM on the scaled training data and report how it
# performs on the held-out test split.
for name, method in [('SVM', SVC(kernel='linear', random_state=100))]:
    method.fit(x_train2, y_train)
    predict = method.predict(x_test2)

    # Take the class labels from the fitted estimator instead of hard-coding
    # them. Passing the same `labels` to both metrics guarantees that the
    # confusion-matrix rows/columns and the report rows appear in exactly this
    # order, and that every class is listed even if one happens to be absent
    # from the test split.
    class_labels = list(method.classes_)
    target_names = [str(label) for label in class_labels]

    print('\nEstimator: {}'.format(name))
    # Rows are true classes, columns are predicted classes.
    print(confusion_matrix(y_test, predict, labels=class_labels))
    print(classification_report(y_test, predict,
                                labels=class_labels,
                                target_names=target_names))


Estimator: SVM
[[ 5  0  0  0  0]
 [ 0  2  0  0  1]
 [ 0  0  3  0  0]
 [ 0  0  0 11  0]
 [ 0  0  0  1 17]]
              precision    recall  f1-score   support

       drugA       1.00      1.00      1.00         5
       drugB       1.00      0.67      0.80         3
       drugC       1.00      1.00      1.00         3
       drugX       0.92      1.00      0.96        11
       drugY       0.94      0.94      0.94        18

    accuracy                           0.95        40
   macro avg       0.97      0.92      0.94        40
weighted avg       0.95      0.95      0.95        40



# Conducting Analysis Using the Naive Bayes Model

In [11]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix

# Fit a Gaussian Naive Bayes classifier on the scaled training data and report
# how it performs on the held-out test split.
for name, method in [('Naive Bayes', GaussianNB())]:
    method.fit(x_train2, y_train)
    predict = method.predict(x_test2)

    # Take the class labels from the fitted estimator instead of hard-coding
    # them. Passing the same `labels` to both metrics guarantees that the
    # confusion-matrix rows/columns and the report rows appear in exactly this
    # order, and that every class is listed even if one happens to be absent
    # from the test split.
    class_labels = list(method.classes_)
    target_names = [str(label) for label in class_labels]

    print('\nEstimator: {}'.format(name))
    # Rows are true classes, columns are predicted classes.
    print(confusion_matrix(y_test, predict, labels=class_labels))
    print(classification_report(y_test, predict,
                                labels=class_labels,
                                target_names=target_names))


Estimator: Naive Bayes
[[ 5  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  3  0  0]
 [ 0  0  0 10  1]
 [ 1  1  3  1 12]]
              precision    recall  f1-score   support

       drugA       0.83      1.00      0.91         5
       drugB       0.75      1.00      0.86         3
       drugC       0.50      1.00      0.67         3
       drugX       0.91      0.91      0.91        11
       drugY       0.92      0.67      0.77        18

    accuracy                           0.82        40
   macro avg       0.78      0.92      0.82        40
weighted avg       0.86      0.82      0.83        40

