   <a id='top'></a>
<div class="list-group" id="list-tab" role="tablist">
<h1 style='background:#41BEB9;padding-top:12px; border:0; color:black'><center>Content</center></h1> 

[1. Libraries](#1)
    
[2. Data Analysis](#2)     

[3. Data Preprocessing](#3)       
    
[4. Model Implementation](#4)     

[5. Sklearn Implementation](#5) 

[6. Thank You](#6)

    
 <a id="1"></a>
<h1 style='background:#41BEB9;padding-top:12px; border:0; color:black'><center>Libraries</center></h1> 


# Libraries


In [None]:
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px



from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

 <a id="2"></a>
<h1 style='background:#41BEB9;padding-top:17px; border:0; color:black'><center>Data Analysis</center></h1> 

# Data Analysis

In [None]:
# Read the breast-cancer CSV from the Kaggle input directory and preview the first rows.
df = pd.read_csv(
    '/kaggle/input/breast-cancer-dataset/breast-cancer.csv'
)
df.head()

In [None]:
# Row count per diagnosis label, one colour per label.
px.histogram(
    df,
    x='diagnosis',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)


In [None]:
# Distribution of area_mean, split by diagnosis label.
px.histogram(
    df,
    x='area_mean',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)

In [None]:
# Distribution of radius_mean, split by diagnosis label.
px.histogram(
    df,
    x='radius_mean',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)

In [None]:
# Distribution of perimeter_mean, split by diagnosis label.
px.histogram(
    df,
    x='perimeter_mean',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)

In [None]:
# Distribution of smoothness_mean, split by diagnosis label.
px.histogram(
    df,
    x='smoothness_mean',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)

In [None]:
# Distribution of texture_mean, split by diagnosis label.
px.histogram(
    df,
    x='texture_mean',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)

In [None]:
# symmetry_worst per row, coloured by diagnosis.
# NOTE(review): only `x` is passed, so the other axis is left to plotly's
# default (presumably the row index) - confirm this is the intended view.
px.scatter(
    df,
    x='symmetry_worst',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)


In [None]:
# concavity_worst per row, coloured by diagnosis.
# NOTE(review): only `x` is passed, so the other axis is left to plotly's
# default (presumably the row index) - confirm this is the intended view.
px.scatter(
    df,
    x='concavity_worst',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)


In [None]:
# fractal_dimension_worst per row, coloured by diagnosis.
# NOTE(review): only `x` is passed, so the other axis is left to plotly's
# default (presumably the row index) - confirm this is the intended view.
px.scatter(
    df,
    x='fractal_dimension_worst',
    color='diagnosis',
    color_discrete_sequence=['#05445E', '#75E6DA'],
)


 <a id="3"></a>
<h1 style='background:#41BEB9;padding-top:12px; border:0; color:black'><center>Data Preprocessing</center></h1> 


# Data Preprocessing

In [None]:
# Read the CSV again so that `df` holds the file contents as stored on disk
# (presumably so preprocessing starts from an unmodified frame).
df = pd.read_csv(
    '/kaggle/input/breast-cancer-dataset/breast-cancer.csv'
)

df.head()

In [None]:
# Drop the redundant 'id' column.
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell safe to re-run: a second run is a no-op rather than a KeyError.
df = df.drop(columns=['id'], errors='ignore')

In [None]:
# Summary statistics, transposed so that each described column becomes a row.
df.describe().transpose()


## Encode target

In [None]:
# Encode the label as 1 where diagnosis == 'M' and 0 otherwise.
# The dtype guard makes the cell idempotent: without it, re-running the cell
# would compare the already-encoded integers against 'M' and silently set
# every label to 0.
if not pd.api.types.is_numeric_dtype(df['diagnosis']):
    df['diagnosis'] = (df['diagnosis'] == 'M').astype(int)

## Get highly correlated features

In [None]:
# Pairwise correlation matrix of all columns (Pearson is the pandas default,
# spelled out here for clarity).
corr = df.corr(method='pearson')

In [None]:
# Annotated heatmap of the correlation matrix on a 20x20-inch canvas.
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(corr, cmap='mako_r', annot=True, ax=ax)
plt.show()

In [None]:
# Absolute correlation of every column with the target
cor_target = corr["diagnosis"].abs()

# Select highly correlated features (threshold = 0.2)
relevant_features = cor_target[cor_target > 0.2]

# Collect the names of the selected features, leaving out the target itself.
# The Series index already holds the column names, so no (index, value)
# iteration is needed; Series.iteritems() no longer exists in pandas >= 2.0.
names = [name for name in relevant_features.index if name != 'diagnosis']

# Display the results
print(names)

## Assign data and labels

In [None]:
# Feature matrix as a NumPy array (selected columns only); labels stay a pandas Series.
X = df.loc[:, names].values
y = df.loc[:, 'diagnosis']

In [None]:
# Split the data into training and validation parts: 20% is held out,
# and the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Scale the data

In [None]:
# Fit the scaler on the training split only, so no information from the
# validation split leaks into the scaling parameters.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both splits.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


 <a id="4"></a>
<h1 style='background:#41BEB9;padding-top:12px; border:0; color:black'><center>Model Implementation</center></h1> 

# Model Implementation

# How the algorithm works

**Our goal is to find a hyperplane that separates the data into 2 categories (Binary Classification)**

## Key Points:

### Hyperplane
**A hyperplane is a subspace whose dimension is one less than that of its ambient space. For example, in an n-dimensional space the hyperplane is (n-1)-dimensional.**

**For SVMs, the goal is to find the hyperplane that maximizes the margin between the two classes.**

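**The hyperplane equation is:**

**$$wx - b = 0$$**

**Every training point must lie on the correct side of the margin:**

**$$wx - b \ge 1, \text{ at } y = 1$$**
**$$wx - b \le -1, \text{ at } y = -1$$**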

**In general**

**$$y(wx - b) \ge 1$$**


**So what's a margin?**


### Margin

**Margin is the distance between the hyperplane and the data-points closest to it (support vectors)**


## Gradients Equation
**The per-sample loss is $J = λ\lVert w \rVert^2 + \max(0,\ 1 - y(wx - b))$.**

**$$\text{If } y(wx - b) \ge 1$$**

$$\frac{\partial J}{\partial w}  =[2λw], \frac{\partial J}{\partial b}  =0$$
 


**$$\text{Else}$$**

$$\frac{\partial J}{\partial w}  = 2λw - yx, \quad \frac{\partial J}{\partial b}  = y$$

In [None]:
class SVM:
    """Linear soft-margin SVM trained with per-sample (sub)gradient descent.

    The decision function is ``np.dot(x, w) - b``. For every training sample
    the parameters take one gradient step on
    ``lambdaa * ||w||^2 + max(0, 1 - y * (np.dot(x, w) - b))``
    where ``y`` is the label mapped to -1 / +1.

    Parameters
    ----------
    iterations : int, default 1000
        Number of full passes over the training data.
    lr : float, default 0.01
        Learning rate applied to every parameter update.
    lambdaa : float, default 0.01
        Weight of the L2 regularisation term.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector, one entry per feature; ``None`` until ``fit`` is called.
    b : float or None
        Bias term; ``None`` until ``fit`` is called.
    """

    def __init__(self, iterations=1000, lr=0.01, lambdaa=0.01):
        self.lambdaa = lambdaa
        self.iterations = iterations
        self.lr = lr
        self.w = None
        self.b = None

    def initialize_parameters(self, X):
        """Reset ``w`` to zeros (one entry per column of ``X``) and ``b`` to 0.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional (samples x features).
        """
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features), got %d dimension(s)" % X.ndim
            )
        # Zero initialisation is deterministic; a random start can give
        # better or worse results from run to run.
        self.w = np.zeros(X.shape[1])
        self.b = 0

    def gradient_descent(self, X, y):
        """Run one pass over the data, updating ``w`` and ``b`` after each sample.

        Labels less than or equal to 0 are treated as class -1, all others as +1.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        # Converting up front lets lists and pandas objects work as well:
        # iterating a DataFrame directly would yield column names, not rows.
        X = np.asarray(X, dtype=float)
        y_ = np.where(np.asarray(y).ravel() <= 0, -1, 1)
        if X.shape[0] != y_.shape[0]:
            raise ValueError(
                "X and y have different numbers of samples: %d != %d"
                % (X.shape[0], y_.shape[0])
            )

        for x_i, y_i in zip(X, y_):
            if y_i * (np.dot(x_i, self.w) - self.b) >= 1:
                # Sample is outside the margin: only the regulariser contributes.
                dw = 2 * self.lambdaa * self.w
                db = 0
            else:
                # Sample violates the margin: add the hinge-loss gradient.
                dw = 2 * self.lambdaa * self.w - y_i * x_i
                db = y_i
            self.update_parameters(dw, db)

    def update_parameters(self, dw, db):
        """Take one gradient step of size ``lr`` on ``w`` and ``b``."""
        self.w = self.w - self.lr * dw
        self.b = self.b - self.lr * db

    def fit(self, X, y):
        """Train the model on features ``X`` and labels ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
            Labels; values <= 0 form one class, values > 0 the other.
        """
        # Convert once so the per-iteration conversions below are no-ops.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        self.initialize_parameters(X)
        for _ in range(self.iterations):
            self.gradient_descent(X, y)

    def predict(self, X):
        """Return 0/1 predictions for the rows of ``X``.

        A row is labelled 0 when its decision value ``np.dot(x, w) - b`` is
        negative and 1 otherwise (a value of exactly zero maps to 1).

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.w is None or self.b is None:
            raise RuntimeError("SVM is not fitted yet; call fit() before predict().")
        output = np.dot(np.asarray(X, dtype=float), self.w) - self.b
        return np.where(output < 0, 0, 1)


In [None]:
# Train the from-scratch SVM with its default hyper-parameters.
model = SVM()
model.fit(X_train, y_train)

# Score the held-out split; the accuracy is the cell's displayed output.
predictions = model.predict(X_test)
accuracy_score(y_true=y_test, y_pred=predictions)

 <a id="5"></a>
<h1 style='background:#41BEB9;padding-top:12px; border:0; color:black'><center>Sklearn Implementation</center></h1> 

# Sklearn Implementation

In [None]:
from sklearn.svm import SVC

# Reference model: scikit-learn's SVC with default settings, fitted on the same split.
# NOTE(review): SVC() defaults are not necessarily a linear kernel, so this may
# not be a like-for-like comparison with the linear model above - confirm.
skmodel = SVC().fit(X_train, y_train)  # fit() returns the fitted estimator
sk_predictions = skmodel.predict(X_test)

accuracy_score(y_true=y_test, y_pred=sk_predictions)

 <a id="6"></a>
<h1 style='background:#41BEB9;padding-top:17px; border:0; color:black'><center>Thank You</center></h1> 

# Thank you
**Thank you for going through this notebook**

**If you have any suggestions please let me know**