# Non-linear Support Vector Machine Example

## Importing and exploring our Data

In [1]:
import warnings
# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

import pandas as pd

# Raw string: "\P" and "\p" are not valid escape sequences, so a plain string
# literal triggers an invalid-escape warning and is slated to become an error.
# The runtime value of the path is unchanged.
# NOTE(review): absolute, machine-specific path - adjust to where the CSV lives.
star_data = pd.read_csv(r"D:\ProjectData\pulsar_data.csv")

# Preview the first five rows.
star_data.head()

Unnamed: 0,Mean of the integrated profile,Standard deviation of the integrated profile,Excess kurtosis of the integrated profile,Skewness of the integrated profile,Mean of the DM-SNR curve,Standard deviation of the DM-SNR curve,Excess kurtosis of the DM-SNR curve,Skewness of the DM-SNR curve,target_class
0,121.15625,48.372971,0.375485,-0.013165,3.168896,18.399367,7.449874,65.159298,0.0
1,76.96875,36.175557,0.712898,3.388719,2.399666,17.570997,9.414652,102.722975,0.0
2,130.585938,53.229534,0.133408,-0.297242,2.743311,22.362553,8.508364,74.031324,0.0
3,156.398438,48.865942,-0.215989,-0.171294,17.471572,,2.958066,7.197842,0.0
4,84.804688,36.117659,0.825013,3.274125,2.790134,20.618009,8.405008,76.291128,0.0


In [2]:
# (rows, columns) of the loaded frame.
star_data.shape

(12528, 9)

## Data pre-processing

In [3]:
# Count the missing values in each column.
star_data.isnull().sum()

 Mean of the integrated profile                     0
 Standard deviation of the integrated profile       0
 Excess kurtosis of the integrated profile       1735
 Skewness of the integrated profile                 0
 Mean of the DM-SNR curve                           0
 Standard deviation of the DM-SNR curve          1178
 Excess kurtosis of the DM-SNR curve                0
 Skewness of the DM-SNR curve                     625
target_class                                        0
dtype: int64

In [5]:
# Drop every row that contains at least one missing value. Reassigning the
# result (instead of inplace=True) keeps the step explicit and chainable.
star_data = star_data.dropna()

In [6]:
# Re-check the per-column missing-value counts after dropping incomplete rows.
star_data.isnull().sum()

 Mean of the integrated profile                  0
 Standard deviation of the integrated profile    0
 Excess kurtosis of the integrated profile       0
 Skewness of the integrated profile              0
 Mean of the DM-SNR curve                        0
 Standard deviation of the DM-SNR curve          0
 Excess kurtosis of the DM-SNR curve             0
 Skewness of the DM-SNR curve                    0
target_class                                     0
dtype: int64

In [7]:
# Show the dtype of each column.
star_data.dtypes

 Mean of the integrated profile                  float64
 Standard deviation of the integrated profile    float64
 Excess kurtosis of the integrated profile       float64
 Skewness of the integrated profile              float64
 Mean of the DM-SNR curve                        float64
 Standard deviation of the DM-SNR curve          float64
 Excess kurtosis of the DM-SNR curve             float64
 Skewness of the DM-SNR curve                    float64
target_class                                     float64
dtype: object

In [8]:
# Inspect the raw column labels (the output shows leading spaces on most of them).
star_data.columns

Index([' Mean of the integrated profile',
       ' Standard deviation of the integrated profile',
       ' Excess kurtosis of the integrated profile',
       ' Skewness of the integrated profile', ' Mean of the DM-SNR curve',
       ' Standard deviation of the DM-SNR curve',
       ' Excess kurtosis of the DM-SNR curve', ' Skewness of the DM-SNR curve',
       'target_class'],
      dtype='object')

In [10]:
# Trim surrounding whitespace from every column label, then show the result.
star_data = star_data.rename(columns=str.strip)

star_data.columns

Index(['Mean of the integrated profile',
       'Standard deviation of the integrated profile',
       'Excess kurtosis of the integrated profile',
       'Skewness of the integrated profile', 'Mean of the DM-SNR curve',
       'Standard deviation of the DM-SNR curve',
       'Excess kurtosis of the DM-SNR curve', 'Skewness of the DM-SNR curve',
       'target_class'],
      dtype='object')

In [11]:
# Split the frame into the feature matrix (X) and the label vector (y).
# `columns=` replaces the positional axis argument (`drop(label, 1)`), which
# pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onwards.
X = star_data.drop(columns='target_class')
y = star_data['target_class']

In [15]:
from sklearn.preprocessing import StandardScaler

s_scaler = StandardScaler()

# fit_transform returns a bare NumPy array, so rebuild the DataFrame with the
# original column names AND the original row index. Without `index=X.index`
# the scaled frame gets a fresh 0..n-1 index while `y` keeps the gapped index
# left behind by dropna(), so any label-based alignment of the two would
# silently pair up the wrong rows.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# that follows, so test-set statistics leak into the features; fitting on the
# training portion only would avoid this.
X_ss = pd.DataFrame(s_scaler.fit_transform(X),
                    columns=X.columns,
                    index=X.index)

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X_ss, y, test_size=0.25, random_state=42)

### Implementing the SVM

In [17]:
from sklearn import svm 

# Baseline: support vector classifier with a linear kernel and C = 10.
clf_linear = svm.SVC(kernel = 'linear', C = 10)

# Train on the training split; as the cell's last expression this also
# displays the fitted estimator.
clf_linear.fit(X_train, y_train)

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [18]:
from sklearn.metrics import f1_score

# Predict labels for the held-out test rows with the linear-kernel model.
y_pred = clf_linear.predict(X_test)

# F1 score of those predictions against the true test labels.
f1_score(y_test, y_pred)

0.8720626631853786

In [19]:
# Same cost (C = 10) as the linear model, but with an RBF kernel.
clf_rbf = svm.SVC(kernel = 'rbf', C = 10)

# Train on the training split; the fitted estimator is displayed as output.
clf_rbf.fit(X_train, y_train)


SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [20]:
# Overwrites y_pred with the RBF model's predictions for the test rows.
y_pred = clf_rbf.predict(X_test)

# F1 score of the RBF predictions against the true test labels.
f1_score(y_test, y_pred)

0.883720930232558

In [21]:
# Sweep the cost parameter C for the RBF kernel and report the test-set
# F1 score obtained with each value.
costs = [0.1, 1, 10, 100, 1000]

for cost in costs:
    # fit() returns the estimator itself, so construction and training chain.
    clf_rbf = svm.SVC(C=cost, kernel='rbf').fit(X_train, y_train)
    y_pred = clf_rbf.predict(X_test)
    score = f1_score(y_test, y_pred)
    print(f"Cost: {cost} F1 score: {score}")


Cost: 0.1 F1 score: 0.8723404255319148
Cost: 1 F1 score: 0.8785529715762272
Cost: 10 F1 score: 0.883720930232558
Cost: 100 F1 score: 0.8616187989556137
Cost: 1000 F1 score: 0.859375
