# Predicting a Pulsar Star
# Using a Support Vector Machine Classifier



In [64]:
# pandas supplies the DataFrame type and the CSV reader used below
import pandas as pd

# Location of the raw dataset (a relative path, so it is looked up from the
# notebook's working directory)
DATA_PATH = "pulsar_stars.csv"
# Read the whole CSV into a single DataFrame
df = pd.read_csv(DATA_PATH)

In [65]:
# Show the caption and the first record with a single call; sep='\n' puts the
# record on its own line, exactly as two consecutive print() calls would.
print('First row of the dataset: ', df.head(1), sep='\n')

First row of the dataset: 
    Mean of the integrated profile  \
0                         140.5625   

    Standard deviation of the integrated profile  \
0                                      55.683782   

    Excess kurtosis of the integrated profile  \
0                                   -0.234571   

    Skewness of the integrated profile   Mean of the DM-SNR curve  \
0                            -0.699648                   3.199833   

    Standard deviation of the DM-SNR curve  \
0                                19.110426   

    Excess kurtosis of the DM-SNR curve  Skewness of the DM-SNR curve  \
0                              7.975532                     74.242225   

   out_target_class  
0                 0  


In [66]:
# Unpack the frame's dimensions, then report them as a (rows, columns) pair
n_rows, n_cols = df.shape
print('Shape of the Data frame: ', (n_rows, n_cols))

Shape of the Data frame:  (17898, 9)


In [67]:
# Grab the header labels exactly as they were read from the CSV and show them
raw_column_labels = df.columns
print('Initial names of columns: ', raw_column_labels)

Initial names of columns:  Index([' Mean of the integrated profile',
       ' Standard deviation of the integrated profile',
       ' Excess kurtosis of the integrated profile',
       ' Skewness of the integrated profile', ' Mean of the DM-SNR curve',
       ' Standard deviation of the DM-SNR curve',
       ' Excess kurtosis of the DM-SNR curve', 'Skewness of the DM-SNR curve',
       'out_target_class'],
      dtype='object')


In [69]:
# Short labels keyed by the header text found in the CSV (after trimming
# whitespace). Renaming by name instead of by position means a reordered or
# unexpected header can never be silently given the wrong label.
COLUMN_RENAMES = {
    'Mean of the integrated profile': 'IP Mean',
    'Standard deviation of the integrated profile': 'IP Sd',
    'Excess kurtosis of the integrated profile': 'IP Kurtosis',
    'Skewness of the integrated profile': 'IP Skewness',
    'Mean of the DM-SNR curve': 'DM-SNR Mean',
    'Standard deviation of the DM-SNR curve': 'DM-SNR Sd',
    'Excess kurtosis of the DM-SNR curve': 'DM-SNR Kurtosis',
    'Skewness of the DM-SNR curve': 'DM-SNR Skewness',
    'out_target_class': 'target_class',
}

# Several raw headers carry a leading space; trim both ends so the lookup
# keys above match.
df.columns = df.columns.str.strip()
# Labels that are not keys of the mapping are left untouched, so re-running
# this cell on an already-renamed frame is a no-op.
df = df.rename(columns=COLUMN_RENAMES)

# Fail loudly if a header was not recognised, instead of carrying a
# mislabelled or unrenamed column into the rest of the analysis.
unexpected_columns = set(df.columns) - set(COLUMN_RENAMES.values())
assert not unexpected_columns, (
    f"Unrecognised column names: {sorted(unexpected_columns)}"
)

In [70]:
# Read the header labels back to confirm the short names are in place
renamed_column_labels = df.columns
print('New names of columns: ', renamed_column_labels)

New names of columns:  Index(['IP Mean', 'IP Sd', 'IP Kurtosis', 'IP Skewness', 'DM-SNR Mean',
       'DM-SNR Sd', 'DM-SNR Kurtosis', 'DM-SNR Skewness', 'target_class'],
      dtype='object')


In [71]:
print("The summary of dataset: ")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() appended a stray "None" line after the summary.
# Call it directly instead.
df.info()

The summary of dataset: 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17898 entries, 0 to 17897
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   IP Mean          17898 non-null  float64
 1   IP Sd            17898 non-null  float64
 2   IP Kurtosis      17898 non-null  float64
 3   IP Skewness      17898 non-null  float64
 4   DM-SNR Mean      17898 non-null  float64
 5   DM-SNR Sd        17898 non-null  float64
 6   DM-SNR Kurtosis  17898 non-null  float64
 7   DM-SNR Skewness  17898 non-null  float64
 8   target_class     17898 non-null  int64  
dtypes: float64(8), int64(1)
memory usage: 1.2 MB
None


In [72]:
print("Check for missing values: ")
# Flag every null cell, then total the flags column by column
nulls_per_column = df.isnull().sum()
print(nulls_per_column)

Check for missing values: 
IP Mean            0
IP Sd              0
IP Kurtosis        0
IP Skewness        0
DM-SNR Mean        0
DM-SNR Sd          0
DM-SNR Kurtosis    0
DM-SNR Skewness    0
target_class       0
dtype: int64


In [85]:
# None of the columns contains missing values

In [74]:
# Separate the label column from the predictor columns
label_column = 'target_class'
# Target variable: the class label of each row
y = df[label_column]
# Feature matrix: every remaining column
X = df.drop(columns=[label_column])

In [75]:
# Splitting helper from scikit-learn
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same from run to run
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

In [76]:
# Report the dimensions of each piece produced by the split, in the order
# train features, test features, train target, test target
for caption, part in (
    ('Shape of Training set of features: ', X_train),
    ('Shape of Testing set of features: ', X_test),
    ('Shape of Training set of target: ', y_train),
    ('Shape of Testing set of target: ', y_test),
):
    print(caption, part.shape)

Shape of Training set of features:  (14318, 8)
Shape of Testing set of features:  (3580, 8)
Shape of Training set of target:  (14318,)
Shape of Testing set of target:  (3580,)


In [77]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Learn each feature's mean and standard deviation from the training split
# only. Fitting on the full X would let test-set statistics leak into the
# preprocessing and make the held-out evaluation optimistic.
scaler.fit(X_train)

StandardScaler()

In [78]:
# transform() applies the scaling learned by the already-fitted scaler; it
# does not re-fit. Both splits are converted before either name is rebound.
# NOTE: because the same names are reused, re-running this cell on its own
# would scale the already-scaled data a second time.
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [79]:
# Support vector classifier from scikit-learn
from sklearn.svm import SVC

# Build the classifier without overriding any hyperparameter
svc = SVC()
# Train on the training features and their labels; left as the last
# expression so the fitted estimator is echoed as the cell output
svc.fit(X=X_train, y=y_train)

SVC()

In [80]:
# Ask the fitted classifier for a class label for every test-set row
y_pred = svc.predict(X=X_test)

In [82]:
# scikit-learn's metrics module provides the evaluation helpers
from sklearn import metrics

# Score the predictions against the true test labels, then report the result
test_accuracy = metrics.accuracy_score(y_test, y_pred)
print("The accuracy score is: ", test_accuracy)

The accuracy score is:  0.9826815642458101


In [83]:
# Build the per-class precision / recall / f1 / support table as text,
# then print it
report_text = metrics.classification_report(y_test, y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      3306
           1       0.92      0.84      0.88       274

    accuracy                           0.98      3580
   macro avg       0.96      0.92      0.94      3580
weighted avg       0.98      0.98      0.98      3580



In [84]:
# Confusion matrix of the test predictions: one row per true class,
# one column per predicted class
confusion = metrics.confusion_matrix(y_test, y_pred)
print(confusion)

[[3287   19]
 [  43  231]]
