In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from mpl_toolkits.mplot3d import Axes3D
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score
from mpl_toolkits.mplot3d import Axes3D

In [2]:
#SQL Query

# SELECT TOP 200
# g.oii_3726_flux, g.neiii_3869_flux, g.oiii_5007_flux, g.h_beta_flux, 
# g.nii_6584_flux, g.h_alpha_flux, g.sii_6731_flux, s.z, Gal.bptclass

# FROM GalSpecLine as g
# JOIN GalSpecExtra AS Gal ON g.specObjID = Gal.specObjID
# JOIN SpecPhotoAll AS s ON g.specObjID = s.specObjID
# WHERE 
# s.class = 'Galaxy' AND s.z < 0.3 And Gal.bptclass != -1 AND Gal.bptclass != 3


In [3]:
#Reading the data 
Data = pd.read_csv('../Data/Galaxy_Data_2.csv')
Data.head()
Data.shape

(200, 9)

<h1> Data Preprocessing </h1>

In [4]:

Data = Data[(Data['oii_3726_flux'] != 0) & (Data['neiii_3869_flux'] != 0) & (Data['oiii_5007_flux'] != 0) & (Data['h_beta_flux'] != 0) & (Data['nii_6584_flux'] != 0) & (Data['h_alpha_flux'] != 0) & (Data['sii_6731_flux'] != 0)]
Galaxy_Classifications = Data['bptclass']

#For binary classification remove with bpt class of 3

Redshifts = Data['Redshift']
Flux_Measurements = Data.drop(columns = ['Redshift', 'bptclass'])


In [5]:
def change_values(value):
    if value == 1 or value == 2:
        return 0
    else:
        return 1

In [6]:
AGN_StarForming_Classifications = Galaxy_Classifications.apply(change_values)
AGN_StarForming_Classifications

0      0
1      0
2      0
3      0
4      0
      ..
195    0
196    0
197    1
198    1
199    0
Name: bptclass, Length: 185, dtype: int64

In [7]:
Flux_Measurements

Unnamed: 0,oii_3726_flux,neiii_3869_flux,oiii_5007_flux,h_beta_flux,nii_6584_flux,h_alpha_flux,sii_6731_flux
0,29.878280,-1.847041,11.855670,47.366500,54.137490,181.52620,19.167620
1,48.753910,-2.035048,35.466710,38.332260,25.789630,127.30580,25.722970
2,-0.361507,-0.440258,5.567666,18.897210,4.083941,40.48693,-0.516020
3,32.117090,7.667253,15.599010,53.182280,76.354820,211.50180,28.479640
4,17.377120,2.074602,16.261190,42.869560,61.221300,195.51970,24.305620
...,...,...,...,...,...,...,...
195,41.963440,5.267823,24.305210,49.365840,80.865840,222.72120,31.733420
196,24.076190,5.498502,21.370160,41.578920,73.544160,193.78190,27.413800
197,21.902850,-0.049955,15.486540,2.674402,20.173330,22.91956,3.828683
198,7.386456,1.707664,19.735070,2.167771,27.136760,20.49056,5.187601


In [8]:
#Forming Datatable with Ratios [NeIII/OII], [OIII/Hb], [NII/Ha], [SII/Ha]
Aggregate_Data = {'neiii_3869_flux/oii_3726_flux':Flux_Measurements['neiii_3869_flux']/Flux_Measurements['oii_3726_flux'], 
            'oiii_5007_flux/h_beta_flux' : Flux_Measurements['oiii_5007_flux']/Flux_Measurements['h_beta_flux'],  
            'nii_6584_flux/h_alpha_flux': Flux_Measurements['nii_6584_flux']/Flux_Measurements['h_alpha_flux'],
            'sii_6731_flux/h_alpha_flux':Flux_Measurements['sii_6731_flux']/Flux_Measurements['h_alpha_flux']}
Emission_Ratios = pd.DataFrame(data = Aggregate_Data)
Emission_Ratios.head()
#Forming BPT Ratios 
BPT_Data = {'oiii_5007_flux/h_beta_flux' : Flux_Measurements['oiii_5007_flux']/Flux_Measurements['h_beta_flux'],
           'nii_6584_flux/h_alpha_flux': Flux_Measurements['nii_6584_flux']/Flux_Measurements['h_alpha_flux'],
           'sii_6731_flux/h_alpha_flux':Flux_Measurements['sii_6731_flux']/Flux_Measurements['h_alpha_flux']}
BPT_Ratios = pd.DataFrame(data = BPT_Data)
BPT_Ratios.head()

Unnamed: 0,oiii_5007_flux/h_beta_flux,nii_6584_flux/h_alpha_flux,sii_6731_flux/h_alpha_flux
0,0.250297,0.298235,0.105591
1,0.925244,0.20258,0.202057
2,0.294629,0.100871,-0.012745
3,0.293312,0.361013,0.134654
4,0.379318,0.313121,0.124313


<h2> BPT Ratios Model <h2> 

In [9]:
X_train, X_test, Y_Train, Y_test = train_test_split(BPT_Ratios, AGN_StarForming_Classifications, test_size = .33)

In [10]:
model = SVC(kernel = 'rbf')
model.fit(X_train, Y_Train)

SVC()

In [11]:
predictions = model.predict(X_test)

In [12]:
accuracy = accuracy_score(Y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.9354838709677419


In [13]:
class_report = classification_report(Y_test, predictions)
print("Classification Report:\n", class_report)

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.94      0.96        49
           1       0.80      0.92      0.86        13

    accuracy                           0.94        62
   macro avg       0.89      0.93      0.91        62
weighted avg       0.94      0.94      0.94        62



<h2> Aggregate Model <h2> 

In [14]:
X_train, X_test, Y_Train, Y_test = train_test_split(Emission_Ratios, AGN_StarForming_Classifications, test_size = .33)

In [15]:
model = SVC(kernel = 'rbf')
model.fit(X_train, Y_Train)

SVC()

In [16]:
predictions = model.predict(X_test)

In [17]:
accuracy = accuracy_score(Y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.8064516129032258


In [18]:
class_report = classification_report(Y_test, predictions)
print("Classification Report:\n", class_report)

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.84      0.86        45
           1       0.63      0.71      0.67        17

    accuracy                           0.81        62
   macro avg       0.76      0.78      0.77        62
weighted avg       0.81      0.81      0.81        62



<h2> 10-Fold Cross Validation <h2>

In [19]:
svm_model = SVC(kernel = 'rbf')
num_folds = 10
scores = cross_val_score(svm_model, Emission_Ratios, AGN_StarForming_Classifications, cv = num_folds)
print(scores)

[0.84210526 0.94736842 0.94736842 0.94736842 0.84210526 0.77777778
 0.83333333 0.94444444 0.77777778 0.83333333]


In [20]:
accuracy = np.mean(scores)
print(accuracy)

0.8692982456140351


<h1> Multi-Class SVM <h1>