In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from mpl_toolkits.mplot3d import Axes3D
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score
from mpl_toolkits.mplot3d import Axes3D
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

In [30]:
# SQL Query

# SELECT TOP 200
# g.oii_3726_flux, g.neiii_3869_flux, g.oiii_5007_flux, g.h_beta_flux, 
# g.nii_6584_flux, g.h_alpha_flux, g.sii_6731_flux, s.z, Gal.bptclass

# FROM GalSpecLine as g
# JOIN GalSpecExtra AS Gal ON g.specObjID = Gal.specObjID
# JOIN SpecPhotoAll AS s ON g.specObjID = s.specObjID
# WHERE 
# s.class = 'Galaxy' AND s.z < 0.3 And Gal.bptclass != -1


In [31]:
#Reading the data 
Data = pd.read_csv('../Data/Galaxy_Data.csv')
Data.head()
Data.shape

(200, 9)

<h1> Data Preprocessing </h1>

In [32]:

Data = Data[(Data['oii_3726_flux'] != 0) & (Data['neiii_3869_flux'] != 0) & (Data['oiii_5007_flux'] != 0) & (Data['h_beta_flux'] != 0) & (Data['nii_6584_flux'] != 0) & (Data['h_alpha_flux'] != 0) & (Data['sii_6731_flux'] != 0)]
#For binary classification remove with bpt class of 3
Binary_Classification = Data[Data['bptclass'] != 3]
Galaxy_Classifications = Binary_Classification['bptclass']
Binary_Classification.head()
Redshifts = Binary_Classification['Redshift']
Flux_Measurements = Binary_Classification.drop(columns = ['Redshift', 'bptclass'])


In [33]:
def change_values(value):
    if value == 1 or value == 2:
        return 1
    if value == 3:
        return 2
    elif value == 4 or value == 5:
        return 3

In [34]:
AGN_StarForming_Classifications = Galaxy_Classifications.apply(change_values)
AGN_StarForming_Classifications

3      1
4      1
5      1
6      1
7      1
      ..
194    1
195    3
196    1
197    1
199    1
Name: bptclass, Length: 163, dtype: int64

In [35]:
Flux_Measurements

Unnamed: 0,oii_3726_flux,neiii_3869_flux,oiii_5007_flux,h_beta_flux,nii_6584_flux,h_alpha_flux,sii_6731_flux
3,29.878280,-1.847041,11.855670,47.366500,54.137490,181.52620,19.167620
4,48.753910,-2.035048,35.466710,38.332260,25.789630,127.30580,25.722970
5,-0.361507,-0.440258,5.567666,18.897210,4.083941,40.48693,-0.516020
6,32.117090,7.667253,15.599010,53.182280,76.354820,211.50180,28.479640
7,17.377120,2.074602,16.261190,42.869560,61.221300,195.51970,24.305620
...,...,...,...,...,...,...,...
194,-1.016173,0.166754,4.154623,7.404680,8.230973,18.20489,0.028184
195,11.741700,1.130993,12.104400,5.016534,18.810300,23.94296,5.658797
196,198.676500,10.777490,107.406300,194.576900,229.458900,724.72580,90.241070
197,13.262350,4.131592,13.496670,53.704480,91.392560,271.50200,27.886870


In [36]:
#Forming Datatable with Ratios [NeIII/OII], [OIII/Hb], [NII/Ha], [SII/Ha]
Aggregate_Data = {'neiii_3869_flux/oii_3726_flux':Flux_Measurements['neiii_3869_flux']/Flux_Measurements['oii_3726_flux'], 
            'oiii_5007_flux/h_beta_flux' : Flux_Measurements['oiii_5007_flux']/Flux_Measurements['h_beta_flux'],  
            'nii_6584_flux/h_alpha_flux': Flux_Measurements['nii_6584_flux']/Flux_Measurements['h_alpha_flux'],
            'sii_6731_flux/h_alpha_flux':Flux_Measurements['sii_6731_flux']/Flux_Measurements['h_alpha_flux']}
Emission_Ratios = pd.DataFrame(data = Aggregate_Data)
Emission_Ratios.head()
#Forming BPT Ratios 
BPT_Data = {'oiii_5007_flux/h_beta_flux' : Flux_Measurements['oiii_5007_flux']/Flux_Measurements['h_beta_flux'],
           'nii_6584_flux/h_alpha_flux': Flux_Measurements['nii_6584_flux']/Flux_Measurements['h_alpha_flux'],
           'sii_6731_flux/h_alpha_flux':Flux_Measurements['sii_6731_flux']/Flux_Measurements['h_alpha_flux']}
BPT_Ratios = pd.DataFrame(data = BPT_Data)
BPT_Ratios.head()

Unnamed: 0,oiii_5007_flux/h_beta_flux,nii_6584_flux/h_alpha_flux,sii_6731_flux/h_alpha_flux
3,0.250297,0.298235,0.105591
4,0.925244,0.20258,0.202057
5,0.294629,0.100871,-0.012745
6,0.293312,0.361013,0.134654
7,0.379318,0.313121,0.124313


<h2> BPT Ratios Model <h2> 

In [37]:
X_train, X_test, Y_Train, Y_test = train_test_split(BPT_Ratios, AGN_StarForming_Classifications, test_size = .33)

In [38]:
model = SVC(kernel = 'rbf')
model.fit(X_train, Y_Train)

SVC()

In [39]:
predictions = model.predict(X_test)

In [40]:
accuracy = accuracy_score(Y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.9074074074074074


In [41]:
class_report = classification_report(Y_test, predictions)
print("Classification Report:\n", class_report)

Classification Report:
               precision    recall  f1-score   support

           1       0.90      0.97      0.94        39
           3       0.92      0.73      0.81        15

    accuracy                           0.91        54
   macro avg       0.91      0.85      0.88        54
weighted avg       0.91      0.91      0.90        54



<h2> Aggregate Model <h2> 

In [42]:
X_train, X_test, Y_Train, Y_test = train_test_split(Emission_Ratios, AGN_StarForming_Classifications, test_size = .33)

In [43]:
model = SVC(kernel = 'rbf')
model.fit(X_train, Y_Train)

SVC()

In [44]:
predictions = model.predict(X_test)

In [45]:
accuracy = accuracy_score(Y_test, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.8518518518518519


In [46]:
class_report = classification_report(Y_test, predictions)
print("Classification Report:\n", class_report)

Classification Report:
               precision    recall  f1-score   support

           1       0.86      0.95      0.90        40
           3       0.80      0.57      0.67        14

    accuracy                           0.85        54
   macro avg       0.83      0.76      0.79        54
weighted avg       0.85      0.85      0.84        54



<h2> 10-Fold Cross Validation <h2>

In [47]:
svm_model = SVC(kernel = 'rbf')
num_folds = 10
scores = cross_val_score(svm_model, Emission_Ratios, AGN_StarForming_Classifications, cv = num_folds)
print(scores)

[0.82352941 0.82352941 0.94117647 0.9375     1.         0.8125
 0.8125     0.6875     0.875      0.875     ]


In [48]:
accuracy = np.mean(scores)
print(accuracy)

0.8588235294117647


<h1> Multi-Class SVM <h1>

In [49]:
Multiclass_Data = pd.read_csv('../Data/Galaxy_Data_2.csv')
Multiclass_Data = Multiclass_Data[(Multiclass_Data['oii_3726_flux'] != 0) & (Multiclass_Data['neiii_3869_flux'] != 0) & (Multiclass_Data['oiii_5007_flux'] != 0) & (Multiclass_Data['h_beta_flux'] != 0) & (Multiclass_Data['nii_6584_flux'] != 0) & (Multiclass_Data['h_alpha_flux'] != 0) & (Multiclass_Data['sii_6731_flux'] != 0)]
Multi_Classifications = Multiclass_Data['bptclass']
Redshifts = Data['Redshift']
Flux_Measurements = Multiclass_Data.drop(columns = ['Redshift', 'bptclass'])

In [50]:
Multi_Classifications = Multi_Classifications.apply(change_values)
print(Multi_Classifications.values)

[2 2 1 1 1 1 1 3 1 1 1 2 1 1 1 1 3 3 1 3 3 3 1 1 2 3 3 2 1 1 1 3 3 1 1 1 1
 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 2 1 1 2 3 1 1 2 1 1 3 1 3
 1 1 3 1 2 2 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 1 3 1 1 3 1 1 3 2 1 1 3 1 1
 1 3 1 1 1 2 1 1 1 1 1 3 1 3 1 3 1 1 1 2 3 1 1 1 3 1 1 1 1 2 1 1 2 1 1 3 1
 1 3 1 1 3 3 1 3 1 1 3 1 1 3 1 1 3 1 1 3 1 1 1 3 3 1 3 1 3 3 1 3 1 1 2 1 3
 1 1 1 1 1 3 1 1 1 2 3 1 3 1 3 3 1 1 1 3 3 1 2 3 3 2 2 3 1 3 1 1 2 1 1 3 2
 1 1 1 1 1 1 1 1 2 1 1 1 2 1 2 1 1 3 1 3 3 1 1 1 1 1 1 1 3 1 1 1 2 1 1 1 1
 1 1 1 1 1 3 1 1 3 1 1 2 1 1 3 1 1 1 1 1 1 1 3 3 1 3 3 1 1 1 1 1 3 1 1 3 3
 1 1 1 1 3 1 1 1 1 1 1 1 1 1 3 1 3 3 3 3 1 1 2 1 2 1 1 1 3 1 3 1 1 3 1 1 3
 1 3 1 3 3 1 1 1 3 3 2 1 1 2 1 1 1 1 1 2 1 1 2 3 3 1 1 3 3 3 3 3 2 1 1 1 1
 1 2 3 1 3 1 3 1 1 1 1 1 3 1 1 2 1 1 1 3 1 1 1 1 1 1 1 1 3 2 1 3 1 1 1 1 1
 1 3 1 1 1 1 1 1 1 3 1 2 1 3 1 1 1 1 1 1 3 1 1 1 3 1 1 1 1 1 1 1 1 1 3 1 1
 3 1 1 1 1 1 3 1 2 1 1 3 1 1 1 1 3 1 1 1 1 3 1 3 1 1 3 1 1 1 1 2 3 1 3 1 3
 1 1 1 1 1 1 1 1 1 1 1 3 

In [51]:
Aggregate_Data = {'neiii_3869_flux/oii_3726_flux':Flux_Measurements['neiii_3869_flux']/Flux_Measurements['oii_3726_flux'], 
            'oiii_5007_flux/h_beta_flux' : Flux_Measurements['oiii_5007_flux']/Flux_Measurements['h_beta_flux'],  
            'nii_6584_flux/h_alpha_flux': Flux_Measurements['nii_6584_flux']/Flux_Measurements['h_alpha_flux'],
            'sii_6731_flux/h_alpha_flux':Flux_Measurements['sii_6731_flux']/Flux_Measurements['h_alpha_flux']}
Emission_Ratios = pd.DataFrame(data = Aggregate_Data)
print(Emission_Ratios)

     neiii_3869_flux/oii_3726_flux  oiii_5007_flux/h_beta_flux   
0                        -0.066153                    0.731779  \
2                         0.063194                    0.836349   
3                        -0.061819                    0.250297   
4                        -0.041741                    0.925244   
5                         1.217840                    0.294629   
..                             ...                         ...   
995                      -0.172349                    0.664793   
996                       0.177866                    1.391252   
997                       0.358351                    0.922935   
998                       0.090607                    0.363466   
999                       0.317689                    0.190422   

     nii_6584_flux/h_alpha_flux  sii_6731_flux/h_alpha_flux  
0                      0.445568                    0.223006  
2                      0.519029                    0.217885  
3                    

In [52]:
X_train, X_test, y_train, y_test = train_test_split(Emission_Ratios, Multi_Classifications, test_size = 0.25, random_state = 42)

In [53]:
classifier = OneVsRestClassifier(SVC(kernel='poly', C=1.0))

In [54]:
classifier.fit(X_train, y_train)

OneVsRestClassifier(estimator=SVC(kernel='poly'))

In [55]:
prediction = classifier.predict(X_test)

In [56]:
print(f"Test Set Accuracy : {accuracy_score(y_test, prediction)}\n")
print(f"Classification Report : \n\n{classification_report(y_test, prediction)}")

Test Set Accuracy : 0.7682403433476395

Classification Report : 

              precision    recall  f1-score   support

           1       0.77      0.99      0.86       172
           2       0.00      0.00      0.00        12
           3       0.82      0.18      0.30        49

    accuracy                           0.77       233
   macro avg       0.53      0.39      0.39       233
weighted avg       0.74      0.77      0.70       233



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
