# Import all the needed packages

In [None]:
from natsort import natsorted
from collections import defaultdict
import numpy as np
import os 
import pandas as pd
import joblib
from PRF import prf

# Load the dataset

In [12]:
# Directory that holds the input file -- replace it with your own location.
path = '/home/elias/Projects/Github_rep/SpectralClassifier/'
# Name of the file with the measurements to classify.
file_with_data = 'test_sample'
# Full location of the input file: the directory followed by the file name.
infile_data = ''.join((path, file_with_data))

In [14]:
# Parse the comma-separated input file into two nested dictionaries of the
# form {target name: {spectral line: value}}: one holding the EW values and
# one holding the EW errors. The expected column order is
# target name, spectral line, EW value, EW error. Values are stored as text
# (surrounding whitespace removed); no numeric conversion happens here.
unseen_data = defaultdict(dict)      # master dictionary (EW values)
unseen_data_err = defaultdict(dict)  # matching EW errors
with open(infile_data, 'r') as inf:
    # Iterate the file lazily instead of loading every line up front.
    for raw_line in inf:
        line = raw_line.strip()
        # Skip blank lines (which would otherwise fail on the column lookup)
        # and '#' comment lines.
        if not line or line.startswith('#'):
            continue
        # Strip every field so the last column does not keep the trailing
        # newline and keys do not carry stray spaces.
        col = [field.strip() for field in line.split(',')]
        target_name = col[0]
        spec_lines = col[1]
        ew_values = col[2]
        ew_errors = col[3]
        unseen_data[target_name][spec_lines] = ew_values
        unseen_data_err[target_name][spec_lines] = ew_errors

# Spectral lines available for the first target in the file; an empty list
# when the file contained no data rows.
all_spec_lines = list(next(iter(unseen_data.values()), {}))
print('Spectral Lines: {}'.format(all_spec_lines))

Spectral Lines: ['HeI/4009', 'HeI/4026', 'SiIV/4088', 'Hd/4100', 'SiIV/4116', 'HeI/4121', 'SiII/4130', 'HeI/4144', 'HeII/4200', 'FeII/4233', 'HeI/4387', 'OII/4416', 'HeI/4471', 'MgII/4481', 'HeII/4541', 'SiIII/4553', 'OII+CIII/4645', 'HeII/4686']


# Select the characteristic spectral lines 

In [15]:
# Spectral lines used as input features, in the order in which their values
# are later assembled into the feature arrays.
selected_lines = [
    'HeI/4009',
    'HeI/4026',
    'SiIV/4088',
    'SiIV/4116',
    'HeI/4121',
    'SiII/4130',
    'HeI/4144',
    'HeII/4200',
    'FeII/4233',
    'HeI/4387',
    'OII/4416',
    'HeI/4471',
    'MgII/4481',
    'HeII/4541',
    'SiIII/4553',
    'OII+CIII/4645',
    'HeII/4686',
]
# Print the caption and the list on two separate lines.
print('Spectral Features are:', selected_lines, sep='\n')

Spectral Features are:
['HeI/4009', 'HeI/4026', 'SiIV/4088', 'SiIV/4116', 'HeI/4121', 'SiII/4130', 'HeI/4144', 'HeII/4200', 'FeII/4233', 'HeI/4387', 'OII/4416', 'HeI/4471', 'MgII/4481', 'HeII/4541', 'SiIII/4553', 'OII+CIII/4645', 'HeII/4686']


# Check the number of spectra that will be classified by the model

In [16]:
# Report how many targets were read from the input file, followed by a
# 25-character separator line.
print('Unseen data:', len(unseen_data))
print(25 * '-')

Unseen data: 8
-------------------------


# Create the arrays with the target IDs and the EW and EW-error values of each line

In [17]:
# Build the inputs for the model: one row per target and one column per
# entry of selected_lines (same order as selected_lines).
#
# The parsed dictionaries hold the values as text, so every entry is
# converted with float() to get numeric arrays instead of string arrays.
# A target that lacks one of the selected lines raises KeyError.
Targets_unseen = np.asarray(list(unseen_data))
EW_values_unseen = np.asarray(
    [[float(unseen_data[target][line]) for line in selected_lines]
     for target in unseen_data],
    dtype=float,
)
EW_errors_unseen = np.asarray(
    [[float(unseen_data_err[target][line]) for line in selected_lines]
     for target in unseen_data],
    dtype=float,
)

# Load the pre-trained model and apply it 

In [18]:
# Load the pre-trained model from the working directory and classify the
# unseen targets.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files from a trusted source.
loaded_prf = joblib.load("PRF_best_model_17_lines_FINAL.joblib")

# Predicted label for every target, using both the values and their errors.
unseen_predicted = loaded_prf.predict(X=EW_values_unseen, dX=EW_errors_unseen)
# Previous name of the same result, kept so existing references keep working.
unseen_predicted_IACOB = unseen_predicted

# Per-class probabilities. The errors are passed here as well so that the
# probabilities correspond to the labels returned by predict() above.
unseen_predicted_probs = loaded_prf.predict_proba(X=EW_values_unseen, dX=EW_errors_unseen)
print(len(unseen_predicted))

# Produce a csv file with the final results and the confidence level of each prediction

In [17]:
# Write one comma-separated row per target: ID, predicted label, highest class
# probability, a confidence label derived from that probability, and the
# probability of every class column listed below.
filename = 'Classification_Results_PRF'
class_columns = ['B0', 'B1', 'B2', 'B3-B4', 'B5-B7', 'B8', 'B9', 'O2-O6', 'O7', 'O8', 'O9']
# Positions of the probability entries written for each row (one per column).
indexx = np.arange(0, len(class_columns), 1)
header = ['ID', 'PRF_ST', 'Probability', 'Confidence Level'] + class_columns

with open(filename, 'w') as flname:
    flname.write(','.join(header) + '\n')
    # Three sequences are zipped, so exactly three names are unpacked.
    # unseen_predicted_IACOB is the name under which the predicted labels
    # are stored by the prediction step.
    for ID, PRF_ST, Probs in zip(Targets_unseen, unseen_predicted_IACOB, unseen_predicted_probs):
        best_prob = max(Probs)
        # Above 0.64 -> "Strong Candidate", below 0.49 -> "Candidate",
        # everything in between -> "Good Candidate". The boundary values
        # 0.49 and 0.64 fall in the middle class so that every target gets
        # a row in the output.
        if best_prob > 0.64:
            confidence = "Strong Candidate"
        elif best_prob < 0.49:
            confidence = "Candidate"
        else:
            confidence = "Good Candidate"

        fields = [str(ID), str(PRF_ST), str(np.round(best_prob, 4)), confidence]
        fields.extend(str(np.round(Probs[i], 4)) for i in indexx)
        flname.write(','.join(fields) + '\n')

# See the results as a DataFrame

In [19]:
# Read the results file back into a DataFrame (no column is used as the
# index) and show at most its first 30 rows.
df = pd.read_csv(filepath_or_buffer="Classification_Results_PRF", index_col=False)
df.head(n=30)