## This test shows that the "built-in" model in PAWS works as expected.
The accuracy for bad_data, form_factor_scattering, and diffraction_peaks should be about 97%; for precursor_scattering it should be about 76%.

In [1]:
import os.path
import glob

import paws.api
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [3]:
# NOTE(review): these two constants are not referenced by any other cell shown
# in this notebook; they are kept in case something else relies on them.
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'csv'}

# (name of the operation inside the workflow, PAWS operation URI), in the
# order in which the operations are activated and added.
workflow_ops = (
    ('read_csv', 'IO.CSV.CSVToXYData'),
    ('profile', 'PROCESSING.SAXS.SpectrumProfiler'),
    ('load_classifier', 'IO.MODELS.SAXS.LoadSAXSClassifier'),
    ('classify', 'PROCESSING.SAXS.SpectrumClassifier'),
)

# (operation, input name, source of the input): wires the output of one
# operation into the input of another.
workflow_links = (
    ('profile', 'q_I', 'read_csv.outputs.x_y'),
    ('classify', 'features', 'profile.outputs.features'),
    ('classify', 'classifier', 'load_classifier.outputs.saxs_classifier'),
)

paw = paws.api.start()
paw.add_wf('classifier_test')

# Activate every operation first, then add them all to the workflow.
for _, op_uri in workflow_ops:
    paw.activate_op(op_uri)
for op_name, op_uri in workflow_ops:
    paw.add_op(op_name, op_uri)

for op_name, input_name, input_source in workflow_links:
    paw.set_input(op_name, input_name, input_source)

This test shows that the "built-in" model in PAWS works as expected.


In [6]:
# Load the hold-out set and preview its first rows.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
test = pd.read_pickle('training_by_experiments/hold_out_by_exp.pkl')
test.head(n=5)

Unnamed: 0,experiment,name,q_Imax,Imax_over_Imean,Imax_sharpness,logI_fluctuation,logI_max_over_std,bad_data,form,precursor,structure,str_label
1416,R7,R7_SAXS_0024_dz_bgsub,0.18,1.731233,1.192316,9.117207,2.391716,False,False,True,False,precursor_
1859,Reaction_G,G_260C_0214171833_0001_dz_bgsub,0.103,19.154102,2.556537,3.58831,3.172426,False,True,True,True,form_precursor_structure
388,R12,R12addOac_SAXS_0011_dz_bgsub,0.04,16.072336,1.027431,5.673599,2.794056,False,True,False,False,form_
1000,R4,R4_cool_0029_dz_bgsub,0.08,77.044648,3.216694,4.004631,4.355049,False,True,False,True,form_structure
865,R3,R3_cool_0044_dz_bgsub,0.052,32.767153,1.669566,2.378482,3.359786,False,True,True,True,form_precursor_structure


In [7]:
# Dimensions of the hold-out set as (rows, columns).
test.shape

(200, 12)

In [11]:
# Build the path of the CSV file for every sample of the hold-out set:
# 2016_saxs_data/<experiment>/<name>.csv
names = list(test['name'])
# NOTE(review): "exeriment" is a misspelling of "experiment"; the name is kept
# unchanged in case other cells refer to it.
exeriment = list(test['experiment'])
files_to_test = [
    '2016_saxs_data/' + experiment_dir + '/' + sample_name + '.csv'
    for experiment_dir, sample_name in zip(exeriment, names)
]

In [23]:
# Predicted labels and the value reported next to each label, one entry per
# file in files_to_test.
bad_data_labels, bad_data_pr = [], []
form_factor_scattering, form_pr = [], []
precursor_scattering, prec_pr = [], []
diffraction_peaks, peaks_pr = [], []

# (key in the classifier flags, list of labels, list of probabilities) for the
# populations that are only read when the spectrum is not flagged as bad data.
population_outputs = (
    ('form_factor_scattering', form_factor_scattering, form_pr),
    ('precursor_scattering', precursor_scattering, prec_pr),
    ('diffraction_peaks', diffraction_peaks, peaks_pr),
)

for csv_path in files_to_test:
    # Point the workflow at the next file and run it end to end.
    paw.set_input('read_csv', 'file_path', csv_path)
    paw.execute()
    flags = paw.get_output('classify', 'flags')

    bad_label = flags['bad_data'][0]
    bad_data_labels.append(bad_label)
    bad_data_pr.append(flags['bad_data'][1])

    # Deliberately an equality test against False (not `not bad_label`), so
    # the comparison behaves exactly as before for whatever type PAWS returns.
    is_usable = (bad_label == False)
    for flag_key, labels, probabilities in population_outputs:
        if is_usable:
            labels.append(flags[flag_key][0][0])
            probabilities.append(flags[flag_key][1])
        else:
            # Bad data: record a negative label and no probability.
            labels.append(False)
            probabilities.append('N/A')

preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_c

running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classif

running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classif

running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classif

running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classif

running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classifier_test for execution

running workflow:
['read_csv', 'load_classifier']
['profile']
['classify']
running: ['read_csv', 'load_classifier']
running: ['profile']
running: ['classify']
execution finished
preparing workflow classif

In [24]:
# Attach the PAWS predictions to the hold-out table. Each "*_pr" column holds
# the probability reported for the label in the column before it.
# A tuple of pairs (not a dict) keeps the column order explicit.
paws_columns = (
    ('paws_pr_bad_data', bad_data_labels),
    ('paws_pr_bad_data_pr', bad_data_pr),
    ('paws_pr_form', form_factor_scattering),
    ('paws_pr_form_pr', form_pr),
    ('paws_pr_precursor', precursor_scattering),
    ('paws_pr_precursor_pr', prec_pr),
    ('paws_pr_structure', diffraction_peaks),
    ('paws_pr_structure_pr', peaks_pr),
)
for column_name, column_values in paws_columns:
    test[column_name] = column_values

In [25]:
# Fraction of hold-out samples whose predicted bad_data label matches the true one.
accuracy_score(y_true=test['bad_data'], y_pred=test['paws_pr_bad_data'])

0.96499999999999997

In [26]:
# Fraction of hold-out samples whose predicted form-factor label matches the true one.
accuracy_score(y_true=test['form'], y_pred=test['paws_pr_form'])

0.98999999999999999

In [27]:
# Fraction of hold-out samples whose predicted precursor label matches the true one.
accuracy_score(y_true=test['precursor'], y_pred=test['paws_pr_precursor'])

0.77500000000000002

In [28]:
# Fraction of hold-out samples whose predicted structure label matches the true one.
accuracy_score(y_true=test['structure'], y_pred=test['paws_pr_structure'])

0.98999999999999999

## Result
We obtained the accuracy we expected. The "built-in" prediction model in PAWS works well.