In [1]:
from citrination_client import CitrinationClient
from citrination_client import PifSystemReturningQuery
from citrination_client import DatasetQuery
from citrination_client import DataQuery
from citrination_client import Filter

from pypif.pif import dumps
import json 
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
import saxskit

from saxskit.saxskit.saxs_classify import SaxsClassifier
from saxskit.saxskit.saxs_regression import SaxsRegressor
from saxskit.saxskit.saxs_math import profile_spectrum
from saxskit.saxskit.saxs_models import get_data_from_Citrination

from os import listdir
from os.path import isfile, join

I created the data view 'unidentified yes/no' on Citrination; it should predict the 'unidentified' label from 13 features (the profile keys).

In [3]:
# Read the API key from the first line of a local text file.
with open("citrination_api_key_ssrl.txt", "r") as key_file:
    api_key = key_file.readline()

# Drop the trailing newline / surrounding whitespace left by readline().
a_key = api_key.strip()

client = CitrinationClient(
    site='https://slac.citrination.com',
    api_key=a_key,
)

### Using dataview 'unidentified yes/no':

#### Make a prediction for a sample stored on the user's machine:

In [4]:
# Load one comma-separated sample file from disk and profile it.
spectrum_path = 'my_data/A_210C_0212114344_0001_dz_bgsub.csv'
q_i = np.genfromtxt(spectrum_path, delimiter=",")
features = profile_spectrum(q_i)
features

OrderedDict([('Imax_over_Imean', 4.517864065692117),
             ('Imax_sharpness', 1.099622261523832),
             ('I_fluctuation', 0.013731779181704483),
             ('logI_fluctuation', 16.976297914971699),
             ('logI_max_over_std', 4.3019224595005117),
             ('r_fftIcentroid', 0.12748515104545091),
             ('r_fftImax', 0.0017857142857142857),
             ('q_Icentroid', 0.24748247141186733),
             ('q_logIcentroid', 0.22692825356325344),
             ('pearson_q', -0.79559437880058759),
             ('pearson_q2', -0.75112702457148206),
             ('pearson_expq', -0.78642690663881609),
             ('pearson_invexpq', 0.80268675204651729)])

We need to prepend "Property " (including the trailing space) to each feature name:

In [5]:
# Same values as `features`, with every name prefixed by "Property ".
inputs = {"Property " + name: value for name, value in features.items()}
inputs

{'Property I_fluctuation': 0.013731779181704483,
 'Property Imax_over_Imean': 4.517864065692117,
 'Property Imax_sharpness': 1.099622261523832,
 'Property logI_fluctuation': 16.976297914971699,
 'Property logI_max_over_std': 4.3019224595005117,
 'Property pearson_expq': -0.78642690663881609,
 'Property pearson_invexpq': 0.80268675204651729,
 'Property pearson_q': -0.79559437880058759,
 'Property pearson_q2': -0.75112702457148206,
 'Property q_Icentroid': 0.24748247141186733,
 'Property q_logIcentroid': 0.22692825356325344,
 'Property r_fftIcentroid': 0.12748515104545091,
 'Property r_fftImax': 0.0017857142857142857}

In [6]:
# "21" is presumably the id of the 'unidentified yes/no' data view -- TODO confirm.
resp = client.predict("21", inputs)

# Keep the 'Property unidentified' entry of the first returned candidate.
first_candidate = resp['candidates'][0]
prediction = first_candidate['Property unidentified']
prediction

['0', 0.01325820666500124]

#### Make predictions for all our samples and compare with the true labels:

In [7]:
# Presumably Citrination dataset ids -- TODO confirm against get_data_from_Citrination.
dataset_ids = [1, 15, 16]
data = get_data_from_Citrination(client, dataset_ids)
data.head()

Unnamed: 0,experiment_id,Imax_over_Imean,Imax_sharpness,I_fluctuation,logI_fluctuation,logI_max_over_std,r_fftIcentroid,r_fftImax,q_Icentroid,q_logIcentroid,...,guinier_porod,spherical_normal,diffraction_peaks,I0_floor,G_gp,rg_gp,D_gp,I0_sphere,r0_sphere,sigma_sphere
171,R1,18.8777,1.03372,0.00109968,4.19634,2.87545,0.107935,0.00185529,0.064653,-0.674134,...,0,1,0,0.185712,,,,1191.61,33.9117,0.0335728
42,R1,15.3204,2.53745,0.00278489,24.9187,2.97657,0.105759,0.00185529,0.0757829,-0.140669,...,0,1,1,,,,,0.0,,0.0
69,R1,18.7701,1.0313,0.00109659,5.24908,3.03462,0.107702,0.00185529,0.0649956,-0.258935,...,0,1,0,0.250425,,,,1394.91,33.6123,0.0336855
1643,Reaction_C,4.25667,1.05704,0.0187776,18.3502,5.05902,0.144492,0.0071599,0.300745,0.289633,...,1,0,0,7.73547e-17,1.81139,1.83064,4.0,,,
11,R1,1.28889,1.21086,0.0205457,36.719,9.75636,0.179186,0.00185529,0.322812,0.324378,...,0,0,0,,,,,,,


In [8]:
# Names of the 13 profile features used as model inputs.
# (No backslash continuations needed: the brackets already span lines.)
profile_keys = [
    'Imax_over_Imean',
    'Imax_sharpness',
    'I_fluctuation',
    'logI_fluctuation',
    'logI_max_over_std',
    'r_fftIcentroid',
    'r_fftImax',
    'q_Icentroid',
    'q_logIcentroid',
    'pearson_q',
    'pearson_q2',
    'pearson_expq',
    'pearson_invexpq',
]

In [9]:
# One dict per row of `data`: only the profile features, with each name
# prefixed by "Property ".
inputs = []

for row_idx in range(data.shape[0]):
    row = data.iloc[row_idx]
    inputs.append({"Property " + key: row[key] for key in profile_keys})

In [11]:
# Query the model once per sample and keep the 'Property unidentified' entry
# of the first candidate in each reply.
resp = []
n_samples = data.shape[0]
for row_idx in range(n_samples):
    reply = client.predict("21", inputs[row_idx])
    top_candidate = reply['candidates'][0]
    resp.append(top_candidate['Property unidentified'])
resp[:10]

[['0', 0.01304607535836122],
 ['0', 0.01304607535836122],
 ['0', 0.01304607535836122],
 ['0', 0.013632262652415069],
 ['1', 0.013298751639511948],
 ['1', 0.014211411065752962],
 ['0', 0.013603832490470511],
 ['1', 0.013435710976685087],
 ['0', 0.01304607535836122],
 ['0', 0.015117120925099908]]

In [12]:
# First column of each response pair is the predicted label; cast it to int.
result = np.array(resp)[:, 0]
results = [int(label) for label in result]

In [13]:
# Import the submodule explicitly: a bare `import sklearn` does not guarantee
# that `sklearn.metrics` is loaded and reachable as an attribute of the package.
import sklearn.metrics

# Accuracy of the Citrination predictions against the labels stored in `data`.
sklearn.metrics.accuracy_score(data.unidentified, results)

0.99486125385405966

In [14]:
from collections import OrderedDict

# One OrderedDict per row of `data`, keyed by the bare profile-key names
# in `profile_keys` order.
inputs2 = []

for row_idx in range(data.shape[0]):
    row = data.iloc[row_idx]
    inputs2.append(OrderedDict((key, row[key]) for key in profile_keys))

In [15]:
m = SaxsClassifier()

# Run the saxskit classifier on every sample and keep the first element of
# its 'unidentified' flag.
result2 = []

for row_idx in range(data.shape[0]):
    sample_flags = m.run_classifier(inputs2[row_idx])
    unidentified_flag = sample_flags['unidentified']
    result2.append(unidentified_flag[0])

In [16]:
# Import the submodule explicitly: a bare `import sklearn` does not guarantee
# that `sklearn.metrics` is loaded and reachable as an attribute of the package.
import sklearn.metrics

# Accuracy of the saxskit classifier predictions against the labels stored in `data`.
sklearn.metrics.accuracy_score(data.unidentified, result2)

0.99023638232271327

**We got about the same accuracy with the Citrination model (≈0.995) and with our own saxskit model (≈0.990).**