In [1]:
from citrination_client import CitrinationClient
from citrination_client import PifSystemReturningQuery
from citrination_client import DatasetQuery
from citrination_client import DataQuery
from citrination_client import Filter

from pypif.pif import dumps
import json 
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [14]:
import saxskit

from saxskit.saxskit.saxs_classify import SaxsClassifier
from saxskit.saxskit.saxs_regression import SaxsRegressor
from saxskit.saxskit.saxs_math import profile_spectrum
from saxskit.saxskit.saxs_models import get_data_from_Citrination

from os import listdir
from os.path import isfile, join

I created dataview 'unidentified yes/no' on Citrination that should predict 'unidentified' labels given 13 features (profile key).

In [2]:
with open("citrination_api_key_ssrl.txt", "r") as g: 
    api_key = g.readline()

a_key = api_key.strip()

client = CitrinationClient(site='https://slac.citrination.com',api_key=a_key ) 

### Using dataview 'unidentified yes/no':

#### Make a prediction for a sample from user machine:

In [46]:
q_i = np.genfromtxt ('my_data/A_210C_0212114344_0001_dz_bgsub.csv', delimiter=",")
features = profile_spectrum(q_i)
features

OrderedDict([('Imax_over_Imean', 4.517864065692117),
             ('Imax_sharpness', 1.099622261523832),
             ('I_fluctuation', 0.013731779181704483),
             ('logI_fluctuation', 16.976297914971699),
             ('logI_max_over_std', 4.3019224595005117),
             ('r_fftIcentroid', 0.12748515104545091),
             ('r_fftImax', 0.0017857142857142857),
             ('q_Icentroid', 0.24748247141186733),
             ('q_logIcentroid', 0.22692825356325344),
             ('pearson_q', -0.79559437880058759),
             ('pearson_q2', -0.75112702457148206),
             ('pearson_expq', -0.78642690663881609),
             ('pearson_invexpq', 0.80268675204651729)])

In [47]:
inputs = {}
for k,v in features.items():
    k = "Property " + k
    inputs[k] = v
inputs

{'Property I_fluctuation': 0.013731779181704483,
 'Property Imax_over_Imean': 4.517864065692117,
 'Property Imax_sharpness': 1.099622261523832,
 'Property logI_fluctuation': 16.976297914971699,
 'Property logI_max_over_std': 4.3019224595005117,
 'Property pearson_expq': -0.78642690663881609,
 'Property pearson_invexpq': 0.80268675204651729,
 'Property pearson_q': -0.79559437880058759,
 'Property pearson_q2': -0.75112702457148206,
 'Property q_Icentroid': 0.24748247141186733,
 'Property q_logIcentroid': 0.22692825356325344,
 'Property r_fftIcentroid': 0.12748515104545091,
 'Property r_fftImax': 0.0017857142857142857}

In [48]:
resp = client.predict("21", inputs)
prediction = resp['candidates'][0]['Property unidentified']
prediction

['0', 0.01325820666500124]

#### Make predictions for all our samples and compare with true labels:

In [15]:
data = get_data_from_Citrination(client, [1,15,16])
data.head()

Unnamed: 0,experiment_id,Imax_over_Imean,Imax_sharpness,I_fluctuation,logI_fluctuation,logI_max_over_std,r_fftIcentroid,r_fftImax,q_Icentroid,q_logIcentroid,...,guinier_porod,spherical_normal,diffraction_peaks,I0_floor,G_gp,rg_gp,D_gp,I0_sphere,r0_sphere,sigma_sphere
875,R6,1.57517,1.28644,0.0188467,14.7109,5.76549,0.142637,0.00185529,0.295223,0.29009,...,1,0,0,8.04319e-20,2.91783,2.24152,4.0,,,
580,R4,1.36818,1.18019,0.0197145,16.8472,6.93418,0.155495,0.00185529,0.303624,0.303638,...,1,0,0,0.0,3.07585,1.81659,4.0,,,
449,R3,1.76463,1.13364,0.023499,15.8074,2.55102,0.155732,0.00185529,0.313596,0.294562,...,1,0,0,0.0,2.21653,2.291,4.0,,,
1717,Reaction_D,1.98613,1.37409,0.0308826,1.28871,0.727326,0.162112,0.00238663,0.333439,0.425031,...,1,0,0,1.44378,1.48436e-17,0.0854349,4.0,,,
612,R4,73.5036,2.89989,0.00235084,12.2175,3.90045,0.0877313,0.00185529,0.0934795,-4.71009,...,0,1,1,0.0,,,,,,


In [16]:
profile_keys = [\
    'Imax_over_Imean',\
    'Imax_sharpness',\
    'I_fluctuation',\
    'logI_fluctuation',\
    'logI_max_over_std',\
    'r_fftIcentroid',\
    'r_fftImax',\
    'q_Icentroid',\
    'q_logIcentroid',\
    'pearson_q',\
    'pearson_q2',\
    'pearson_expq',\
    'pearson_invexpq']

In [49]:
inputs = []

for i in range(data.shape[0]):
    sample = {}
    for k in profile_keys:
        sample["Property " + k] = data.iloc[i][k]
        inputs.append(sample)

In [50]:
resp = []
for i in range(data.shape[0]):
    r = client.predict("21", inputs[i])
    resp.append(r['candidates'][0]['Property unidentified'])
resp[ : 10]

[['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175],
 ['0', 0.013111633525991175]]

In [51]:
result = np.array(resp)[ : , 0]
results = list(map(int, result))

In [52]:
import sklearn
sklearn.metrics.accuracy_score(data.unidentified, results)

0.85971223021582732

Accuracy is not as good as I expected. 