# Get SCOS Info

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import pickle
from tqdm.notebook import tqdm,trange
from pprint import pprint
import pdb

In [3]:
from convert_scos import *

In [None]:
fpath = '/mnt/esa-ocai/ioa/karel'
fpath_remote = '../ioa/karel'

In [8]:
prefixes = ["""@base <http://ioa-graph/resource/> .
@prefix ioa: <http://www.semanticweb.org/esa-ioa/ontologies/2022/ioa-wiki-ontology#> .
@prefix pred: <http://www.semanticweb.org/esa-ioa/ontologies/2022/predicates-ontology#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> ."""]

## GAIA

In [4]:
gaia_parse = pickle.load(open('gaia_parsers.pkl','rb'))

In [52]:
def get_single_parent(parents):
    p_list = ['aocs','stracker','gyro','fss','cps']
    for p in p_list:
        if len(parents)==1:
            return parents[0] if parents[0]!='stracker' else 'str'
        if p in val:
            parents.remove(p)
            return get_single_parent(parents)
    
    p_join = ''
    for p in parents:
        p_join += p
        
    if 'fss' in p_join:
        return 'fss'
    if 'cps' in p_join:
        return 'cps'
    
    return p_join

### ReportingData

In [None]:
colnames_pcf = ['PCF_NAME', 'PCF_DESCR', 'PCF_PID', 'PCF_UNIT', 'PCF_PTC', 
                'PCF_PFC', 'PCF_WIDTH', 'PCF_VALID', 'PCF_RELATED', 'PCF_CATEG', 
                'PCF_NATUR', 'PCF_CURTX', 'PCF_INTER', 'PCF_USCON', 'PCF_DECIM',
                'PCF_PARVAL', 'PCF_SUBSYS', 'PCF_VALPAR', 'PCF_SPTYPE', 'PCF_CORR',
                'PCF_OBTID', 'PCF_DARC', 'PCF_ENDIAN']

In [None]:
fname_gaia = fpath + '/gaia/s2k_mib/pcf.dat'
data_report_gaia = pd.read_csv(fname_gaia, sep='\t', names=colnames_pcf, index_col=False, usecols=[0,1,2,3,16])
data_report_gaia

In [None]:
data_report_gaia['PCF_UNIT'].unique()

In [None]:
parse_rd = gaia_parse['ReportingData']
data_report_gaia['parents'] = [[] for _ in range(len(data_report_gaia))]
parent_masks = {}

for parse,parent in tqdm(zip(parse_rd['parser'], parse_rd['parent']), total=len(parse_rd['parent'])):
    parse_str = [p.decode("utf-8").strip('\'') for p in parse]
    parent_str = parent.decode("utf-8")
    
    name_mask = data_report_gaia['PCF_NAME'].str.contains(parse_str[0], regex=True, na=False)
    descr_mask = data_report_gaia['PCF_DESCR'].str.contains(parse_str[1], regex=True, na=False)
    subsys_mask = data_report_gaia['PCF_SUBSYS'].str.contains(parse_str[2], regex=True, na=False)
    
    parent_masks[parent_str] = name_mask & descr_mask & subsys_mask
    
    for ind in data_report_gaia.loc[name_mask & descr_mask & subsys_mask].index:
        data_report_gaia.loc[ind,'parents'].append(parent_str)

In [None]:
data_report_gaia['parents'].map(len).value_counts()

In [None]:
data_report_gaia['parent_name'] = ['' for _ in range(len(data_report_gaia))]

for i, val in tqdm(data_report_gaia['parents'].items(),total=len(data_report_gaia)):
    data_report_gaia.loc[i,'parent_name'] = 'gaia_' + get_single_parent(val)

In [None]:
data_report_gaia['parent_name'] = data_report_gaia['parent_name'].map(lambda s: s + 'sc' if s.endswith('_') else s)
data_report_gaia

In [None]:
data_report_gaia.loc[data_report_gaia['parents'].map(len)>0]

In [None]:
data_report_gaia['PCF_NAME'].describe()

In [None]:
data_report_gaia.to_csv('reporting_data_gaia.csv', index=False)

Convert to rdf

In [None]:
gaia_rd = pd.read_csv('reporting_data_gaia.csv')
gaia_rd

In [None]:
test_str = row2rdf_reportingdata(gaia_rd.loc[4291],'gaia')
pprint(test_str)

In [None]:
gaia_rd_rdf = prefixes.copy()

for index, row in tqdm(gaia_rd.iterrows(), total=len(gaia_rd)):
    gaia_rd_rdf.append(row2rdf_reportingdata(row,'gaia'))

In [None]:
with open('gaia_reporting_data.ttl', 'w') as f:
    f.write('\n\n'.join(gaia_rd_rdf))

Example issues

In [None]:
data_report_gaia.loc[(parent_masks['cpsa'] & parent_masks['cpsb']), 'PCF_DESCR'].values

In [None]:
descr_cpsa = data_report_gaia['PCF_DESCR'].str.contains(".*CPS.*A.*", regex=True, na=False)
subsys_cpsa = data_report_gaia['PCF_SUBSYS'].str.contains('CPS', regex=True, na=False)

data_report_gaia.loc[descr_cpsa & subsys_cpsa]

### Activity

In [None]:
colnames_ccf = ['CCF_CNAME', 'CCF_DESCR', 'CCF_DESCR2', 'CCF_CTYPE', 'CCF_CRITICAL', 
                'CCF_PKTID', 'CCF_TYPE', 'CCF_STYPE', 'CCF_APID', 'CCF_NPARS', 
                'CCF_PLAN', 'CCF_EXEC', 'CCF_ILSCOPE', 'CCF_ILSTAGE', 'CCF_SUBSYS',
                'CCF_HIPRI', 'CCF_MAPID', 'CCF_DEFSET', 'CCF_RAPID', 'CCF_ACK',
                'CCF_SUBSCHEDID']

In [None]:
fname_gaia = fpath + '/gaia/s2k_mib/ccf.dat'
data_activity_gaia = pd.read_csv(fname_gaia, sep='\t', names=colnames_ccf, index_col=False, usecols=[0,1,2,6,7,8,14,20])
data_activity_gaia

In [None]:
parse_act = gaia_parse['Activity']
parse_act

In [None]:
data_activity_gaia['parents'] = [[] for _ in range(len(data_activity_gaia))]
parent_masks = {}

for parse,parent in tqdm(zip(parse_act['parser'], parse_act['parent']), total=len(parse_act['parent'])):
    parse_str = [p.decode("utf-8").strip('\'') for p in parse]
    parent_str = parent.decode("utf-8")
    
    name_mask = data_activity_gaia['CCF_CNAME'].str.contains(parse_str[0], regex=True, na=False)
    descr_mask = data_activity_gaia['CCF_DESCR'].str.contains(parse_str[1], regex=True, na=False)
    type_mask = ~data_activity_gaia['CCF_TYPE'].isnull()
    stype_mask = ~data_activity_gaia['CCF_STYPE'].isnull()
    apid_mask = ~data_activity_gaia['CCF_APID'].isnull()
    
    parent_masks[parent_str] = name_mask & descr_mask & type_mask & stype_mask & apid_mask
    
    for ind in data_activity_gaia.loc[name_mask & descr_mask & type_mask & stype_mask & apid_mask].index:
        data_activity_gaia.loc[ind,'parents'].append(parent_str)

In [None]:
data_activity_gaia['parents'].map(len).value_counts()

In [None]:
data_activity_gaia['parent_name'] = ['' for _ in range(len(data_activity_gaia))]

for i, val in tqdm(data_activity_gaia['parents'].items(),total=len(data_activity_gaia)):
    data_activity_gaia.loc[i,'parent_name'] = 'gaia_' + get_single_parent(val)

In [None]:
data_activity_gaia['parent_name'] = data_activity_gaia['parent_name'].map(lambda s: s + 'sc' if s.endswith('_') else s)
data_activity_gaia

In [None]:
data_activity_gaia.parent_name.value_counts()

In [None]:
data_activity_gaia.to_csv('activity_gaia.csv', index=False)

Convert to rdf

In [None]:
gaia_act = pd.read_csv('activity_gaia.csv')
gaia_act

In [None]:
test_str = row2rdf_activity(gaia_act.loc[2137])
pprint(test_str)

In [None]:
gaia_act_rdf = prefixes.copy()

for index, row in tqdm(gaia_act.iterrows(), total=len(gaia_act)):
    gaia_act_rdf.append(row2rdf_activity(row))

In [None]:
with open('gaia_activity.ttl', 'w') as f:
    f.write('\n\n'.join(gaia_act_rdf))

### Event

In [5]:
colnames_pid = ['PID_TYPE', 'PID_STYPE', 'PID_APID', 'PID_PI1_VAL', 'PID_PI2_VAL',
                'PID_SPID', 'PID_DESCR', 'PID_UNIT', 'PID_TPSD', 'PID_DFHSIZE',
                'PID_TIME', 'PID_INTER', 'PID_VALID', 'PID_CHECK', 'PID_EVENT',
                'PID_EVID']

In [3]:
fpath = "."

In [1]:
import pandas as pd

In [6]:
fname_gaia = fpath + '/gaia/s2k_mib/pid.dat'
data_event_gaia = pd.read_csv(fname_gaia, sep='\t', names=colnames_pid, index_col=False)#, usecols=[0,1,2,6,7])
data_event_gaia

Unnamed: 0,PID_TYPE,PID_STYPE,PID_APID,PID_PI1_VAL,PID_PI2_VAL,PID_SPID,PID_DESCR,PID_UNIT,PID_TPSD,PID_DFHSIZE,PID_TIME,PID_INTER,PID_VALID,PID_CHECK,PID_EVENT,PID_EVID
0,0,0,0,0,0,5000,SCC Message,0.0,-1,0,N,,Y,1,N,
1,0,0,0,0,0,1000000,Bad Packet,,-1,0,N,,Y,1,N,
2,0,0,1,0,0,1000006,Frame,,-1,0,N,,Y,1,N,
3,0,0,50,0,0,1111,Ground station response,,-1,0,N,,Y,1,N,
4,0,0,1923,0,0,1000098,Tco_Time_Couples SCET,0.0,-1,16,Y,,Y,1,N,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2581,172,78,179,0,0,10372078,FSS compensation model,,10372078,16,Y,,Y,1,N,
2582,176,7,211,0,0,160676007,TCS Heater line param,,-1,16,Y,,Y,1,N,
2583,190,22,1966,0,0,1000008,mini bad frame,,-1,0,N,,Y,1,N,
2584,190,23,1966,0,0,1000007,mini good frame,,-1,0,N,,Y,1,N,


In [14]:
colnames_tpcf = ['TPCF_SPID','TPCF_NAME','TPCF_SIZE']
fname_gaia = fpath + '/gaia/s2k_mib/tpcf.dat'
telemetry_name = pd.read_csv(fname_gaia, sep='\t', names=colnames_tpcf, index_col=False)
telemetry_name

Unnamed: 0,TPCF_SPID,TPCF_NAME,TPCF_SIZE
0,100,Dynamic Cons,0.0
1,1000,Saved_Synth,0.0
2,1111,GNDRESP,34.0
3,1927,FTS model,0.0
4,4000,YDW94000,0.0
...,...,...,...
2581,4212957070,YDW99127,0.0
2582,4212957094,YDW99112,0.0
2583,4212957125,YDW99121,0.0
2584,4212957168,YDW99120,0.0


In [40]:
data_event_gaia = data_event_gaia.set_index('PID_SPID').join(telemetry_name.set_index('TPCF_SPID'))

In [29]:
data_event_gaia.loc[:,"PID_SPID"]

0            5000
1         1000000
2         1000006
3            1111
4         1000098
          ...    
2581     10372078
2582    160676007
2583      1000008
2584      1000007
2585      1000009
Name: PID_SPID, Length: 2586, dtype: int64

In [17]:
data_event_gaia.PID_UNIT.value_counts()

0.0    6
Name: PID_UNIT, dtype: int64

In [12]:
colnames_cvs = ['CVS_ID','CVS_TYPE','CVS_SOURCE','CVS_START','CVS_INTERVAL','CVS_SPID', 'CVS_UNCERTAINT']
fname_gaia = fpath + '/gaia/s2k_mib/cvs.dat'
verification_1 = pd.read_csv(fname_gaia, sep='\t', names=colnames_cvs, index_col=False)
verification_1

Unnamed: 0,CVS_ID,CVS_TYPE,CVS_SOURCE,CVS_START,CVS_INTERVAL,CVS_SPID,CVS_UNCERTAINT
0,30000,A,R,0,30,,-1
1,30001,C,R,0,30,,-1
2,30002,S,V,0,300,,-1
3,30003,S,V,0,210,,-1
4,30004,S,V,0,210,,-1
...,...,...,...,...,...,...,...
489,30522,S,V,0,60,,-1
490,30524,S,V,0,60,,-1
491,30526,S,V,0,60,,-1
492,30528,S,V,0,60,,-1


In [13]:
colnames_cvs = ['CVE_CVSID','CVE_PARNAM','CVE_INTER','CVE_VAL','CVE_TOL','CVE_CHECK']
fname_gaia = fpath + '/gaia/s2k_mib/cve.dat'
verification_2 = pd.read_csv(fname_gaia, sep='\t', names=colnames_cvs, index_col=False)
verification_2

Unnamed: 0,CVE_CVSID,CVE_PARNAM,CVE_INTER,CVE_VAL,CVE_TOL,CVE_CHECK
0,30002,NST80001,E,ON,,B
1,30003,NST81052,E,INI,,B
2,30004,NST81052,E,STB,,B
3,30005,NST81001,E,ATFAD,,B
4,30006,NST81001,E,STB,,B
...,...,...,...,...,...,...
509,30522,NV302115,E,DISABLED,,B
510,30524,NV402115,E,DISABLED,,B
511,30526,NV502115,E,DISABLED,,B
512,30528,NV602115,E,DISABLED,,B


In [None]:
cvs
cve

In [22]:
data_event_gaia[data_event_gaia['PID_SPID'] == 510310016]

Unnamed: 0,PID_TYPE,PID_STYPE,PID_APID,PID_PI1_VAL,PID_PI2_VAL,PID_SPID,PID_DESCR,PID_UNIT,PID_TPSD,PID_DFHSIZE,PID_TIME,PID_INTER,PID_VALID,PID_CHECK,PID_EVENT,PID_EVID
1438,5,2,183,10016,0,510310016,5_2 LOW YACX2720 MON_LIB transition to normal,,-1,16,Y,,Y,1,W,


In [14]:
type([entry for entry in data_event_gaia['PID_EVID'].values if entry != np.nan][0])

numpy.float64

In [20]:
#data_event_gaia.dropna(subset=["PID_EVID"])

In [17]:
import numpy as np
np.nan == [entry for entry in data_event_gaia['PID_EVID'].values if entry != np.nan][0]

False

In [None]:
data_event_gaia.loc[1382]

In [None]:
data_event_gaia.loc[1383]

In [38]:
data_event_gaia.loc[1503].PID_DESCR

'5_3 MED YDWX2721 first RTC timeout'

In [20]:
data_event_gaia[data_event_gaia.PID_DESCR.str.contains("YPLX2F07")]

Unnamed: 0,PID_TYPE,PID_STYPE,PID_APID,PID_PI1_VAL,PID_PI2_VAL,PID_SPID,PID_DESCR,PID_UNIT,PID_TPSD,PID_DFHSIZE,PID_TIME,PID_INTER,PID_VALID,PID_CHECK,PID_EVENT,PID_EVID
1381,5,1,199,12039,0,661212039,5_1 INFO YPLX2F07 PDHU event DOWNLINK_MODE_TRA...,,-1,16,Y,,Y,1,I,


In [13]:
fname_gaia = fpath + '/gaia/event_gaia.csv'
data_event_gaia = pd.read_csv(fname_gaia, sep=',')#, usecols=[0,1,2,6,7])
data_event_gaia

Unnamed: 0,PID_TYPE,PID_STYPE,PID_APID,PID_DESCR,PID_UNIT,parents,parent_name
0,0,0,0,SCC Message,0.0,[],gaia_sc
1,0,0,0,Bad Packet,,[],gaia_sc
2,0,0,1,Frame,,[],gaia_sc
3,0,0,50,Ground station response,,[],gaia_sc
4,0,0,1923,Tco_Time_Couples SCET,0.0,[],gaia_sc
...,...,...,...,...,...,...,...
2581,172,78,179,FSS compensation model,,['fss'],gaia_fss
2582,176,7,211,TCS Heater line param,,[],gaia_sc
2583,190,22,1966,mini bad frame,,[],gaia_sc
2584,190,23,1966,mini good frame,,[],gaia_sc


In [19]:
data_event_gaia.PID_EVENT.value_counts()

N    1811
A     647
I      70
W      58
Name: PID_EVENT, dtype: int64

In [24]:
parse_ev = gaia_parse['Event']
#parse_ev

In [26]:
parse_ev['parent']

[b'aocs',
 b'stracker',
 b'str1',
 b'str2',
 b'gyro',
 b'gyro1',
 b'gyro2',
 b'gyro3',
 b'fss',
 b'fss1',
 b'fss2',
 b'fss3',
 b'cps',
 b'cpsa',
 b'cpsb',
 b'mps']

In [28]:
data_event_gaia['PID_TYPE'].isnull()

0       False
1       False
2       False
3       False
4       False
        ...  
2581    False
2582    False
2583    False
2584    False
2585    False
Name: PID_TYPE, Length: 2586, dtype: bool

In [41]:
data_event_gaia['parents'] = [[] for _ in range(len(data_event_gaia))]
parent_masks = {}

for parse,parent in tqdm(zip(parse_ev['parser'], parse_ev['parent']), total=len(parse_ev['parent'])):
    parse_str = [p.decode("utf-8").strip('\'') for p in parse]
    parent_str = parent.decode("utf-8")

    #type_mask = ~data_event_gaia['PID_TYPE'].isnull()
    #stype_mask = ~data_event_gaia['PID_STYPE'].isnull()
    #apid_mask = ~data_event_gaia['PID_APID'].isnull()
    descr_mask = data_event_gaia['PID_DESCR'].str.contains(parse_str[3], regex=True, na=False)
    
    #parent_masks[parent_str] = type_mask & stype_mask & apid_mask & descr_mask
    parent_masks[parent_str] = descr_mask
    
    #for ind in data_event_gaia.loc[type_mask & stype_mask & apid_mask & descr_mask].index:
    for ind in data_event_gaia.loc[descr_mask].index:
        #data_event_gaia.loc[ind,'parents'].append(parent_str)
        data_event_gaia.loc[ind,'parents'].append(parent_str)

  0%|          | 0/16 [00:00<?, ?it/s]

In [42]:
data_event_gaia['parents'].map(len).value_counts()

0    2339
1     142
2      89
3      16
Name: parents, dtype: int64

In [43]:
data_event_gaia['parent_name'] = ['' for _ in range(len(data_event_gaia))]

for i, val in tqdm(data_event_gaia['parents'].items(),total=len(data_event_gaia)):
    data_event_gaia.loc[i,'parent_name'] = 'gaia_' + get_single_parent(val)

  0%|          | 0/2586 [00:00<?, ?it/s]

In [37]:
#data_event_gaia

In [44]:
data_event_gaia['parent_name'] = data_event_gaia['parent_name'].map(lambda s: s + 'sc' if s.endswith('_') else s)
data_event_gaia

Unnamed: 0_level_0,PID_TYPE,PID_STYPE,PID_APID,PID_PI1_VAL,PID_PI2_VAL,PID_DESCR,PID_UNIT,PID_TPSD,PID_DFHSIZE,PID_TIME,PID_INTER,PID_VALID,PID_CHECK,PID_EVENT,PID_EVID,parents,parent_name,TPCF_NAME,TPCF_SIZE
PID_SPID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
5000,0,0,0,0,0,SCC Message,0.0,-1,0,N,,Y,1,N,,[],gaia_sc,SCC,0.0
1000000,0,0,0,0,0,Bad Packet,,-1,0,N,,Y,1,N,,[],gaia_sc,BAD PKT,0.0
1000006,0,0,1,0,0,Frame,,-1,0,N,,Y,1,N,,[],gaia_sc,FRAME,0.0
1111,0,0,50,0,0,Ground station response,,-1,0,N,,Y,1,N,,[],gaia_sc,GNDRESP,34.0
1000098,0,0,1923,0,0,Tco_Time_Couples SCET,0.0,-1,16,Y,,Y,1,N,,[],gaia_sc,SCETCouple,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10372078,172,78,179,0,0,FSS compensation model,,10372078,16,Y,,Y,1,N,,[fss],gaia_fss,YAC72078,0.0
160676007,176,7,211,0,0,TCS Heater line param,,-1,16,Y,,Y,1,N,,[],gaia_sc,YPF76007,86.0
1000008,190,22,1966,0,0,mini bad frame,,-1,0,N,,Y,1,N,,[],gaia_sc,MFBAD,137.0
1000007,190,23,1966,0,0,mini good frame,,-1,0,N,,Y,1,N,,[],gaia_sc,MFGOOD,137.0


In [39]:
data_event_gaia.parent_name.value_counts()

gaia_sc       2339
gaia_aocs      125
gaia_str1       21
gaia_str2       21
gaia_gyro       16
gaia_gyro2      14
gaia_gyro1      14
gaia_gyro3      14
gaia_cps        10
gaia_fss         8
gaia_str         4
Name: parent_name, dtype: int64

In [45]:
data_event_gaia.to_csv('event_gaia.csv',index=False)

Convert to rdf

In [1]:
import pandas as pd 
gaia_event = pd.read_csv('./gaia/event_gaia.csv')
gaia_event

Unnamed: 0,PID_TYPE,PID_STYPE,PID_APID,PID_PI1_VAL,PID_PI2_VAL,PID_DESCR,PID_UNIT,PID_TPSD,PID_DFHSIZE,PID_TIME,PID_INTER,PID_VALID,PID_CHECK,PID_EVENT,PID_EVID,parents,parent_name,TPCF_NAME,TPCF_SIZE
0,0,0,0,0,0,SCC Message,0.0,-1,0,N,,Y,1,N,,[],gaia_sc,SCC,0.0
1,0,0,0,0,0,Bad Packet,,-1,0,N,,Y,1,N,,[],gaia_sc,BAD PKT,0.0
2,0,0,1,0,0,Frame,,-1,0,N,,Y,1,N,,[],gaia_sc,FRAME,0.0
3,0,0,50,0,0,Ground station response,,-1,0,N,,Y,1,N,,[],gaia_sc,GNDRESP,34.0
4,0,0,1923,0,0,Tco_Time_Couples SCET,0.0,-1,16,Y,,Y,1,N,,[],gaia_sc,SCETCouple,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2581,172,78,179,0,0,FSS compensation model,,10372078,16,Y,,Y,1,N,,['fss'],gaia_fss,YAC72078,0.0
2582,176,7,211,0,0,TCS Heater line param,,-1,16,Y,,Y,1,N,,[],gaia_sc,YPF76007,86.0
2583,190,22,1966,0,0,mini bad frame,,-1,0,N,,Y,1,N,,[],gaia_sc,MFBAD,137.0
2584,190,23,1966,0,0,mini good frame,,-1,0,N,,Y,1,N,,[],gaia_sc,MFGOOD,137.0


In [2]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-mpnet-base-v2')



In [13]:
sentences = gaia_event.PID_DESCR.values[100:200]

encoding = model.encode(sentences)



In [22]:
names =  gaia_event.TPCF_NAME.values[100:200]


In [24]:
pd.set_option('display.max_columns', None)  
pd.set_option('display.max_colwidth', None)
from sentence_transformers import SentenceTransformer, util

#Compute cosine-similarities for each sentence with each other sentence
cosine_scores = util.cos_sim(encoding, encoding)

#Find the pairs with the highest cosine similarity scores
pairs = []
for i in range(len(cosine_scores)-1):
    for j in range(i+1, len(cosine_scores)):
        pairs.append({'index': [i, j], 'score': cosine_scores[i][j]})

#Sort scores in decreasing order
pairs = sorted(pairs, key=lambda x: x['score'], reverse=True)
results = []
for pair in pairs[0:50]:
    i, j = pair['index']
    #print("{} \t\t\t\t\t {} \t\t\t\t\t Score: {:.4f}".format(sentences[i], sentences[j], pair['score']))
    results.append({'descrp1': sentences[i],'name1':names[i], 'descrp2': sentences[j],'name2':names[j], 'score':pair['score']})
pd.DataFrame(results)

Unnamed: 0,descrp1,name1,descrp2,name2,score
0,Too many monitoring IDs enabled,YDW01076,Too many monitoring ID defined,YDW01090,tensor(0.9337)
1,unknown service type,YOB01268,unknown service subtype,YOB01269,tensor(0.9120)
2,Invalid type value in the TC,YDW01594,Invalid SubType value in the TC,YDW01595,tensor(0.9058)
3,Monitoring ID is not defined,YDW01074,monitoring param id is not defined,YDW01080,tensor(0.8909)
4,Invalid packet store policy,YDWX0F11,Invalid packet store protection,YDWX0F12,tensor(0.8718)
5,PSS_ACK_AFAIL,YZP09003,PSS_ACK_ESUCC,YZP09004,tensor(0.8467)
6,structure identifier out of range,YDW01768,structure identifier is not defined,YDW01778,tensor(0.8166)
7,Invalid packet store identifier,YDWX0F10,Invalid packet store protection,YDWX0F12,tensor(0.8162)
8,Invalid number of parameters,YDW01262,invalid parameter id,YDW01772,tensor(0.8032)
9,Amount of monitoring ids not in line with N,YDW01073,Too many monitoring ID defined,YDW01090,tensor(0.8029)


In [6]:
encoding

#Compute cosine-similarities
cosine_scores = util.cos_sim(embeddings1, embeddings2)

#Output the pairs with their score
for i in range(len(sentences1)):
    print("{} \t\t {} \t\t Score: {:.4f}".format(sentences1[i], sentences2[i], cosine_scores[i][i]))

In [6]:
test_str = row2rdf_event(gaia_event.loc[4],'gaia')
print(test_str)

<PID_SCETCouple> a ioa:Event ;
  pred:instance_of ioa:Event ;
  ioa:Description 'Tco_Time_Couples SCET'^^xsd:string ;
  ioa:PID_Event 'N'^^xsd:string ;
  ioa:parent <gaia_sc> .


In [9]:
gaia_event_rdf = prefixes.copy()

for index, row in tqdm(gaia_event.iterrows(), total=len(gaia_event)):
    gaia_event_rdf.append(row2rdf_event(row,'gaia'))

  0%|          | 0/2586 [00:00<?, ?it/s]

In [10]:
with open('gaia_event.ttl', 'w') as f:
    f.write('\n\n'.join(gaia_event_rdf))

## Cluster

### Reporting Data

In [None]:
colnames_pcf = ['PCF_NAME', 'PCF_DESCR', 'PCF_UNIT', 'PCF_VALID', 'PCF_CATEG',
                'PCF_WIDTH', 'PCF_NATUR', 'PCF_CODIN', 'PCF_CURTX', 'PCF_INTER',
                'PCF_USCON', 'PCF_CTSPE', 'PCF_CTREL', 'PCF_CTDIS', 'PCF_CTPRO',
                'PCF_DECIM', 'PCF_PARVAL', 'extra1']

In [None]:
sc_name = 'clu2'

fname_clu = fpath + '/cluster/mib/'+sc_name+'/PCF.dat'
data_report_cluster = pd.read_csv(fname_clu, names=colnames_pcf, index_col=False, skiprows=1)
data_report_cluster

In [None]:
data_report_cluster['PCF_UNIT'].unique()

In [None]:
data_report_cluster['parent_name'] = [sc_name for _ in range(len(data_report_cluster))]
data_report_cluster

### Activity

### Event