# mip_hinmine.py -- forked from HBPMedical/algorithm-repository
#!/usr/bin/env python3.6
'''
HINMine wrapper for the HBP medical platform.
@author: jan.kralj@ijs.si
'''
import logging
from mip_helper import io_helper
import numpy as np
import scipy.sparse as sp
import networkx as nx
import json
import cf_netSDM
def adjacency_distance(vector_1, vector_2):
    """Return the Gaussian similarity ``exp(-||vector_1 - vector_2||^2)``.

    Despite the name, the value behaves like a similarity: it is 1.0 for
    identical vectors and decays towards 0.0 as the vectors move apart.

    Args:
        vector_1: 1-D array-like of numbers.
        vector_2: 1-D array-like of numbers with the same length.

    Returns:
        The similarity as a NumPy float.
    """
    # Work in floating point: subtracting unsigned or narrow integer arrays
    # wraps around silently, and plain lists do not support ``-`` at all.
    difference = np.asarray(vector_1, dtype=float) - np.asarray(vector_2, dtype=float)
    return np.exp(-np.dot(difference, difference))
def construct_adjacency_graph(item_names, item_features, item_labels):
    """Build a fully connected similarity network over the given items.

    Every item becomes a node of type ``'basic'`` carrying its label as a
    string. Every ordered pair of items (including an item with itself) gets
    an edge weighted by ``adjacency_distance`` of the two feature vectors, and
    the same weights are stored as a sparse matrix under
    ``structure.decomposed['decomposition']``.

    Args:
        item_names: Iterable of hashable node identifiers, one per item.
        item_features: Indexable collection of feature vectors, where
            ``item_features[i]`` belongs to the i-th item name.
        item_labels: Iterable of labels aligned with ``item_names``.

    Returns:
        The ``HeterogeneousInformationNetwork`` built around the graph, with
        every node index passed as a training index and ``basic_type`` set to
        ``'basic'``.
    """
    # The names are iterated here and indexed further down; materialise them
    # once so a one-shot iterable (e.g. a generator) also works.
    item_names = list(item_names)

    graph = nx.Graph()
    for item_name, item_label in zip(item_names, item_labels):
        # Pass the attributes to add_node directly: the ``Graph.node``
        # accessor was removed in NetworkX 2.4, while keyword attributes are
        # accepted by every NetworkX release.
        graph.add_node(item_name, type='basic', labels=str(item_label))

    structure = cf_netSDM.lib.HIN.HeterogeneousInformationNetwork(graph, ',')
    structure.split_to_indices(train_indices=range(len(structure.node_list)))

    n = len(structure.node_list)
    # NOTE(review): assumes structure.node_list follows the order of
    # item_names, since rows/columns are addressed by position -- confirm.
    matrix = np.zeros((n, n))
    for i in range(n):
        if i % 100 == 0:
            logging.info('Finished %i', i)
        for j in range(n):
            d = adjacency_distance(item_features[i], item_features[j])
            structure.graph.add_edge(item_names[i], item_names[j], weight=d)
            matrix[i, j] = d

    structure.decomposed['decomposition'] = sp.csr_matrix(matrix)
    structure.basic_type = 'basic'
    return structure
def _load_feature_matrix(independent):
    """Arrange the independent variables as a (samples x variables) array."""
    matrix = np.zeros((len(independent[0]['series']), len(independent)))
    for col_number, var in enumerate(independent):
        for row_number, value in enumerate(var['series']):
            matrix[row_number, col_number] = value
    return matrix


def _normalize_columns(matrix):
    """Scale every column of ``matrix`` to unit Euclidean norm, in place."""
    for col_number in range(matrix.shape[1]):
        norm = np.linalg.norm(matrix[:, col_number])
        # An all-zero column has norm 0; dividing by it would fill the column
        # with NaN and poison every similarity computed from it.
        if norm == 0:
            continue
        matrix[:, col_number] = matrix[:, col_number] / norm


def _build_results(features):
    """Convert the feature matrix into a tabular-data-resource dictionary."""
    n_rows, n_cols = features.shape
    # Column count comes from shape[1]; using the row count is only correct
    # when the matrix happens to be square.
    field_names = ['feature_%i' % (col_index + 1) for col_index in range(n_cols)]

    rows = []
    for row_index in range(n_rows):
        instance = {"id": row_index}
        for col_index, field_name in enumerate(field_names):
            # Plain floats keep json.dumps independent of the matrix dtype.
            instance[field_name] = float(features[row_index, col_index])
        rows.append(instance)

    return {
        'profile': 'tabular-data-resource',
        'name': 'hinmine-features',
        'data': rows,
        'schema': {
            'fields': [{'name': field_name, 'type': 'float'} for field_name in field_names],
            'primaryKey': 'id'
        }
    }


def main():
    """Fetch the input data, run HINMine propositionalization, save the result.

    Reads the ``normalize`` (default ``True``) and ``damping`` (default
    ``0.85``) parameters, builds the similarity network from the independent
    variables, labels it with the first dependent variable and stores the
    resulting features as a JSON-encoded tabular data resource.
    """
    # configure logging
    logging.basicConfig(level=logging.INFO)
    logging.info(cf_netSDM)

    # Read inputs
    inputs = io_helper.fetch_data()
    data = inputs['data']
    normalize = get_param(inputs['parameters'], 'normalize', bool, 'True')
    damping = get_param(inputs['parameters'], 'damping', float, '0.85')

    data_array = _load_feature_matrix(data['independent'])
    if normalize:
        _normalize_columns(data_array)

    # Items are named by their row index; labels come from the first
    # dependent variable.
    network = construct_adjacency_graph(range(data_array.shape[0]), data_array, data['dependent'][0]['series'])
    propositionalized = cf_netSDM.hinmine_propositionalize(network, damping)['train_features']['data']

    results_dict = _build_results(propositionalized)
    io_helper.save_results(json.dumps(results_dict), '', 'text/plain')
def get_param(params_list, param_name, type, default_value):
    """Look up a parameter by name and convert its value to the given type.

    Args:
        params_list: Iterable of dicts with ``"name"`` and ``"value"`` keys.
        param_name: Name of the parameter to look for.
        type: Callable used to convert the raw value (e.g. ``float`` or
            ``bool``). The name shadows the builtin but is kept so existing
            callers remain valid.
        default_value: Raw value that is converted and returned when the
            parameter is missing or its value cannot be converted.

    Returns:
        The converted parameter value, or the converted default.
    """
    def convert(raw):
        # bool('False') is True because every non-empty string is truthy, so
        # textual booleans have to be parsed explicitly.
        if type is bool and isinstance(raw, str):
            text = raw.strip().lower()
            if text in ('true', '1', 'yes', 'y', 'on'):
                return True
            if text in ('false', '0', 'no', 'n', 'off', ''):
                return False
            raise ValueError('not a boolean: %r' % (raw,))
        return type(raw)

    for p in params_list:
        if p["name"] == param_name:
            try:
                return convert(p["value"])
            except (ValueError, TypeError):
                # TypeError covers values such as None that the target type
                # rejects outright; fall through to the default in both cases.
                logging.info('%s cannot be cast as %s', p['value'], type)
    logging.info("Using default value of parameter %s: %s", param_name, default_value)
    return convert(default_value)
# Run the wrapper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()