-
Notifications
You must be signed in to change notification settings - Fork 27
/
load_data.py
100 lines (76 loc) · 2.3 KB
/
load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import io, sys
import json
import pandas as pd
from . import categories
from . import proc_df_labels
from . import data_formats
from . import make_unique_labels
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
def load_file(net, filename):
    '''Read a matrix file from disk and load it into net.

    Resets the network first so that loading a new file does not mix
    state with a previously loaded one (categories are persistent
    otherwise).
    '''
    net.reset()

    # context manager closes the handle even if read() raises,
    # unlike the previous manual open/read/close sequence
    with open(filename, 'r') as f:
        file_string = f.read()

    load_file_as_string(net, file_string, filename)
def load_file_as_string(net, file_string, filename=''):
    '''Load a matrix given as an in-memory string into net.

    Coerces the string to the interpreter's native text type, wraps it
    in a StringIO buffer, strips any directory components from the
    filename, and hands both to net.load_tsv_to_net.
    '''
    if sys.version_info > (3, 0):
        # Python 3: plain str is already unicode text
        file_string = str(file_string)
    else:
        # Python 2: io.StringIO requires unicode, not bytes
        file_string = unicode(file_string)

    buff = io.StringIO(file_string)

    # keep only the basename when a path-like filename is given
    if '/' in filename:
        filename = filename.rsplit('/', 1)[-1]

    net.load_tsv_to_net(buff, filename)
def load_stdin(net):
    '''Read an entire TSV matrix from standard input and load it into net.'''
    # read stdin in one call instead of quadratic line-by-line concatenation
    data = sys.stdin.read()

    # bug fix: this module imports the StringIO *class* directly
    # (``from io import StringIO`` / ``from StringIO import StringIO``),
    # so the previous ``StringIO.StringIO(data)`` raised AttributeError.
    data = StringIO(data)

    net.load_tsv_to_net(data)
def load_tsv_to_net(net, file_buffer, filename=None):
    '''Parse a TSV matrix held in a StringIO buffer into net.

    The row/column category counts reported by
    categories.check_categories decide how many index columns and
    header rows pandas should consume before the numeric data starts.
    '''
    raw_lines = file_buffer.getvalue().split('\n')
    num_labels = categories.check_categories(raw_lines)

    index_cols = list(range(num_labels['row']))
    header_rows = list(range(num_labels['col']))

    tmp_df = {}
    if len(header_rows) > 1:
        # multiple column-label rows: treat them as a hierarchical header
        tmp_df['mat'] = pd.read_csv(file_buffer, sep='\t',
                                    index_col=index_cols,
                                    header=header_rows)
    else:
        tmp_df['mat'] = pd.read_csv(file_buffer, sep='\t',
                                    index_col=index_cols)

    tmp_df = proc_df_labels.main(tmp_df)

    net.df_to_dat(tmp_df, True)
    net.dat['filename'] = filename
def load_json_to_dict(filename):
    '''Return the parsed contents of a JSON file.

    filename: path to a JSON file on disk.
    Returns the deserialized object (a dict for JSON objects).
    '''
    # context manager closes the handle even if json.load raises,
    # unlike the previous manual open/load/close sequence
    with open(filename, 'r') as f:
        return json.load(f)
def load_gmt(filename):
    '''Parse a GMT (gene matrix transposed) file into a dict.

    Each tab-separated line has the form::

        term <tab> description <tab> elem1 <tab> elem2 ...

    Returns {term: [elem1, elem2, ...]}; the description column
    (index 1) is discarded, matching the original behavior.
    '''
    gmt = {}
    # context manager closes the handle even on error; iterating the
    # file avoids materializing all lines and splits each line once
    with open(filename, 'r') as f:
        for line in f:
            parts = line.rstrip().split('\t')
            gmt[parts[0]] = parts[2:]
    return gmt
def load_data_to_net(net, inst_net):
    '''Copy nodes and mat from inst_net into net.dat, then convert
    the matrix to a numpy array via data_formats.'''
    for key in ('nodes', 'mat'):
        net.dat[key] = inst_net[key]
    data_formats.mat_to_numpy_arr(net)