forked from rishucoding/532-perf_analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
skewed_dataset_generator.py
139 lines (133 loc) · 5.48 KB
/
skewed_dataset_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Dataset dimensions: number of transactions (rows) and items (columns).
# Alternative configuration (disabled): num_tx, num_items = 100, 100
num_tx, num_items = 50, 50
def gen_variant_1():
    """Write the lower-triangular dataset variant and print its fill ratio.

    Row ``i`` (0-based) holds ``i + 1`` ones followed by zeros up to
    ``num_items`` columns.  Rows are space-separated, one per line, and the
    last row is written without a trailing newline.
    """
    # with open('datasets/varying_num_items/dataset_100_100_50_variant_1.txt', 'w') as f:
    ones_per_row = range(1, num_tx + 1)
    lines = [
        ' '.join(['1'] * ones + ['0'] * (num_items - ones))
        for ones in ones_per_row
    ]
    total_ones = sum(ones_per_row)

    with open('datasets/varying_num_tx/dataset_50_50_50_variant_1.txt', 'w') as f:
        # Every row but the last is newline-terminated.
        f.write(''.join(line + '\n' for line in lines[:-1]))
        f.write(lines[-1])
    print('Validating distribution of values: ' + str(total_ones) + ' / ' + str(num_tx * num_items))
def gen_variant_2():
    """Write the step-distribution dataset variant and print its fill ratio.

    Rows ramp up in steps of two ones (2, 4, 6, ...) until either ``num_tx``
    rows exist or the next row would need more than ``num_items`` columns.
    The ramp is then mirrored back down without repeating the peak row, and
    a final all-zero row is written with no trailing newline.

    The printed count is tallied from the rows actually written, so it stays
    correct when the peak row is narrower than ``num_items`` (odd
    ``num_items``, or ``num_tx`` too small to reach full width).
    """
    # with open('datasets/varying_num_items/dataset_100_100_50_variant_2.txt', 'w') as f:
    # Number of rising rows: capped by the row budget and by the row width.
    steps = min(num_tx, num_items // 2)
    rising = [2 * step for step in range(1, steps + 1)]
    falling = list(reversed(rising[:-1]))  # mirror, peak row not repeated
    ones_per_row = rising + falling

    with open('datasets/varying_num_tx/dataset_50_50_50_variant_2.txt', 'w') as f:
        for ones in ones_per_row:
            f.write(' '.join(['1'] * ones + ['0'] * (num_items - ones)) + '\n')
        # Closing all-zero row, deliberately without a trailing newline.
        f.write(' '.join(['0'] * num_items))

    count = sum(ones_per_row)
    print('Validating distribution of values: ' + str(count) + ' / ' + str(num_tx * num_items))
def gen_variant_3():
    """Write the gaussian-like dataset variant and print its fill ratio.

    The number of ones per row starts at ``num_tx`` and shrinks by 1 per row
    while the countdown index is at least 15, then by 3 per row, clamped at
    zero.  That falling half is mirrored in front of itself so the rows rise
    to the peak and fall again.  The last two rows of the mirrored sequence
    are dropped and an all-zero row is written last, with no trailing
    newline.

    The printed count is tallied from the rows actually written, so the two
    dropped rows are never included in it.
    """
    # with open('datasets/varying_num_items/dataset_100_100_50_variant_3.txt', 'w') as f:
    falling = []
    ones = num_tx
    for i in range(int(num_tx / 2), -1, -1):
        falling.append(ones)
        # Gentle slope near the peak, steep slope in the tail.
        ones -= 1 if i >= 15 else 3
        if ones < 0:
            ones = 0

    # Mirror without repeating the peak row, then drop the last two rows.
    profile = list(reversed(falling[1:])) + falling
    written = profile[:-2]

    with open('datasets/varying_num_tx/dataset_50_50_50_variant_3.txt', 'w') as f:
        for row_ones in written:
            # NOTE: these rows are padded to num_tx columns, so they match
            # the num_items-wide closing row only when num_tx == num_items.
            f.write(' '.join(['1'] * row_ones + ['0'] * (num_tx - row_ones)) + '\n')
        # Closing all-zero row, deliberately without a trailing newline.
        f.write(' '.join(['0'] * num_items))

    count = sum(written)
    print('Validating distribution of values: ' + str(count) + ' / ' + str(num_tx * num_items))
def gen_variant_4():
    """Write the exponential-like dataset variant and print its fill ratio.

    The number of ones per row starts at ``num_tx`` and shrinks by 3 per row
    while the countdown index is at least 15, then by 1 per row, clamped at
    zero.  That falling half is mirrored in front of itself so the rows rise
    to the peak and fall again.  The last two rows of the mirrored sequence
    are dropped and a row with four ones is written last, with no trailing
    newline.

    The printed count is tallied from the rows actually written (plus the
    four ones of the closing row), so the two dropped rows are never
    included in it.
    """
    # with open('datasets/varying_num_items/dataset_100_100_50_variant_4.txt', 'w') as f:
    tail_ones = 4  # ones in the closing row

    falling = []
    ones = num_tx
    for i in range(int(num_tx / 2), -1, -1):
        falling.append(ones)
        # Steep slope near the peak, gentle slope in the tail.
        ones -= 3 if i >= 15 else 1
        if ones < 0:
            ones = 0

    # Mirror without repeating the peak row, then drop the last two rows.
    profile = list(reversed(falling[1:])) + falling
    written = profile[:-2]

    with open('datasets/varying_num_tx/dataset_50_50_50_variant_4.txt', 'w') as f:
        for row_ones in written:
            # NOTE: these rows are padded to num_tx columns, so they match
            # the num_items-wide closing row only when num_tx == num_items.
            f.write(' '.join(['1'] * row_ones + ['0'] * (num_tx - row_ones)) + '\n')
        # Closing row, deliberately without a trailing newline.
        f.write(' '.join(['1'] * tail_ones + ['0'] * (num_items - tail_ones)))

    count = sum(written) + tail_ones
    print('Validating distribution of values: ' + str(count) + ' / ' + str(num_tx * num_items))
def main():
    """Generate all four skewed dataset variants, in order."""
    for generate in (gen_variant_1, gen_variant_2, gen_variant_3, gen_variant_4):
        generate()
# Generate the datasets only when run as a script, not when imported.
if __name__ == "__main__":
    main()