-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_datasets.py
94 lines (68 loc) · 3.11 KB
/
create_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import utils.utils_image as util
import os
import stat
import numpy as np
def remove_baby(path_lst, min_size=5000000):
    """Drop every file that is not larger than ``min_size`` bytes, in place.

    Args:
        path_lst: List of file paths. The list is filtered in place, so any
            other reference to the same list object sees the result too.
        min_size: Size threshold in bytes. Only files strictly larger than
            this value are kept. Defaults to 5000000.

    Returns:
        The same ``path_lst`` object, after filtering.

    Raises:
        OSError: If the size of one of the paths cannot be read
            (for example because the file does not exist).
    """
    # Slice assignment mutates the caller's list instead of rebinding a local.
    path_lst[:] = [p for p in path_lst if os.path.getsize(p) > min_size]
    return path_lst
def match_files(path_lst1, path_lst2):
    """Keep only the paths whose file name occurs in both lists.

    Files are compared by base name (the last path component), so the two
    lists may point into different directories.

    Args:
        path_lst1: First list of file paths.
        path_lst2: Second list of file paths.

    Returns:
        A tuple ``(paths1, paths2)`` of new lists: the entries of
        ``path_lst1`` whose base name also appears in ``path_lst2`` and vice
        versa. Each result keeps the order of its own input list; the two
        results are NOT re-ordered to line up with each other. The input
        lists are left unmodified.
    """
    # Build the lookup sets once so each membership test is O(1) instead of
    # scanning the whole opposite list for every path.
    names1 = {os.path.basename(p) for p in path_lst1}
    names2 = {os.path.basename(p) for p in path_lst2}
    paths1 = [p for p in path_lst1 if os.path.basename(p) in names2]
    paths2 = [p for p in path_lst2 if os.path.basename(p) in names1]
    return paths1, paths2
def match_files2(path_lst1, path_lst2):
    """Return the file-name stems that the two path lists have in common.

    A stem is the base name of a path with its last 7 characters removed.
    NOTE(review): the 7 characters are presumably a fixed-length tag plus
    extension that differs between the two lists (e.g. ``HDR.exr`` vs
    ``SDR.exr``) -- confirm against the actual file names.

    Args:
        path_lst1: First list of file paths.
        path_lst2: Second list of file paths.

    Returns:
        A tuple ``(stems1, stems2)``: the stems from ``path_lst1`` that also
        occur among the stems of ``path_lst2``, and the stems from
        ``path_lst2`` that also occur among the stems of ``path_lst1``.
        Each list keeps the order of its own input. Note that stems, not
        full paths, are returned.
    """
    stems1 = [os.path.basename(p1)[:-7] for p1 in path_lst1]
    known1 = set(stems1)
    # Compare stem against stem. Checking the *full* base name of p2 against
    # the truncated stems of list 1 can practically never match and made the
    # function return two empty lists.
    stems2 = [
        stem
        for stem in (os.path.basename(p2)[:-7] for p2 in path_lst2)
        if stem in known1
    ]
    known2 = set(stems2)
    stems1 = [stem for stem in stems1 if stem in known2]
    return stems1, stems2
# Build shuffled train/test lists of (H, L) image-path pairs from two source
# sets and write them to 'train_itmo_dirs.txt' and 'test_itmo_dirs.txt', one
# "H_path,L_path" pair per line.
# NOTE(review): judging by the directory names, H is presumably the HDR side
# and L the SDR side of each pair -- confirm.

paths_H1 = util.get_image_paths("/lustre06/project/6003167/Share_4K8K/4K_PQ_EXR")
paths_H2 = util.get_image_paths("/lustre06/project/6003167/shared_itmo_fixed/Garden/HDR")
paths_L1 = util.get_image_paths("/lustre06/project/6003167/SharedProject4Kto8K_SDRfixedReinhard/4K_Stelios")
paths_L2 = util.get_image_paths("/lustre06/project/6003167/shared_itmo_fixed/Garden/SDR")

print("Number of images in H1 before trim = "+str(len(paths_H1)))
print("Number of images in H2 before trim = "+str(len(paths_H2)))
print("Number of images in L1 before trim = "+str(len(paths_L1)))
print("Number of images in L2 before trim = "+str(len(paths_L2)))

# Discard undersized files on the L side only (threshold is set in remove_baby).
paths_L1 = remove_baby(paths_L1)
paths_L2 = remove_baby(paths_L2)

# Set 1: keep only files whose base name exists on both sides.
paths_H1, paths_L1 = match_files(paths_H1, paths_L1)
# NOTE(review): set 2 is not name-matched here; H2 and L2 are paired purely
# by position, so they must already line up one-to-one (including after the
# size filter above) -- confirm.

print("Number of images in H1 = "+str(len(paths_H1)))
print("Number of images in H2 = "+str(len(paths_H2)))
print("Number of images in L1 = "+str(len(paths_L1)))
print("Number of images in L2 = "+str(len(paths_L2)))

# zip() silently stops at the shorter list, so make a length mismatch visible.
if len(paths_H1) != len(paths_L1):
    print("WARNING: H1 and L1 differ in length; extra files are left unpaired")
if len(paths_H2) != len(paths_L2):
    print("WARNING: H2 and L2 differ in length; extra files are left unpaired")

# Pair each set on its own. Zipping the concatenated lists would shift every
# set-2 pair out of alignment as soon as set 1 had unequal lengths.
pairs = list(zip(paths_H1, paths_L1)) + list(zip(paths_H2, paths_L2))

# Shuffle whole pairs (rows) so H and L stay together.
paths = np.array(pairs)
np.random.shuffle(paths)
paths = list(map(tuple, paths))

# The first ~90% (+1) of the shuffled pairs go to training, the rest to test.
data_split = round(0.9*(len(paths)))+1

# Sample output for a quick visual sanity check; skipped on tiny datasets
# where these indices do not exist.
if len(paths) > 10:
    print(paths[10])
    print(paths[-10])

with open('train_itmo_dirs.txt', 'w') as fp:
    fp.write('\n'.join('%s,%s' % x for x in paths[:data_split]))
with open('test_itmo_dirs.txt', 'w') as fp:
    # Start exactly where the training slice ended; starting at data_split+1
    # dropped one pair from both files.
    fp.write('\n'.join('%s,%s' % x for x in paths[data_split:]))