# data_gen.py
# Extracts the annotated frames of the Giro videos from YouTube and writes them
# as JPEG images into train / test / valid folders.
import pafy
import cv2
import os
import pandas as pd
import numpy as np
# Root folder of the project; the dataset paths used further down are built
# relative to it (the default matches the present directory arrangement).
path = '..'  # put the root folder in HERE

# Legend of the view codes ('1B' is the default view):
#   '1F': 'Front View'
#   '1B': 'Back View'
#   '1L': 'Left View'
#   '1R': 'Right View'
#   '2' : 'Bicycle'
#   '5H': 'High-Density Human Crowd'
#   '5L': 'Low-Density Human Crowd'
#   '0' : 'irrelevant TV'

# One record per video: (giro video number, YouTube video id, frame limit).
# Keeping the three values side by side guarantees that the parallel lists
# derived below always stay aligned index-by-index.
_VIDEOS = (
    (1, '5AJlYeZ8Ilc', 12550),
    (4, 'QJBihoz38Qc', 105072),
    (5, 'w1CE1ykYa3I', 102525),
    (6, 'HcRcstqHnR4', 80818),
    (7, '70-1-hFnpjM', 200570),
    (8, 'bsLe0awGvzg', 228546),
    (9, 'kYPYD_RxHWU', 199242),
)

# Highest frame index that is read from each video.
length = [frame_limit for _, _, frame_limit in _VIDEOS]
# Watch-page URL of each video.
urls = ['https://www.youtube.com/watch?v=' + youtube_id for _, youtube_id, _ in _VIDEOS]
# Giro video number of each entry (used in the annotation / image file names).
vid = [number for number, _, _ in _VIDEOS]
# Load one annotation table per video. Every giro<N>.txt file is read as a
# headerless, space-separated table; annot[k] belongs to video vid[k], so the
# list is built by iterating `vid` instead of repeating one read per file.
_ANNOT_COLUMNS = {0: "dr1", 1: "frame", 2: "ID", 3: "X1", 4: "Y1",
                  5: "X2", 6: "Y2", 7: "dr2", 8: "object", 9: "dr3"}
# Columns that are named only so that they can be discarded.
_DROPPED_COLUMNS = ["dr1", "dr2", "dr3"]

annot = []
for video_number in vid:
    annot_file = os.path.join(path, 'datasets/giro_data/annot',
                              'giro{}.txt'.format(video_number))
    table = pd.read_csv(annot_file, header=None, sep=' ')
    table = table.rename(columns=_ANNOT_COLUMNS).drop(columns=_DROPPED_COLUMNS)
    # Tag every row with the video it came from, stored as an integer column.
    table["vid"] = video_number
    table["vid"] = table["vid"].astype(int)
    annot.append(table)

# Video numbers as strings, used when composing image folder and file names.
file_name = [str(video_number) for video_number in vid]

# Fractions of the annotated frames sent to the test and valid folders;
# every remaining frame goes to train.
test_split = 0.1  # test split, modify if needed
val_split = 0.05
# For every selected video: open its mp4 stream, walk through the frames in
# order and save each annotated frame as a JPEG, randomly assigned to the
# train / test / valid folder of that video.
#for j in range(len(length)):
for j in [0, 1, 5, 6]:
    # Resolve the best available mp4 stream and open it with OpenCV.
    vPafy = pafy.new(urls[j])
    play = vPafy.getbest(preftype="mp4")
    cap = cv2.VideoCapture(play.url)

    basepath = os.path.join(path, 'datasets/giro_data/images/giro' + file_name[j])
    trainpath = os.path.join(basepath, 'train')
    testpath = os.path.join(basepath, 'test')
    valpath = os.path.join(basepath, 'valid')
    print('images saved to:')
    print(trainpath)
    print(testpath)
    print(valpath)

    # makedirs also creates missing parents (including basepath) and accepts
    # directories that already exist, while real failures such as a
    # permission error still surface instead of being silently swallowed.
    for split_dir in (trainpath, testpath, valpath):
        os.makedirs(split_dir, exist_ok=True)

    # Frame indices that carry an annotation. A set keeps the membership test
    # that runs once per decoded frame at constant cost.
    af = set(annot[j]["frame"].unique().tolist())
    #af = set(annot[j][annot[j]["object"] == view]["frame"].unique().tolist())

    image_prefix = 'giro' + str(file_name[j]) + '_'
    i = 0  # index of the frame about to be read
    try:
        while i <= length[j]:
            ret, frame = cap.read()
            if not ret:
                # Stream ended (or could not be read) before the frame limit.
                break
            if i in af:
                # One uniform draw in [0, 1) decides the split of this frame:
                # below test_split -> test, above test_split + val_split ->
                # train, anything in between -> valid.
                decision_pm = np.random.rand()
                if decision_pm < test_split:
                    target_dir = testpath
                elif decision_pm > test_split + val_split:
                    target_dir = trainpath
                else:
                    target_dir = valpath
                cv2.imwrite(os.path.join(target_dir, image_prefix + str(i) + '.jpg'), frame)
            i += 1
    finally:
        # Always free the capture, even if reading or writing a frame failed.
        cap.release()
    # break # for testing purposes, use only the first video
cv2.destroyAllWindows()