-
Notifications
You must be signed in to change notification settings - Fork 9
/
xml2csv.py
84 lines (69 loc) · 2.51 KB
/
xml2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env python
import xmltodict
import collections
import pandas as pd
import argparse
import multiprocessing as mp
import glob
def _store_leaf(output, path, value):
    """Store ``value`` under ``path``, or under the first free ``path.N``."""
    if path not in output:
        output[path] = value
        return
    # The plain path is taken (repeated element): use the first unused
    # numeric suffix, starting at 0, so no earlier value is overwritten.
    count = 0
    while path + '.' + str(count) in output:
        count += 1
    output[path + '.' + str(count)] = value


def converter(data, output=None, name=""):
    """Flatten a nested mapping (as produced by xmltodict) into one flat dict.

    Nested keys are joined with ``.`` to form the flat key, e.g.
    ``{"a": {"b": "1"}}`` becomes ``{"a.b": "1"}``. When a flat key already
    exists (repeated elements), the value is stored under ``key.0``,
    ``key.1``, ... instead of overwriting the earlier one.

    Args:
        data: The mapping to flatten. Anything that is not a ``dict``
            (``OrderedDict`` included) contributes nothing.
        output: Optional dict to add the flattened entries to. It is mutated
            in place and returned. A new dict is created when omitted.
        name: Prefix prepended to every key of ``data`` (used by the
            recursion; already ends with ``.`` for nested levels).

    Returns:
        The flat dict (``output`` itself when one was passed in).
    """
    # A fresh dict per top-level call: a mutable default would be shared
    # between calls and leak keys from one document into the next.
    if output is None:
        output = {}
    # isinstance accepts both OrderedDict and the plain dict returned by
    # newer xmltodict releases.
    if not isinstance(data, dict):
        return output
    for key, value in data.items():
        path = name + key
        if isinstance(value, dict):
            converter(value, output, path + ".")
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    converter(item, output, path + ".")
                else:
                    # Repeated text-only elements arrive as a list of
                    # scalars; keep them instead of silently dropping them.
                    _store_leaf(output, path, item)
        else:
            _store_leaf(output, path, value)
    return output
def parse(path):
    """Parse one XML file and return its content as a flat dict.

    Args:
        path: Path of the XML file to read.

    Returns:
        The dict produced by ``converter`` for the parsed document.
    """
    # Read bytes rather than text so the XML parser can honour the encoding
    # declared in the document instead of the platform's default encoding.
    with open(path, 'rb') as file:
        xml_dict = xmltodict.parse(file.read())
    # Pass a fresh dict explicitly: this function is called many times per
    # worker process and each file must start from an empty result.
    return converter(xml_dict, {})
def main():
    """Convert the selected XML files to a single CSV file.

    Reads the module-level ``args`` namespace: ``args.xml`` (list of file
    paths), ``args.bulk`` (folder whose entries are all parsed) and
    ``args.csv`` (output path). Each file is parsed in a worker process and
    becomes one row of the CSV. Any exception is printed rather than raised.
    """
    try:
        # Truthiness rather than a None check: a bare ``-f`` yields an empty
        # list, which must not shadow a folder given with ``-b``.
        if args.xml:
            file_list = args.xml
        elif args.bulk:
            file_list = glob.glob(args.bulk + '/*')
        else:
            file_list = []
        # The context manager releases the worker processes once the
        # (blocking) map has finished, even if a parse raised.
        with mp.Pool(mp.cpu_count()) as pool:
            result = pool.map(parse, file_list)
        df = pd.DataFrame(result)
        df.to_csv(args.csv, index=False)
    except Exception as e:
        print(e)
if __name__ == '__main__':
    # Command-line interface: input is either explicit files (-f) or a
    # folder (-b); the output CSV name (-csv) is mandatory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-f", "--xml",
        help="Specify a xml file.",
        nargs='*',
    )
    parser.add_argument(
        "-b", "--bulk",
        help="Specify path to xml files folder.",
        nargs='?',
    )
    parser.add_argument(
        "-csv", "--csv",
        help="Specify the name of a csv file to write to.",
        nargs='?',
        const='converted.csv',
    )
    args = parser.parse_args()

    if not (args.xml or args.bulk):
        # No input source at all: explain both options and stop.
        print("\n[-] Please specify an input file to parse."
              "\nUse -f <file.xml> to specify the file."
              "\nOr use -b <folder path> to specify the path to xml files")
        exit()
    elif not args.csv:
        # Input is present but there is nowhere to write the result.
        print("\n[-] Please specify an output file name to put data. "
              "Use -csv <file.csv> to specify the file\n")
        exit()
    else:
        main()