-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathrename_webanno.py
103 lines (80 loc) · 4.13 KB
/
rename_webanno.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import glob
import os
import pandas as pd
# Rename the Appraisal or Negation annotated files from their idiosyncratic WebAnno names to the names generated by
# the comment counter. Two things are needed: the unzipped, exported project directory and a mapping of WebAnno
# names to comment counter names. (The mapping and zipped project are available on GitHub.)

# Ask for the location of the unzipped exported project.
maindir = input("path to your unzipped exported project e.g. 'C:/.../project_name'")
# Ask for the location of the CSV that maps the names.
mapping_csv = input("path to your mapping of names e.g. 'C:/.../comment_counter_appraisal_mapping.csv'")

# Build the paths of the five sub-folders of the export by appending each suffix to the project path.
annotations1dir, annotations2dir, curations1dir, curations2dir, sourcedir = (
    maindir + suffix
    for suffix in ('/annotation', '/annotation_ser', '/curation', '/curation_ser', '/source')
)
# then the files for each annotation
def getcontents(directory):
    """
    List the entries found directly inside ``directory``.

    :param directory: path of the folder to look in
    :return: a list with one path per entry matched by ``directory + '/*'``, where every backslash in the path
        has been replaced by a forward slash
    """
    entries = []
    for found in glob.glob(directory + '/*'):
        # Normalise Windows-style separators so later code can rely on '/'.
        entries.append(found.replace('\\', '/'))
    return entries
# Collect the contents of each sub-folder, in the same order as the folders were defined.
annotations1, annotations2, curations1, curations2, sources = [
    getcontents(folder)
    for folder in (annotations1dir, annotations2dir, curations1dir, curations2dir, sourcedir)
]
# make sure all files end in .txt (some may have been .tsv)
def ziplist(oldlist, newlist):
    """
    Pair up the elements of two iterables position by position.

    :param oldlist: an iterable
    :param newlist: another iterable with the same number of elements as oldlist
    :return: list of two-element lists, where the ith element of oldlist is the first element of the ith sub-list
        and the ith element of newlist is the second element of the ith sub-list.
    :raises IndexError: if the two iterables do not contain the same number of elements
    """
    # Materialise both inputs so that generators and other one-shot iterables work, as the parameters promise.
    first = list(oldlist)
    second = list(newlist)
    # A length mismatch means the pairing would be wrong; fail loudly with an explanation rather than with an
    # opaque "list index out of range" from deep inside the pairing loop.
    if len(first) != len(second):
        raise IndexError(
            'ziplist needs iterables of equal length, got {} and {} elements'.format(len(first), len(second))
        )
    return [[old, new] for old, new in zip(first, second)]
def cleanfilenames(files, directory):
    """
    Work out the ``.txt`` name for every file in ``files``.

    :param files: a list of paths to files; each path is expected to consist of ``directory``, one separator
        character, and then the file name
    :param directory: the directory common to those files (its length is used to strip it from each path)
    :return: a list of two-element lists where each sub-list's first element is the original filename and the
        second element is that name with a .txt extension instead.
    Extensions of any length are handled; a name that has no extension simply gains ``.txt``.
    """
    # Number of leading characters to drop: the directory itself plus the separator that follows it.
    prefix_length = len(directory) + 1
    pairs = []
    for path in files:
        sourcename = path[prefix_length:]
        # splitext isolates the actual extension, so the result does not depend on it being three characters long.
        stem = os.path.splitext(sourcename)[0]
        pairs.append([sourcename, stem + '.txt'])
    return pairs
# For every sub-folder, pair each current file name with its .txt counterpart.
cleanann1, cleanann2, cleancur1, cleancur2, cleansources = [
    cleanfilenames(paths, folder)
    for paths, folder in (
        (annotations1, annotations1dir),
        (annotations2, annotations2dir),
        (curations1, curations1dir),
        (curations2, curations2dir),
        (sources, sourcedir),
    )
]
# prepare to rename those files so they end in .txt
def rename_file(directory, pattern, titlepattern):
    """
    Rename whatever matches ``pattern`` inside ``directory`` to ``titlepattern``.

    :param directory: folder that contains the file(s) to rename
    :param pattern: glob pattern (or plain file name) looked up inside directory
    :param titlepattern: new name, relative to directory, given to every match
    :return: None; nothing happens when the pattern matches no path
    """
    search = os.path.join(directory, pattern)
    for found in glob.iglob(search):
        # The destination is only built once a match exists, exactly when the rename is performed.
        destination = os.path.join(directory, titlepattern)
        os.rename(found, destination)
def massrename(directory, dictionary, confirmation='Done!', check=0):
    """
    Apply a batch of renames inside one directory.

    :param directory: folder in which the renames are carried out
    :param dictionary: a list of entries; item 0 of each entry is the current name and item 1 is the new name
    :param confirmation: text printed once every entry has been processed
    :param check: when equal to 1, each [current, new] pair is printed before it is renamed
    :return: None
    """
    for entry in dictionary:
        current, replacement = entry[0], entry[1]
        if check == 1:
            print([current, replacement])
        rename_file(directory, current, replacement)
    print(confirmation)
# Run massrename on every sub-folder so that the file extensions are changed.
for folder, renames, label in (
    (annotations1dir, cleanann1, 'ann1'),
    (annotations2dir, cleanann2, 'ann2'),
    (curations1dir, cleancur1, 'cur1'),
    (curations2dir, cleancur2, 'cur2'),
    (sourcedir, cleansources, 'source'),
):
    massrename(folder, renames, confirmation=label)
# Load the name mapping from the CSV file and turn its two columns into a list of [old name, new name] pairs.
mapping1 = pd.read_csv(mapping_csv)
list1, list2 = (
    mapping1[column].tolist()
    for column in ('appraisal_negation_annotation_file_name', 'comment_counter')
)
mapping = ziplist(list1, list2)
# Apply the name mapping to every sub-folder (e.g. source_....).
for folder, label in (
    (annotations1dir, 'ann1'),
    (annotations2dir, 'ann2'),
    (curations1dir, 'cur1'),
    (curations2dir, 'cur2'),
    (sourcedir, 'source'),
):
    massrename(folder, mapping, confirmation=label)