Katherine Kairis, kak275@pitt.edu, 11/2/2017

NEW CONTINUING -- This file continues upon the first progress report

In [1]:
from bs4 import BeautifulSoup
import glob
import re
import nltk

In [2]:
transcripts = glob.glob('data/VOICE/VOICE2.0XML/XML/*.xml')
del transcripts[0]

tagged_transcripts = glob.glob('data/VOICE/VOICEPOSXML2.0/XML/*.xml')
del transcripts[0]

In [3]:
#Create three dictionaries
participants = {}
conversations = {}
tagged_convs = {}

## Getting info about the participants

In [4]:
def participant_info(contents):
    
    #Get all of the participants in the given conversation
    people = contents.find('listPerson', {'type': 'identified'}).findAll('person')
    
    for p in people:
        #info is a subdirector that contains a single participant's information. It will be 
        info = {}
        name = p['xml:id']
        c = name.split("_")[0]
        c = c + ".xml"
        
        info['conversation'] = c
        info['role'] = p['role']
        info['age'] = p.age.get_text()
        info['sex'] = p.sex.get_text()
        
        #In some cases, the occupation isn't listed. If it is included, get the text of the occupation field.
        #If it isn't included, "None" will be stored as the occupation, since p.occupation would return "None."
        try:
            info['occupation'] = p.occupation.get_text()
        except AttributeError:
            info['occupation'] = p.occupation
        
        #Get a list of the languages that the participant speaks. Iterate through the list, and add them to the
        #dictionary according to the speaker's level (ie. L1).
        languages = p.findAll('langKnown')
        for l in languages:
            level = l['level']
            language = l['tag']
            language = language.split('-')[0]
        
            if level in info:
                info[level].append(language)
            else:
                info[level] = [language]
    
        #Get the participant's ID number, and make it a key in the participants dictionary. The value will be
        #the info dictionary
        participants[name] = info

## Getting lines of the conversation from the file

In [5]:
def conversation_lines(file, contents, li):
    file_name = file.split("/")[-1]
    li[file_name] = contents

In [6]:
conversations = {}
for t in transcripts:
    file = open(t, 'r')
    text = file.read()
    xml_contents = BeautifulSoup(text, 'xml')
    conversation_lines(t, xml_contents, conversations)
    participant_info(xml_contents)

In [7]:
len(conversations)

150

In [8]:
#Get native English speakers
native_speakers = []

#There are multiple ways that English is listed as an L1 ("eng", "eng-US", "eng-CA", "eng-GB", "eng-GY", "eng-AU", etc)
#I used a regular expression to find all of these instances
r = re.compile("eng.*")

for person in participants:
    
    #returns a list of all languages that contain "eng.*" The length of this list should be 1 or 0. If it's 1, the
    #participant has English listed as an L1.
    english = list(filter(r.match, participants[person]['L1']))
    
    if len(english) != 0:
        #print(person, ':', participants[person])
        native_speakers.append(person)

In [9]:
bilinguals = []
L1_counts = {}
#participants[native_speakers[0]]
for p in participants:
    #print(participants[p]['L1'])
    languages = participants[p]['L1']
    if len(languages) > 1:
        bilinguals.append(p)
        if p in native_speakers:
            if 'eng' not in L1_counts:
                L1_counts['eng'] = 1
            else:
                L1_counts['eng'] += 1
        continue
    
    
    for l in languages:
        L1 = l.split("-")[0]
        if L1 not in L1_counts:
            L1_counts[L1] = 1
        else:
            L1_counts[L1] += 1
        #print(l.split("-")[0])

In [10]:
modified_conversations = {}
lines = {}

for file in conversations:
    conv_lines = {}
    
    c = conversations[file].findAll('u')

In [11]:
#Checks to make sure the line can be added to the dictionary.
#A line must meet the following criteria: 
#the participant cannot be a native speaker of English
#the participant must be listed in the participant directory
#the participant cannot be bilingual
#the line cannot contain any non-English words
#the line cannot contain the speaker reading anything out loud
def valid_utterance(participant, line):
    if participant in bilinguals:
        return False
    if participant not in participants:
        return False
    #if len(text) == 0:
    #    return False
    if line.foreign != None:
        return False
    if line.unclear != None: 
        return False
    if line.reading_aloud != None:
        return False
    if line.reading != None :
        return False
    return True

In [12]:
#Iterate through all of the files in VOICE to a nested dictionary that contains the word tokens of the
#conversations.
#The keys of the dictionaries are the file names. The values of these entries are subdictionaries. The keys of the
#subdictionary are (participant, line_number) tuples, and the values are lists of tokens.
tokenized_conversations = {}
conversation_info = {}

for file in conversations:
    conv_lines = {}
    participant_list = []
    
    c = conversations[file].findAll('u')
    
    for l in c:
        participant = l['who'].replace("#", "")
        line_id = l['xml:id']
        text = l.get_text()
        tokens = nltk.word_tokenize(text)
        
        participant_list.append(participant)
        
        if participant in native_speakers:
            continue
        
        elif len(text) != 0 and valid_utterance(participant, l) == True:
            key = (line_id, participant)
            conv_lines[key] = tokens
            #participant_list.append(participant)
    
    tokenized_conversations[file] = conv_lines
    
    participant_list = list(set(participant_list))
    print(participant_list)
    total = 0
    num_natives = 0
    num_nonnatives = 0
    for p in participant_list:
        total += 1
        if p in native_speakers:
            num_natives += 1
        else:
            num_nonnatives += 1
            
    
    current_info = {'total_participants':total, 'native_speakers':num_natives, 'nonnative_speakers':num_nonnatives}
    conversation_info[file] = current_info

['EDcon4_S2', 'EDcon4_S4', 'EDcon4_SX-2', 'EDcon4_SX-4', 'EDcon4_S5', 'EDcon4_S6', 'EDcon4_SX-7', 'EDcon4_SX-6', 'EDcon4_S9', 'EDcon4_SX-f', 'EDcon4_SX-3', 'EDcon4_SX-1', 'EDcon4_S7', 'EDcon4_SS', 'EDcon4_S8', 'EDcon4_S3', 'EDcon4_S1']
['EDcon496_S1', 'EDcon496_S4', 'EDcon496_S5', 'EDcon496_S2', 'EDcon496_S3', 'EDcon496_SX-2', 'EDcon496_SX-f']
['EDcon521_SX-m', 'EDcon521_S9', 'EDcon521_S13', 'EDcon521_SX-f', 'EDcon521_SX-3', 'EDcon521_S12', 'EDcon521_SX-2', 'EDcon521_SX-5', 'EDcon521_S1', 'EDcon521_S10', 'EDcon521_S15', 'EDcon521_S5', 'EDcon521_S8', 'EDcon521_SX-8', 'EDcon521_SX-6', 'EDcon521_S6', 'EDcon521_SX-1', 'EDcon521_SX-14', 'EDcon521_SX', 'EDcon521_SX-10', 'EDcon521_S3', 'EDcon521_S7', 'EDcon521_S4', 'EDcon521_S14', 'EDcon521_S11', 'EDcon521_S2', 'EDcon521_SS']
['EDint328_S4', 'EDint328_S2', 'EDint328_S1', 'EDint328_SS', 'EDint328_S3']
['EDint330_SS', 'EDint330_SX-f', 'EDint330_S4', 'EDint330_S2', 'EDint330_S1', 'EDint330_S3', 'EDint330_S5']
['EDint331_S1', 'EDint331_S2']
['EDi

['EDwsd306_SX-6', 'EDwsd306_SX-8', 'EDwsd306_S14', 'EDwsd306_S4', 'EDwsd306_SX-7', 'EDwsd306_S20', 'EDwsd306_SX-17', 'EDwsd306_SX-10', 'EDwsd306_S18', 'EDwsd306_SX', 'EDwsd306_SX-2', 'EDwsd306_SX-19', 'EDwsd306_SX-15', 'EDwsd306_S22', 'EDwsd306_S7', 'EDwsd306_SX-22', 'EDwsd306_S16', 'EDwsd306_S11', 'EDwsd306_SX-9', 'EDwsd306_SX-20', 'EDwsd306_S2', 'EDwsd306_SX-m', 'EDwsd306_S9', 'EDwsd306_S15', 'EDwsd306_S19', 'EDwsd306_SX-f', 'EDwsd306_SX-14', 'EDwsd306_S6', 'EDwsd306_S1', 'EDwsd306_SX-12', 'EDwsd306_SX-11', 'EDwsd306_S12', 'EDwsd306_SX-1', 'EDwsd306_S17', 'EDwsd306_SS', 'EDwsd306_S8', 'EDwsd306_SX-4']
['EDwsd464_S11', 'EDwsd464_SX-25', 'EDwsd464_SX-2', 'EDwsd464_SX-4', 'EDwsd464_S24', 'EDwsd464_S17', 'EDwsd464_S23', 'EDwsd464_SX-17', 'EDwsd464_SX-10', 'EDwsd464_SX', 'EDwsd464_S25', 'EDwsd464_S9', 'EDwsd464_S3', 'EDwsd464_SX-f', 'EDwsd464_S1', 'EDwsd464_S22', 'EDwsd464_S13', 'EDwsd464_SX-12', 'EDwsd464_S8', 'EDwsd464_S18', 'EDwsd464_S15', 'EDwsd464_SX-m', 'EDwsd464_S16', 'EDwsd464_SS'

['PBqas412_S3', 'PBqas412_SX-m', 'PBqas412_S6', 'PBqas412_S11', 'PBqas412_S8', 'PBqas412_S12', 'PBqas412_S16', 'PBqas412_S5', 'PBqas412_S15', 'PBqas412_S13', 'PBqas412_S9', 'PBqas412_S10', 'PBqas412_S1', 'PBqas412_S4', 'PBqas412_S2', 'PBqas412_S7', 'PBqas412_S14']
['PBqas523_S4', 'PBqas523_S1', 'PBqas523_S5', 'PBqas523_S2', 'PBqas523_S3']
['PBsve426_S1', 'PBsve426_S2']
['PBsve430_S1', 'PBsve430_S2']
['PBsve434_S2', 'PBsve434_S1']
['PBsve435_S2', 'PBsve435_S1']
['PBsve436_S3', 'PBsve436_S2', 'PBsve436_S1', 'PBsve436_SX-2']
['PBsve437_S2', 'PBsve437_S1']
['POcon543_SX-4', 'POcon543_SX-f', 'POcon543_S6', 'POcon543_S5', 'POcon543_S1', 'POcon543_S3', 'POcon543_S4', 'POcon543_SX-2', 'POcon543_SS', 'POcon543_SX-m', 'POcon543_SX-7', 'POcon543_SX-5', 'POcon543_S2', 'POcon543_SX', 'POcon543_SX-3', 'POcon543_SX-6', 'POcon543_S7']
['POcon549_S7', 'POcon549_S9', 'POcon549_SX-11', 'POcon549_S1', 'POcon549_SX', 'POcon549_SS', 'POcon549_S3', 'POcon549_S8', 'POcon549_S4', 'POcon549_S2', 'POcon549_SX-f'

['POwgd37_SX-11', 'POwgd37_S18', 'POwgd37_S17', 'POwgd37_SX-7', 'POwgd37_S14', 'POwgd37_SX-m', 'POwgd37_SX-12', 'POwgd37_S11', 'POwgd37_S15', 'POwgd37_S7', 'POwgd37_S13', 'POwgd37_S8', 'POwgd37_SS', 'POwgd37_SX-14', 'POwgd37_SX-15', 'POwgd37_S9', 'POwgd37_S10', 'POwgd37_S16', 'POwgd37_SX-f', 'POwgd37_SX-5', 'POwgd37_S5', 'POwgd37_S12']
['POwgd375_SX-f', 'POwgd375_SS', 'POwgd375_S9', 'POwgd375_S1', 'POwgd375_S3', 'POwgd375_SX-9', 'POwgd375_S4', 'POwgd375_S10', 'POwgd375_SX-3', 'POwgd375_SX-7', 'POwgd375_SX-m', 'POwgd375_SX', 'POwgd375_SX-4', 'POwgd375_S7', 'POwgd375_SX-10']
['POwgd378_S12', 'POwgd378_S3', 'POwgd378_SS', 'POwgd378_S1', 'POwgd378_S8', 'POwgd378_S7']
['POwgd442_SS', 'POwgd442_S4', 'POwgd442_S5', 'POwgd442_S1', 'POwgd442_S2', 'POwgd442_S6', 'POwgd442_S3']
['POwgd449_S3', 'POwgd449_S4', 'POwgd449_S1', 'POwgd449_S7', 'POwgd449_SS', 'POwgd449_S6', 'POwgd449_S5', 'POwgd449_S8', 'POwgd449_S2', 'POwgd449_SX-f', 'POwgd449_SX']
['POwgd510_S1', 'POwgd510_S9', 'POwgd510_SX-m', 'POwgd

['PRwgd537_S11', 'PRwgd537_S10']


In [13]:
#Get the text from the pos-tagged files
tagged_conv_lines = {}
for t in tagged_transcripts:
    file = open(t, 'r')
    text = file.read()
    xml_contents = BeautifulSoup(text, 'xml')
    conversation_lines(t, xml_contents, tagged_conv_lines)

In [14]:
#Iterate through all of the files in VOICE to a nested dictionary that contains the (word, tag) tuples from the
#conversations.
#The keys of the dictionaries are the file names. The values of these entries are subdictionaries. The keys of the
#subdictionary are (participant, line_number) tuples, and the values are lists of (word, tag) tuples.
tagged_conversations = {}
tagged_conversations_native = {}

for file in tagged_conv_lines:
    conv_lines = {}
    conv_lines_native = {}
    c = tagged_conv_lines[file].findAll('u')
    
    for l in c:
        utterance = []
        native_utterance = []
        
        participant = l['who'].replace("#", "")
        line_id = l['xml:id']
        key = (line_id, participant)
        
        #if participant in native_speakers:
        #    continue
        
        if valid_utterance(participant, l) == True:            
            tags = l.findAll('w')
            for t in tags:
                word = t.text
                ana = str(t).split()[1]
                ana = ana.split("=")
                tag = ana[1][2:]
                tag = tag.split('"')[0]
                #print(word, tag)
                
                if participant in native_speakers:
                    native_utterance.append((word, tag))
                    conv_lines_native[key] = native_utterance
                    
                else:
                    utterance.append((word, tag))  
                    conv_lines[key] = utterance
        
    tagged_conversations[file] = conv_lines
    tagged_conversations_native[file] = conv_lines_native

In [15]:
tokenized_conversations['EDcon4.xml']

{('EDcon4_u_1002', 'EDcon4_S2'): ['is', 'this', 'a', 'statement'],
 ('EDcon4_u_1003', 'EDcon4_S1'): ['no', 'you', "'re", 'overreacting'],
 ('EDcon4_u_1004', 'EDcon4_S2'): ['ah',
  'it',
  'means',
  'i',
  "'m",
  'clean',
  'that',
  'i',
  'wash',
  'my',
  'clothes',
  'and',
  'i',
  'wash',
  'myself',
  'also'],
 ('EDcon4_u_1005', 'EDcon4_S1'): ['i',
  'ca',
  "n't",
  'see',
  'him',
  'at',
  'town'],
 ('EDcon4_u_1006', 'EDcon4_S2'): ['what', 'you', "'re", 'what'],
 ('EDcon4_u_1007', 'EDcon4_S1'): ['[', 'first', 'name8', ']'],
 ('EDcon4_u_1008', 'EDcon4_S2'): ['yeah'],
 ('EDcon4_u_1009', 'EDcon4_S1'): ['someone',
  'is',
  'oh',
  '[',
  'S6',
  ']',
  'is',
  'really',
  'good',
  'at',
  'imitating'],
 ('EDcon4_u_1010', 'EDcon4_S2'): ['hm'],
 ('EDcon4_u_1011', 'EDcon4_S1'): ['[',
  'S6',
  ']',
  'is',
  'really',
  'good',
  'at',
  'imitating',
  'her',
  'voice'],
 ('EDcon4_u_1012', 'EDcon4_S2'): ['[', 'S6', ']'],
 ('EDcon4_u_1014', 'EDcon4_S2'): ['it', "'s", 'nice'],
 ('E

In [16]:
tagged_conversations['EDcon4.xml']

{('EDcon4_u_1', 'EDcon4_S1'): [('running', 'VVGfVVG'),
  ('we', 'PPfPP'),
  ('got', 'VVDfVVD'),
  ('_0', 'PAfPA')],
 ('EDcon4_u_2', 'EDcon4_S2'): [('with', 'INfIN'), ('whom', 'WPfWP')],
 ('EDcon4_u_3', 'EDcon4_S1'): [('a_[firstname1]', 'NPfNP'),
  ('_0', 'PAfPA'),
  ('the', 'DTfDT'),
  ('belgium', 'NPfNP'),
  ('_0', 'PAfPA')],
 ('EDcon4_u_4', 'EDcon4_S2'): [('okay', 'REfRE'),
  ('@@', 'LAfLA'),
  ('_0', 'PAfPA')],
 ('EDcon4_u_5', 'EDcon4_S1'): [('and', 'CCfCC'),
  ('@@', 'LAfLA'),
  ('and', 'CCfCC'),
  ('er', 'UHfUH'),
  ('_0', 'PAfPA')],
 ('EDcon4_u_6', 'EDcon4_S3'): [('oh', 'UHfUH'),
  ('_0', 'PAfPA'),
  ('sorry', 'FIfFI'),
  ('_1', 'PAfPA')],
 ('EDcon4_u_7', 'EDcon4_S1'): [('and', 'CCfCC'),
  ('the', 'DTfDT'),
  ('problem', 'NNfNN'),
  ('was', 'VBDfVBD'),
  ('that', 'INfIN'),
  ('she', 'PPfPP'),
  ('was', 'VBDfVBD'),
  ('like', 'DMfDM'),
  ('running', 'VVGfVVG')],
 ('EDcon4_u_8', 'EDcon4_S2'): [('@@', 'LAfLA'), ('@', 'LAfLA')],
 ('EDcon4_u_9', 'EDcon4_S1'): [('the', 'DTfDT'),
  ('wh

In [17]:
#Save the two dictionaries as pickle files
import pickle

In [18]:
f = open('VOICE_tokenized.p', 'wb')
pickle.dump(tokenized_conversations, f, -1)
f.close()

In [19]:
f = open('VOICE_tagged.p', 'wb')
pickle.dump(tagged_conversations, f, -1)
f.close()

In [32]:
f = open('VOICE_native_tagged.p', 'wb')
pickle.dump(tagged_conversations_native, f, -1)
f.close()

In [20]:
f = open("VOICE_participant_info.p", 'wb')
pickle.dump(participants, f, -1)
f.close()

In [21]:
f = open("VOICE_conversation_info.p", 'wb')
pickle.dump(conversation_info, f, -1)
f.close()

In [22]:
participants.keys()

dict_keys(['EDcon4_S4', 'EDcon4_S1', 'EDcon4_S2', 'EDcon4_S5', 'EDcon4_S6', 'EDcon4_S7', 'EDcon4_S8', 'EDcon4_S3', 'EDcon4_S9', 'EDcon496_S4', 'EDcon496_S1', 'EDcon496_S2', 'EDcon496_S3', 'EDcon496_S5', 'EDcon521_S9', 'EDcon521_S15', 'EDcon521_S14', 'EDcon521_S13', 'EDcon521_S12', 'EDcon521_S10', 'EDcon521_S11', 'EDcon521_S8', 'EDcon521_S7', 'EDcon521_S6', 'EDcon521_S5', 'EDcon521_S4', 'EDcon521_S3', 'EDcon521_S2', 'EDcon521_S1', 'EDint328_S1', 'EDint328_S2', 'EDint328_S3', 'EDint328_S4', 'EDint330_S1', 'EDint330_S2', 'EDint330_S3', 'EDint330_S4', 'EDint330_S5', 'EDint331_S1', 'EDint331_S2', 'EDint604_S3', 'EDint604_S4', 'EDint604_S1', 'EDint605_S5', 'EDint605_S6', 'EDint605_S1', 'EDsed251_S1', 'EDsed251_S3', 'EDsed251_S4', 'EDsed251_S5', 'EDsed251_S6', 'EDsed251_S7', 'EDsed251_S8', 'EDsed251_S9', 'EDsed251_S10', 'EDsed251_S11', 'EDsed251_S12', 'EDsed251_S13', 'EDsed251_S14', 'EDsed251_S15', 'EDsed251_S16', 'EDsed251_S17', 'EDsed251_S18', 'EDsed251_S19', 'EDsed251_S2', 'EDsed251_S20', 

In [23]:
participants['EDcon4_S1']

{'L1': ['pol'],
 'age': '17-24',
 'conversation': 'EDcon4.xml',
 'occupation': 'student',
 'role': 'participant',
 'sex': 'female'}

In [24]:
participants['EDsed301_S7']

{'L1': ['rum'],
 'age': '25-34',
 'conversation': 'EDsed301.xml',
 'occupation': 'student',
 'role': 'student',
 'sex': 'female'}

In [25]:
conversation_info.keys()

dict_keys(['EDcon4.xml', 'EDcon496.xml', 'EDcon521.xml', 'EDint328.xml', 'EDint330.xml', 'EDint331.xml', 'EDint604.xml', 'EDint605.xml', 'EDsed251.xml', 'EDsed301.xml', 'EDsed31.xml', 'EDsed362.xml', 'EDsed363.xml', 'EDsed364.xml', 'EDsve421.xml', 'EDsve422.xml', 'EDsve423.xml', 'EDsve451.xml', 'EDsve452.xml', 'EDwgd241.xml', 'EDwgd305.xml', 'EDwgd497.xml', 'EDwgd5.xml', 'EDwgd6.xml', 'EDwsd15.xml', 'EDwsd242.xml', 'EDwsd302.xml', 'EDwsd303.xml', 'EDwsd304.xml', 'EDwsd306.xml', 'EDwsd464.xml', 'EDwsd499.xml', 'EDwsd590.xml', 'EDwsd9.xml', 'LEcon227.xml', 'LEcon228.xml', 'LEcon229.xml', 'LEcon329.xml', 'LEcon351.xml', 'LEcon352.xml', 'LEcon353.xml', 'LEcon405.xml', 'LEcon417.xml', 'LEcon418.xml', 'LEcon420.xml', 'LEcon545.xml', 'LEcon547.xml', 'LEcon548.xml', 'LEcon560.xml', 'LEcon562.xml', 'LEcon565.xml', 'LEcon566.xml', 'LEcon573.xml', 'LEcon575.xml', 'LEcon8.xml', 'LEint551.xml', 'LEint552.xml', 'LEint553.xml', 'LEint554.xml', 'LEint555.xml', 'PBcon594.xml', 'PBmtg269.xml', 'PBmtg27.

In [26]:
conversation_info['EDcon4.xml']

{'native_speakers': 0, 'nonnative_speakers': 17, 'total_participants': 17}

In [27]:
conversation_info['EDcon496.xml']

{'native_speakers': 1, 'nonnative_speakers': 6, 'total_participants': 7}

In [28]:
conversation_info['EDcon521.xml']

{'native_speakers': 1, 'nonnative_speakers': 26, 'total_participants': 27}

In [29]:
conversation_info['POcon549.xml']

{'native_speakers': 1, 'nonnative_speakers': 22, 'total_participants': 23}

In [30]:
conversation_info['PRint597.xml']

{'native_speakers': 0, 'nonnative_speakers': 6, 'total_participants': 6}

In [31]:
native_speakers

['EDcon496_S2',
 'EDcon521_S1',
 'EDint328_S3',
 'EDint330_S2',
 'EDint330_S4',
 'EDsed251_S3',
 'EDsed301_S6',
 'EDsed362_S1',
 'EDsed362_S11',
 'EDsed362_S14',
 'EDsed362_S17',
 'EDsed363_S3',
 'EDsed364_S7',
 'EDwgd497_S4',
 'EDwgd5_S3',
 'EDwgd6_S7',
 'EDwgd6_S11',
 'EDwsd15_S13',
 'EDwsd242_S7',
 'EDwsd302_S13',
 'EDwsd303_S13',
 'EDwsd304_S9',
 'EDwsd306_S11',
 'EDwsd590_S13',
 'EDwsd9_S2',
 'LEcon329_S4',
 'LEcon545_S7',
 'LEcon545_S1',
 'LEcon547_S4',
 'LEcon548_S4',
 'LEcon548_S5',
 'LEcon562_S2',
 'LEcon562_S3',
 'LEcon562_S6',
 'PBmtg280_S3',
 'PBmtg280_S4',
 'PBpan10_S9',
 'PBpan28_S7',
 'PBpan28_S9',
 'PBqas411_S1',
 'PBqas412_S6',
 'POcon549_S4',
 'POcon591_S10',
 'POmtg404_S5',
 'POmtg439_S2',
 'POmtg439_S3',
 'POmtg444_S5',
 'POmtg444_S9',
 'POmtg447_S2',
 'POmtg447_S3',
 'POmtg546_S9',
 'POprc522_S7',
 'POprc522_S8',
 'POprc558_S6',
 'POprc559_S10',
 'POprc559_S11',
 'POwgd12_S1',
 'POwgd12_S6',
 'POwgd37_S10',
 'POwgd375_S9',
 'POwgd449_S6',
 'POwgd449_S8',
 'POwgd510