In [15]:
import pandas as pd
import ast

### Import data

In [16]:
# Load the workbook, then keep only the rows at positions 1 through 98.
# NOTE(review): the row at position 0 is dropped and at most 98 rows are kept;
# confirm this slice is intentional. Swap in pd.read_csv for a CSV source.
df = pd.read_excel('./Jan_100_.xlsx').iloc[1:99]

### Functions for adding precondition labels

In [17]:
def tag_preconditions_with_exclusions(tokens, preconditions, srl_tags):
    """Label the first occurrence of a precondition phrase in a token sequence.

    ``preconditions`` is matched against every contiguous window of ``tokens``
    of the same length. The entry ``'X'`` acts as a wildcard: it matches any
    token and the tag at that position is left unchanged. Only the first
    matching window is tagged.

    Args:
        tokens: Sequence of tokens to search.
        preconditions: Sequence of words to find; ``'X'`` matches anything.
        srl_tags: Sequence of tags, indexed in parallel with ``tokens``.

    Returns:
        A new list of tags in which every non-``'X'`` position of the first
        match is set to ``"Precondition"``. If nothing matches, an unchanged
        copy of ``srl_tags`` is returned. The ``srl_tags`` argument itself is
        never modified.

    Raises:
        IndexError: If ``srl_tags`` is too short to cover the matched window.
    """
    # Copy first: the caller's list (e.g. the object stored in a dataframe
    # cell) must keep its original tags so old and new can be compared.
    updated_tags = list(srl_tags)
    window_size = len(preconditions)

    # Slide a window of len(preconditions) over the tokens.
    for start in range(len(tokens) - window_size + 1):
        window = tokens[start:start + window_size]
        if all(t == p or p == 'X' for t, p in zip(window, preconditions)):
            # Tag every concrete word of the match; wildcard slots keep their tag.
            for offset, word in enumerate(preconditions):
                if word != 'X':
                    updated_tags[start + offset] = "Precondition"
            break  # Only the first match is tagged

    return updated_tags

In [18]:
def fix_quotes(s):
    """Normalise the quoting of a string that represents a list of words.

    The surrounding brackets are removed, the remainder is split on commas,
    and each non-empty item is stripped of whitespace and of any leading or
    trailing quote characters before being quoted again exactly once.

    Because the split is on every comma, an item that itself contains a comma
    is broken into separate items.

    Args:
        s: String such as ``"[a, 'b', ''c'']"``.

    Returns:
        A string of the form ``"['a', 'b', 'c']"``. Empty items (for example
        from a trailing comma) are kept as empty, unquoted slots.
    """
    inner = s.strip("[]")  # Remove the surrounding brackets
    corrected_items = []
    for item in inner.split(","):
        item = item.strip()  # Remove leading/trailing whitespace
        if item:
            # repr() yields the same 'single-quoted' form for ordinary words
            # but switches quote style or escapes when the word contains an
            # apostrophe or backslash, so the result is always a valid literal.
            item = repr(item.strip("'\" "))
        corrected_items.append(item)
    return "[" + ", ".join(corrected_items) + "]"

In [19]:
# Re-quote every cell of the 'preconditions' column via fix_quotes
df['preconditions'] = df['preconditions'].map(fix_quotes)

### Apply function to dataframe

In [20]:
# Turn the stringified lists in each column into real Python lists
for list_column in ('tokens', 'preconditions', 'srl_tags'):
    df[list_column] = df[list_column].apply(ast.literal_eval)

# Run the tagger row by row and store its return value in 'new_srl_tags'
df['new_srl_tags'] = df.apply(
    lambda row: tag_preconditions_with_exclusions(
        row['tokens'],
        row['preconditions'],
        row['srl_tags'],
    ),
    axis=1,
)

### Save the dataframe as a new CSV file

In [22]:
# Write the dataframe to disk, leaving the row index out of the file
df.to_csv(
    'updated_tags.csv',
    index=False,
)