In [2]:
import spacy
import pandas as pd
from collections import defaultdict

# Input CSV; the tagging step further down reads its 'Question' column.
file_path = 'test_data_10rows.csv'

# The small English spaCy pipeline provides the part-of-speech tagger.
nlp = spacy.load(name="en_core_web_sm")

# Read the input rows into a DataFrame.
data = pd.read_csv(filepath_or_buffer=file_path)

# Function to perform POS tagging and return the tags
def pos_tags(text):
    """Tag ``text`` with the spaCy pipeline and return ``(token, POS)`` pairs.

    Args:
        text: The value to tag. Missing values (``None``/``NaN``, which is
            what pandas yields for an empty CSV cell) produce an empty list
            instead of crashing the pipeline; any other non-string value is
            converted with ``str`` before tagging.

    Returns:
        A list of ``(token.text, token.pos_)`` tuples in document order.
    """
    if not isinstance(text, str):
        # The pipeline only accepts strings; an empty cell arrives as a float NaN.
        if pd.isna(text):
            return []
        text = str(text)
    doc = nlp(text)
    return [(token.text, token.pos_) for token in doc]

# Tag every question; each cell of the temporary column holds a list of
# (token, POS) pairs.
data['POS_Tags'] = data['Question'].apply(pos_tags)

# Collect all unique POS tags that occur anywhere in the data
unique_pos_tags = {pos for tags in data['POS_Tags'] for _, pos in tags}

# Create a column for each unique POS tag, defaulting to an empty string.
# Iterate in sorted order: a set of strings has no stable iteration order
# across interpreter runs (hash randomization), which would otherwise shuffle
# the columns of the output file from run to run.
for pos in sorted(unique_pos_tags):
    data[pos] = ""

# Fill the respective tokens into the appropriate columns. Only the POS_Tags
# column is needed here, so iterate over it directly rather than building a
# full row object for every record.
for index, tagged in data['POS_Tags'].items():
    pos_dict = defaultdict(list)
    for token, pos in tagged:
        pos_dict[pos].append(token)
    # Tokens sharing a tag are stored space-separated, in sentence order.
    for pos, tokens in pos_dict.items():
        data.at[index, pos] = ' '.join(tokens)

# Drop the temporary POS_Tags column
data.drop(columns=['POS_Tags'], inplace=True)

# Save the updated DataFrame to a new CSV file
output_file_path = 'POS_out.csv'
data.to_csv(output_file_path, index=False)

print(f"Updated file saved to {output_file_path}")


Updated file saved to POS_out.csv
