In [None]:
import json
import os

import pandas as pd
import spacy

# Load the spaCy pipeline once at module level; process_sentence and
# word_vector_analysis both reuse this shared `nlp` object.
nlp = spacy.load("en_core_web_lg")

# Keep only "NOUN", "PROPN", "VERB", "ADJ", "ADV" tokens (all other parts of speech are dropped)
def process_sentence(sentence):
    """Reduce a sentence to its content words, regrouped by part of speech.

    The sentence is run through the module-level spaCy pipeline ``nlp``.
    Tokens whose coarse tag (``token.pos_``) is NOUN, PROPN, VERB, ADJ or ADV
    are kept; every other token is dropped. The kept tokens are emitted group
    by group in that fixed tag order, preserving the original token order
    inside each group.

    Args:
        sentence: Text to analyse; passed to ``nlp`` unchanged.

    Returns:
        A single string with the kept token texts separated by single spaces,
        or an empty string when no token matches.
    """
    kept_tags = ("NOUN", "PROPN", "VERB", "ADJ", "ADV")
    grouped = {tag: [] for tag in kept_tags}

    for token in nlp(sentence):
        bucket = grouped.get(token.pos_)
        if bucket is not None:
            bucket.append(token.text)

    # Flatten the groups before joining: joining one string per group would
    # insert stray spaces wherever a group is empty.
    return ' '.join(text for tag in kept_tags for text in grouped[tag])

def word_vector_analysis(texts, filename):
    """Write per-token word vectors for each text to a JSON Lines file.

    Every item of ``texts`` is converted to a string (missing values, as
    detected by ``pd.isna``, become the empty string), run through the
    module-level spaCy pipeline ``nlp``, and written as one JSON object per
    line to ``"{filename}_vectors.json"``. Each object has the form
    ``{text: {token_text: vector_as_list, ...}}``.

    Args:
        texts: Iterable of values to analyse (e.g. a DataFrame column).
        filename: Output path prefix; ``"_vectors.json"`` is appended.

    Returns:
        None. The output file is created or overwritten.
    """
    # Normalise up front so the strings can be streamed through nlp.pipe.
    cleaned = ["" if pd.isna(text) else str(text) for text in texts]

    with open(f"{filename}_vectors.json", 'w') as json_file:
        # nlp.pipe yields one Doc per input, in order, but processes the
        # texts in batches instead of paying per-call overhead for each one.
        for text_str, doc in zip(cleaned, nlp.pipe(cleaned)):
            # Keyed by token text: a token repeated within one text is stored
            # once (the last occurrence wins).
            vectors = {token.text: token.vector.tolist() for token in doc}

            # One JSON object per line, written as soon as it is available.
            json.dump({text_str: vectors}, json_file)
            json_file.write('\n')


In [None]:
# Get CSV file path from user input
csv_file_path = input("Enter the path to the CSV file: ")

# Use the path without its extension as the prefix for every output file.
# os.path.splitext strips only the final extension, so dots elsewhere in the
# path (e.g. "./data/reviews.v2.csv") do not truncate or empty the prefix.
variable_name = os.path.splitext(csv_file_path)[0]

# Read the entire data
data = pd.read_csv(csv_file_path, encoding='latin1')

# Process reviews and write word vectors to JSON files, one file per column
for column, suffix in (
    ('Review Title', 'review_title'),
    ('Likes', 'likes'),
    ('Dislikes', 'dislikes'),
):
    word_vector_analysis(data[column], f"{variable_name}_{suffix}")
