In [None]:
import csv

def compare_csv_words(file1, file2):
    """
    Compare the words in the first column of two CSV files.

    Each file is parsed with ``csv.reader``; the first cell of every
    non-empty row is stripped of surrounding whitespace and collected into
    a set. Cells that are empty after stripping are ignored, so a blank
    first column is never reported as a "word". Comparison is exact
    (case-sensitive), and every row is treated as data: a header row, if
    present, contributes its first cell like any other row.

    Args:
        file1 (str): Path to the first CSV file
        file2 (str): Path to the second CSV file

    Returns:
        tuple: (count_common, common_words, words1, words2) where
            count_common is the number of words found in both files,
            common_words is the set of those words, and words1 / words2
            are the sets of words read from file1 / file2.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If either file is not valid UTF-8.
    """

    def read_first_column(filename):
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
        # mark; with plain "utf-8" the BOM would stay glued to the first word
        # (str.strip() does not remove it) and that word would never match.
        with open(filename, newline='', encoding="utf-8-sig") as csvfile:
            # `if row` skips blank lines, which csv.reader yields as [].
            first_cells = (row[0].strip() for row in csv.reader(csvfile) if row)
            # Drop cells that were empty or whitespace-only.
            return {word for word in first_cells if word}

    words1 = read_first_column(file1)
    words2 = read_first_column(file2)

    common_words = words1 & words2

    return len(common_words), common_words, words1, words2


In [None]:
# Example usage: compare the first-column words of two NGSL word lists.
# Forward slashes are used as path separators: in a normal string literal
# "\N" begins a \N{NAME} unicode escape, so "NGSL_lists\NGSL..." is a
# SyntaxError. Forward slashes are accepted on Windows as well as on
# POSIX systems.
# NOTE: list1 and list2 are sets (the words read from each file), not lists.
count, common, list1, list2 = compare_csv_words(
    "NGSL_lists/NGSL-GR_rank.csv",
    "NGSL_lists/NGSL_1.2_lemmatized_for_research.csv",
)
print(f"Number of common words: {count}")
print("Common words:", common)