import pandas as pd
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Download the NLTK resources used below. nltk.download() leaves packages
# that are already installed and up to date untouched.
nltk.download('punkt')
# Recent NLTK releases load the Punkt tokenizer from the 'punkt_tab' package
# instead of the pickled 'punkt' models; fetch both so word_tokenize() works
# regardless of the installed NLTK version.
nltk.download('punkt_tab')
nltk.download('wordnet')

# Load the raw text to process.
with open("../datasets/text_for_processing.txt", "r", encoding="utf-8") as file:
    text = file.read()

# Split the text into word and punctuation tokens.
words = word_tokenize(text)

# Stemming: reduce each token to its Porter stem.
# NOTE: PorterStemmer.stem() lowercases its input by default.
stemmer = PorterStemmer()
stemmed_words = [stemmer.stem(word) for word in words]

# Lemmatization: map each token to its WordNet lemma.
# NOTE: lemmatize() is called without a POS tag, so every token is treated
# as a noun (the library default); verbs and adjectives are mostly unchanged.
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(word) for word in words]

# Build one table per technique, pairing every original token with its result.
df_stemming = pd.DataFrame({"Original Word": words, "Stemmed Word": stemmed_words})
df_lemmatization = pd.DataFrame({"Original Word": words, "Lemmatized Word": lemmatized_words})

# Save results
df_stemming.to_csv("../datasets/stemming_result.csv", index=False)
df_lemmatization.to_csv("../datasets/lemmatization_result.csv", index=False)

# `display` only exists when running under IPython/Jupyter. Fall back to
# print() so the script also runs under a plain Python interpreter instead of
# failing with NameError after the CSV files have been written.
try:
    _show_table = display
except NameError:
    _show_table = print

# Display the first rows of each table.
print("Stemming Results:")
_show_table(df_stemming.head())

print("Lemmatization Results:")
_show_table(df_lemmatization.head())
