In [9]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Initialize lemmatizer and stemmer
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

In [12]:
# Load TempoWordNet data
# ID    Synset_name    POS   Synset_gloss  Prob_of_being_Past    Prob_of_being_Present    Prob_of_being_Future    Prob_of_being_Atemporal
columns = ["ID", "Synset_name", "POS", "Synset_gloss", "Prob_of_being_Past", "Prob_of_being_Present", "Prob_of_being_Future", "Prob_of_being_Atemporal"]
tempowordnet_df = pd.read_csv('./TempoWordNet/TempoWnL_1.0.txt', sep='\t', names=columns)

# Ensure 'Name' column has no NaN values and apply the split operation only on valid strings
tempowordnet_df['Word'] = tempowordnet_df['Synset_name'].apply(lambda x: x.split('.')[0] if isinstance(x, str) else None)

# Drop rows where 'Word' is None (if desired)
tempowordnet_df.dropna(subset=['Word'], inplace=True)

# Display the dataframe to verify
print(tempowordnet_df.head())


Empty DataFrame
Columns: [ID, Synset_name, POS, Synset_gloss, Prob_of_being_Past, Prob_of_being_Present, Prob_of_being_Future, Prob_of_being_Atemporal, Word]
Index: []


In [13]:
def preprocess_text(text):
    # Tokenize
    words = word_tokenize(text)
    
    # Remove stop words
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word.lower() not in stop_words]
    
    # Lemmatize and stem
    words = [stemmer.stem(lemmatizer.lemmatize(word)) for word in words]
    
    return words


In [15]:
def calculate_temporal_probabilities(words):
    temporal_probs = []
    
    for word in words:
        try:
            word_probs = tempowordnet_df[tempowordnet_df['Word'] == word].iloc[0]
            probs = (word_probs['Past_Prob'], word_probs['Present_Prob'], word_probs['Future_Prob'], word_probs['Atemporal_Prob'])
            temporal_probs.append(probs)
        except Exception as e:
            # Word not found in TempoWordNet, skip it
            continue
    
    if temporal_probs:
        avg_probs = np.mean(temporal_probs, axis=0)
        return avg_probs
    else:
        return None


In [16]:
def process_sentence(sentence):
    words = preprocess_text(sentence)
    avg_probs = calculate_temporal_probabilities(words)
    return avg_probs

In [17]:
sentence = input('input sentence for processing')
print(sentence)
avg_temporal_probs = process_sentence(sentence)

if avg_temporal_probs is not None:
    print(f"Average Temporal Probabilities for the sentence:\nPast: {avg_temporal_probs[0]}, Present: {avg_temporal_probs[1]}, Future: {avg_temporal_probs[2]}, Atemporal: {avg_temporal_probs[3]}")
else:
    print("No valid words found in TempoWordNet.")

my name is unprocessed
No valid words found in TempoWordNet.
