In [14]:
import re
import pandas as pd

def preprocess_text(text):
    """Normalize raw text for lexicon matching.

    The text is lower-cased, then every character that is neither a word
    character (letter, digit, underscore) nor whitespace is removed.
    Whitespace itself is left untouched.
    """
    lowered = text.lower()
    return re.sub(r'[^\w\s]', '', lowered)

def load_lexicon(file_path):
    """Load a tab-separated sentiment lexicon, longest phrases first.

    The first line of the file is treated as a header and replaced by the
    column names ``word`` and ``weight``.

    Args:
        file_path: Path to the TSV file.

    Returns:
        A DataFrame with columns ``word`` (str), ``weight`` (numeric; values
        that cannot be parsed become NaN) and ``length`` (number of
        whitespace-separated tokens in ``word``), sorted by ``length`` in
        descending order with a fresh 0..n-1 index. Rows whose word is blank
        are dropped.
    """
    lexicon = pd.read_csv(
        file_path,
        sep='\t',
        header=0,
        names=['word', 'weight'],
        # Read words as literal text: with the default NA handling, entries
        # such as "null", "nan" or "NA" are parsed as float NaN, which has
        # no .split() and used to crash the length computation.
        dtype={'word': str},
        keep_default_na=False,
    )
    # With NA parsing off, a blank cell arrives as '' and can never be a
    # meaningful lexicon entry, so discard it.
    lexicon = lexicon[lexicon['word'].str.strip() != ''].copy()
    lexicon['weight'] = pd.to_numeric(lexicon['weight'], errors='coerce')
    lexicon['length'] = lexicon['word'].str.split().str.len()
    # A stable sort gives entries of equal length a deterministic order.
    return lexicon.sort_values(
        by='length', ascending=False, kind='stable'
    ).reset_index(drop=True)

# Load both polarity lexicons once at module level (positive first, then
# negative); the paths are relative to the current working directory.
positive_lexicon, negative_lexicon = map(
    load_lexicon, ('positive.tsv', 'negative.tsv')
)

def _lexicon_lookup(lexicon):
    """Map each lexicon word to the weight of its first occurrence."""
    lookup = {}
    for word, weight in zip(lexicon['word'], lexicon['weight']):
        # Non-string cells (e.g. NaN) can never equal a phrase; skip them.
        if isinstance(word, str):
            lookup.setdefault(word, weight)
    return lookup


def calculate_sentiment(text, positive_lexicon, negative_lexicon):
    """Score ``text`` by greedy longest-phrase matching against two lexicons.

    The text is normalized with ``preprocess_text`` and split on whitespace.
    Scanning left to right, the longest run of tokens starting at the current
    position that appears in either lexicon is consumed and its weight added
    to the score; for the same phrase the positive lexicon is consulted
    before the negative one. Tokens that start no match are skipped.

    Each matched phrase is printed as it is found, followed by ``*`` for a
    positive match or ``+`` for a negative match, with a newline at the end.

    Args:
        text: The raw input string.
        positive_lexicon: DataFrame with ``word`` and ``weight`` columns.
        negative_lexicon: DataFrame with ``word`` and ``weight`` columns.

    Returns:
        The sum of the weights of all matched phrases (0 if nothing matched).
    """
    # Build hash lookups once instead of scanning both DataFrames for every
    # candidate phrase.
    positive = _lexicon_lookup(positive_lexicon)
    negative = _lexicon_lookup(negative_lexicon)

    # No phrase longer than the longest lexicon entry can match, so cap the
    # candidate span instead of always starting from the end of the text.
    max_tokens = max(
        (len(word.split()) for lookup in (positive, negative) for word in lookup),
        default=0,
    )

    words = preprocess_text(text).split()
    total = len(words)
    sentiment_score = 0

    start = 0
    while start < total:
        longest_end = min(total, start + max_tokens)
        for end in range(longest_end, start, -1):
            phrase = ' '.join(words[start:end])

            if phrase in positive:
                sentiment_score += positive[phrase]
                print(f"{phrase}*", end='')
                start = end  # consume the matched tokens
                break

            if phrase in negative:
                sentiment_score += negative[phrase]
                print(f"{phrase}+", end='')
                start = end  # consume the matched tokens
                break
        else:
            # Nothing starts at this token; move on to the next one.
            start += 1

    print()  # Newline after the printed matches
    return sentiment_score

# Example usage
example_text = "jalan terbuka putus tali gantung"
sentiment_score = calculate_sentiment(example_text, positive_lexicon, negative_lexicon)
print(f"Sentiment score: {sentiment_score}")

jalan terbuka*putus tali gantung+
Sentiment score: 1
