In [71]:
import csv
import unittest

# Load a word -> sentiment-score lexicon from a CSV file
def load_lexicon(filename):
    """Read a two-column CSV (word, integer score) into a dictionary.

    The first row is treated as a header and skipped. Completely blank rows
    (such as a trailing empty line) are ignored, and surrounding whitespace
    is stripped from each word so that entries can match the tokens produced
    by ``str.split`` during analysis.

    Problems are reported with ``print`` instead of being raised. When an
    error occurs part-way through the file, the entries read before the
    error are still returned.

    Args:
        filename: Path of the CSV file; it is read as UTF-8.

    Returns:
        dict mapping each word (str) to its sentiment score (int). The dict
        is empty when the file is missing or contains no data rows.
    """
    lexicon = {}
    try:
        # newline='' lets the csv module do its own newline handling, as the
        # csv documentation recommends for files passed to csv.reader.
        with open(filename, 'r', encoding='utf-8', newline='') as file:
            reader = csv.reader(file)
            # Skip the header row; the default avoids StopIteration on an
            # empty file.
            if next(reader, None) is None:
                print(f"Error: The file {filename} is empty.")
                return lexicon
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; they carry no data.
                    continue
                if len(row) != 2:
                    raise ValueError("Invalid CSV format: Each row should contain exactly two columns.")
                word, sentiment_score = row[0].strip(), int(row[1])
                lexicon[word] = sentiment_score
    except FileNotFoundError:
        print(f"Error: The file {filename} was not found.")
    except ValueError as e:
        # Raised above for a wrong column count, or by int() for a bad score.
        print(f"Error: {e}")
        print("Please ensure that the CSV file is properly formatted with two columns: word and sentiment score.")
    except Exception as e:
        print(f"An error occurred while loading the lexicon: {e}")
    return lexicon

# One shared lexicon: the English and Sinhala entries live in the same CSV file
LEXICON_PATH = 'lexicons.csv'
lexicon = load_lexicon(LEXICON_PATH)

# Define a function for lexicon-based sentiment analysis
def lexicon_sentiment_analysis(text, lexicon):
    """Label a text by summing the lexicon scores of its words.

    The text is split on whitespace. Each token is first looked up exactly
    as written; if that fails, it is looked up again with leading/trailing
    ASCII punctuation removed and lower-cased, so that e.g. "Great," can
    match a lexicon entry "great". Because the exact form is always tried
    first, every token that matched before still matches with the same score.

    Args:
        text: The sentence or phrase to score. ``None`` or an empty string
            is treated as having no words.
        lexicon: Mapping of word -> numeric sentiment score.

    Returns:
        Tuple ``(sentiment, total_polarity)`` where ``sentiment`` is
        "Positive", "Negative" or "Neutral" depending on the sign of the
        summed score.
    """
    import string  # local import so the function does not rely on another cell

    total_polarity = 0

    for word in (text or "").split():
        if word in lexicon:
            total_polarity += lexicon[word]
            continue
        # Fallback: tolerate capitalisation and surrounding punctuation.
        normalized = word.strip(string.punctuation).lower()
        if normalized and normalized in lexicon:
            total_polarity += lexicon[normalized]

    # Determine sentiment label based on the total polarity
    if total_polarity > 0:
        sentiment = "Positive"
    elif total_polarity < 0:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"

    return sentiment, total_polarity

# Run the lexicon analysis over a batch of sentences
def analyze_sentences(sentences, lexicon):
    """Score each sentence and return (sentence, sentiment, score) tuples.

    The tuples are returned in a list, in the same order as ``sentences``.
    """
    def _score(sentence):
        # Unpack the pair explicitly so each result is a flat 3-tuple.
        label, polarity = lexicon_sentiment_analysis(sentence, lexicon)
        return (sentence, label, polarity)

    return [_score(sentence) for sentence in sentences]

# Sample inputs, grouped by language
english_sentences = ["amazing", "great!","excellent"]
sinhala_sentences = ["අමාරුයි", "ස්තූති සර්"]

# Score every sample against the shared lexicon
english_results = analyze_sentences(english_sentences, lexicon)
sinhala_results = analyze_sentences(sinhala_sentences, lexicon)


def _print_results(heading, results):
    """Print a heading, then the sentence, label and score of each result."""
    print(heading)
    for sentence, sentiment, score in results:
        print("Sentence:", sentence)
        print("Sentiment:", sentiment)
        print("Sentiment Score:", score)
        print()


# Same report layout for both languages
_print_results("Sentiment Analysis Results for English Sentences:", english_results)
_print_results("Sentiment Analysis Results for Sinhala Sentences:", sinhala_results)



Sentiment Analysis Results for English Sentences:
Sentence: amazing
Sentiment: Positive
Sentiment Score: 1

Sentence: great!
Sentiment: Positive
Sentiment Score: 1

Sentence: excellent
Sentiment: Positive
Sentiment Score: 1

Sentiment Analysis Results for Sinhala Sentences:
Sentence: අමාරුයි
Sentiment: Negative
Sentiment Score: -1

Sentence: ස්තූති සර්
Sentiment: Positive
Sentiment Score: 1



In [73]:
class TestSentimentAnalysis(unittest.TestCase):
    def setUp(self):
        # Define a sample lexicon for testing
        self.test_lexicon = {"excellent": 1, "අමාරුයි": -1}

    def test_load_lexicon(self):
        # Test loading lexicon from a CSV file
        lexicon = load_lexicon('lexicons.csv')
        self.assertIsInstance(lexicon, dict)
        self.assertTrue(len(lexicon) > 0)

    def test_lexicon_sentiment_analysis(self):
        # Test lexicon-based sentiment analysis for the word "excellent"
        text_excellent = "excellent"
        expected_sentiment_excellent = "Positive"
        expected_score_excellent = 1
        sentiment_excellent, score_excellent = lexicon_sentiment_analysis(text_excellent, self.test_lexicon)
        print(f"Sentiment for '{text_excellent}': Expected '{expected_sentiment_excellent}', Actual '{sentiment_excellent}'")
        print(f"Sentiment score for '{text_excellent}': Expected '{expected_score_excellent}', Actual '{score_excellent}'")
        self.assertEqual(sentiment_excellent, expected_sentiment_excellent)
        self.assertEqual(score_excellent, expected_score_excellent)

        # Test lexicon-based sentiment analysis for the word "අමාරුයි"
        text_sinhala = "අමාරුයි"
        expected_sentiment_sinhala = "Negative"
        expected_score_sinhala = -1
        sentiment_sinhala, score_sinhala = lexicon_sentiment_analysis(text_sinhala, self.test_lexicon)
        print(f"Sentiment for '{text_sinhala}': Expected '{expected_sentiment_sinhala}', Actual '{sentiment_sinhala}'")
        print(f"Sentiment score for '{text_sinhala}': Expected '{expected_score_sinhala}', Actual '{score_sinhala}'")
        self.assertEqual(sentiment_sinhala, expected_sentiment_sinhala)
        self.assertEqual(score_sinhala, expected_score_sinhala)

# Run the test case inside the notebook kernel:
#   argv=['']   -> unittest does not try to parse the kernel's own command line
#   exit=False  -> unittest does not call sys.exit() when the run finishes
unittest.main(
    argv=[''],
    exit=False,
    defaultTest='TestSentimentAnalysis',
)

..
----------------------------------------------------------------------
Ran 2 tests in 0.003s

OK


Sentiment for 'excellent': Expected 'Positive', Actual 'Positive'
Sentiment score for 'excellent': Expected '1', Actual '1'
Sentiment for 'අමාරුයි': Expected 'Negative', Actual 'Negative'
Sentiment score for 'අමාරුයි': Expected '-1', Actual '-1'


<unittest.main.TestProgram at 0x28bced49750>

In [74]:
# Define function to evaluate model accuracy
def evaluate_accuracy(model, dataset, lexicon=None):
    """Return the fraction of dataset entries the model labels correctly.

    Args:
        model: Callable ``model(text, lexicon)`` returning a tuple whose
            first element is the predicted sentiment label (for example
            ``lexicon_sentiment_analysis``).
        dataset: Iterable of ``(text, true_sentiment)`` pairs. Any iterable
            works, including generators.
        lexicon: Lexicon passed to ``model``. When omitted, the module-level
            ``lexicon`` variable is used, which keeps existing two-argument
            calls working exactly as before.

    Returns:
        Accuracy as a float between 0 and 1; ``0.0`` for an empty dataset
        (instead of raising ZeroDivisionError).
    """
    if lexicon is None:
        # Backward compatibility: fall back to the notebook-global lexicon.
        lexicon = globals()["lexicon"]

    correct_predictions = 0
    total_samples = 0  # counted in the loop so len() is not required

    for text, true_sentiment in dataset:
        total_samples += 1
        predicted_sentiment, _ = model(text, lexicon)
        if predicted_sentiment == true_sentiment:
            correct_predictions += 1

    if total_samples == 0:
        return 0.0

    accuracy = correct_predictions / total_samples
    return accuracy

# Labeled evaluation set: each entry is (text, expected sentiment label),
# mixing English and Sinhala words/phrases.
# NOTE(review): several entries appear more than once (e.g. "amazing",
# "useful", "detailed"), so they are counted more than once in the accuracy.
# NOTE(review): "broing" looks like a typo for "boring" -- confirm against the
# lexicon before changing it, since the text must match a lexicon key exactly.
# NOTE(review): "higher level " carries a trailing space; it is harmless
# because the analysis splits the text on whitespace.
labeled_dataset = [
    ("amazing", "Positive"),
    ("great!", "Positive"),
    ("excellent", "Positive"),
    ("අමාරුයි", "Negative"),
    ("ස්තූති සර්", "Positive"),
    ("කම්මැලියි", "Negative"),
    ("අවුල් සහගතයි", "Negative"),
    ("අභිප්රේරණය නොකළ", "Negative"),
    ("අතිමහත්", "Negative"),
    ("ආතතිය ඇති කරයි", "Negative"),
    ("වෙහෙසකරයි", "Negative"),
    ("ඉල්ලනවා", "Negative"),
    ("අනම්ය", "Negative"),
    ("කලකිරීමයි", "Negative"),
    ("නරක", "Negative"),
    ("ඉතා නරකයි", "Negative"),
    ("නරකම", "Negative"),
    ("කලකිරීමයි", "Negative"),
    ("අභයෝගාත්මක", "Negative"),
    ("දුෂ්කර", "Negative"),
    ("අවුල් සහගතයි", "Negative"),
    ("අතිමහත්", "Negative"),
    ("ආතති සහගතයි", "Negative"),
    ("අධෛර්යමත් කරයි", "Negative"),
    ("පුනරාවර්තන", "Negative"),
    ("ඒකාකාරී", "Negative"),
    ("ඉල්ලනවා", "Negative"),
    ("කාලය කා දමන", "Negative"),
    ("පුදුම සහගතයි", "Positive"),
    ("මහා", "Positive"),
    ("සුපිරියි", "Positive"),
    ("ප්රියජනකයි", "Positive"),
    ("ප්‍රශංසනීයයි", "Positive"),
    ("දක්ෂයි", "Positive"),
    ("ධනාත්මක", "Positive"),
    ("ප්‍රශංසනීයයි", "Positive"),
    ("A+", "Positive"),
    ("පොරොන්දු වෙනවා", "Positive"),
    ("ආධාරක", "Positive"),
    ("යෙදෙන", "Positive"),
    ("පොහොසත් කරනවා", "Positive"),
    ("බලගැන්වීම", "Positive"),
    ("විපාක දෙන", "Positive"),
    ("detailed", "Positive"),
    ("best", "Positive"),
    ("worse", "Negative"),
    ("better", "Positive"),
    ("perfect", "Positive"),
    ("awesome", "Positive"),
    ("broing", "Negative"),
    ("useful", "Positive"),
    ("amazing", "Positive"),
    ("clear", "Positive"),
    ("concise", "Positive"),
    ("helpful", "Positive"),
    ("understandable", "Positive"),
    ("informative", "Positive"),
    ("useful", "Positive"),
    ("hard", "Negative"),
    ("dead", "Negative"),
    ("missing", "Negative"),
    ("detailed", "Positive"),
    ("learned", "Positive"),
    ("enjoyed", "Positive"),
    ("typos", "Negative"),
    ("Mistakes", "Negative"),
    ("higher level ", "Positive"),
    ("cannot", "Negative"),
    ("slow", "Negative"),
    ("like", "Positive"),
    ("discouraged", "Negative"),
    ("recommend", "Positive"),
    ("uninteresting", "Negative"),
    ("Valuable", "Positive"),
    ("enriching", "Positive"),
    ("helped", "Positive"),
    ("bad", "Negative")
]

# Score the lexicon model on the labeled set. The printed value is the
# fraction of entries whose predicted label equals the expected label.
accuracy = evaluate_accuracy(lexicon_sentiment_analysis, labeled_dataset)
print("Model Accuracy:", accuracy)

Model Accuracy: 0.8441558441558441
