In [18]:
import pandas as pd
import requests
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Initialize the NLTK sentiment analyzer.
# The 'vader_lexicon' package is requested from the NLTK downloader before the
# analyzer is built; presumably SentimentIntensityAnalyzer() needs that lexicon
# on disk, so keep the two statements in this order.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Module-level cache of related-term lists already fetched from the ConceptNet
# API, so that repeated lookups can be answered without another HTTP request.
# It lives for the lifetime of the process/kernel and is never evicted.
related_terms_cache = {}

# Function to retrieve related terms from ConceptNet API with caching
def get_conceptnet_related_terms_web_api(seed_word, limit=50):
    """Return the terms ConceptNet lists as related to ``seed_word``.

    Successful lookups are stored in the module-level ``related_terms_cache``
    under a ``(normalised concept, limit)`` key, so the same concept requested
    with a different ``limit`` is fetched again instead of silently reusing a
    shorter list. Failed lookups are not cached and are retried on the next
    call.

    Args:
        seed_word: English word or phrase to look up. It is lowercased and
            whitespace is collapsed to underscores before being placed in
            the request URL.
        limit: Maximum number of related terms to ask the API for.

    Returns:
        A list of related terms with underscores replaced by spaces. An empty
        list is returned when ``seed_word`` is blank, the request fails, the
        server answers with a non-200 status, or the body is not the expected
        JSON object. Errors are reported with ``print`` rather than raised.
    """
    from urllib.parse import quote

    # Build the concept path segment: "Drinking  water" -> "drinking_water".
    concept = "_".join(str(seed_word).lower().split())
    if not concept:
        # "/c/en/" alone is not a concept; skip the pointless request.
        return []

    cache_key = (concept, limit)
    cached = related_terms_cache.get(cache_key)
    if cached is not None:
        return cached

    encoded_concept = quote(concept, safe="")
    api_url = f"http://api.conceptnet.io/related/c/en/{encoded_concept}?limit={limit}"
    try:
        # Without a timeout a stalled connection would block the whole run.
        response = requests.get(api_url, timeout=10)
    except requests.RequestException as e:
        print(f"Request failed: {e}")
        return []

    if response.status_code != 200:
        print(f"Error fetching related terms for '{seed_word}'. Status code: {response.status_code}")
        return []

    try:
        entries = response.json().get('related', [])
    except (ValueError, AttributeError) as e:
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        print(f"Unexpected response for '{seed_word}': {e}")
        return []

    # '@id' looks like "/c/en/some_term"; keep only the last path segment.
    related_terms = [
        entry['@id'].split('/')[-1].replace('_', ' ')
        for entry in entries
        if isinstance(entry, dict) and entry.get('@id')
    ]
    # Store results
    related_terms_cache[cache_key] = related_terms
    return related_terms

# Function to calculate sentiment
def aspect_related_sentiment(text, aspects):
    """Average the VADER compound score of the sentences mentioning each aspect.

    ``text`` is split into sentences with ``nltk.sent_tokenize``. For every
    aspect, the related terms are obtained from
    ``get_conceptnet_related_terms_web_api`` and a sentence counts towards the
    aspect when any related term occurs in it. Matching is a case-insensitive
    *substring* test, so a short term such as "art" also matches "party".

    Args:
        text: The document to analyse.
        aspects: Iterable of aspect words. Duplicates are processed once.

    Returns:
        A dict mapping each aspect to the mean compound score of its matching
        sentences, or ``0`` when no sentence matched (which is therefore
        indistinguishable from a genuinely neutral average).
    """
    sentences = nltk.sent_tokenize(text)
    # Lowercase every sentence once instead of once per (aspect, term) pair.
    lowered_sentences = [sentence.lower() for sentence in sentences]
    # Compound scores are computed lazily and reused across aspects, since a
    # sentence's score does not depend on which aspect matched it.
    compound_by_index = {}

    aspect_sentiments = {aspect: [] for aspect in aspects}

    # Iterating the dict (not `aspects`) visits each distinct aspect once.
    for aspect, scores in aspect_sentiments.items():
        # Blank terms are dropped: "" is a substring of every sentence and
        # would otherwise attribute the whole text to this aspect.
        terms = [
            term.lower()
            for term in get_conceptnet_related_terms_web_api(aspect)
            if term and term.strip()
        ]
        if not terms:
            continue

        for index, lowered in enumerate(lowered_sentences):
            if not any(term in lowered for term in terms):
                continue
            if index not in compound_by_index:
                compound_by_index[index] = sia.polarity_scores(sentences[index])['compound']
            scores.append(compound_by_index[index])

    # Average sentiment score for each aspect with sentiment data
    final_sentiments = {
        aspect: sum(scores) / len(scores) if scores else 0
        for aspect, scores in aspect_sentiments.items()
    }
    return final_sentiments

# Replace the path with the actual path to your Excel file
excel_file_path = 'C:\\Users\\student\\Desktop\\NLP Work\\Folklore_dataset.xlsx'

# Load the first 500 rows of column A from the 'Italy' sheet. header=None
# treats the first row as data rather than as column names.
data = pd.read_excel(excel_file_path, sheet_name='Italy', usecols="A", nrows=500, header=None)

# Define aspects to be analyzed (each aspect listed once; a repeated entry
# adds no column to the results and only triggers a redundant pass).
aspects = ["poor", "food", "sick", "clever", "women", "water", "wealth"]

# Calculate sentiments.
# Blank cells are read as NaN; replace them with "" first so they are not
# turned into the literal text "nan" by astype(str).
data['text'] = data.iloc[:, 0].fillna('').astype(str)
data['sentiments'] = data['text'].apply(lambda x: aspect_related_sentiment(x, aspects))

# Prepare results: one row per input row, one column per aspect
results = pd.DataFrame(data['sentiments'].tolist(), index=data.index)

# Set display options to show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Print results
print(results)

# Save the per-aspect scores; the row index is not written to the file
results.to_excel('C:\\Users\\student\\Desktop\\NLP Work\\Folklore_results.xlsx', index=False)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\student\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


       poor    food    sick  clever   women    water  wealth
0    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
1    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
2    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
3    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
4    0.0000  0.0000  0.0000       0  0.0000  0.76425       0
5    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
6   -0.3612  0.0000  0.0000       0  0.0000  0.00000       0
7    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
8    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
9    0.0000  0.0000  0.0000       0  0.0000  0.00000       0
10   0.0000  0.0000  0.0000       0  0.0000  0.00000       0
11   0.0000  0.0000  0.0000       0  0.0000  0.00000       0
12   0.0000  0.0000  0.0000       0  0.0000  0.00000       0
13   0.0000  0.0000  0.0000       0  0.0000  0.00000       0
14   0.0000  0.0000  0.0000       0  0.0000  0.00000       0
15   0.0000  0.0000  0.0