In [None]:
## Import Libraries

import pandas as pd
from textblob import TextBlob
import textstat
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
from collections import Counter

In [None]:
# Read the whole Inferno text file into memory as one string.
with open(r"C:\Users\user\Desktop\datasets\dante_inferno.txt", mode="r", encoding="utf-8") as inferno_file:
    inferno = inferno_file.read()

In [None]:
# Break the text into cantos by splitting on the literal marker "Canto".
# NOTE(review): any non-blank text before the first "Canto" (e.g. a title or
# preface) also becomes a section, and a mid-text mention of "Canto" starts a
# new one -- confirm against the layout of the source file.

sections = []
for chunk in inferno.split("Canto"):
    trimmed = chunk.strip()
    if trimmed:  # drop pieces that are empty or whitespace-only
        sections.append(trimmed)

# Put the marker consumed by split() back in front of every section.
cantos = ["Canto " + body for body in sections]

# Accumulators that the per-canto processing cell appends to.
metrics = []
all_words = []

In [None]:
# Process each Canto: compute readability, lexical and sentiment metrics,
# one row per canto, and collect every token for the word cloud.

# Start from empty accumulators so that re-running this cell does not append
# duplicate rows; clearing in place keeps the same list objects.
metrics.clear()
all_words.clear()

for i, text in enumerate(cantos, 1):
    blob = TextBlob(text)
    # Lower-cased word tokens, used for lexical diversity and the word cloud.
    words = re.findall(r'\b\w+\b', text.lower())

    # Both updates must run inside the loop body: when they sit after the
    # loop, only the final canto is ever recorded.
    metrics.append({
        "Canto": i,
        "FleschEase": textstat.flesch_reading_ease(text),
        "FKGrade": textstat.flesch_kincaid_grade(text),
        "SMOG": textstat.smog_index(text),
        "DifficultWords": textstat.difficult_words(text),
        # Share of distinct tokens among all tokens of this canto.
        "LexicalDiversity": round(len(set(words)) / len(words), 3),
        "AvgSentenceLength": textstat.avg_sentence_length(text),
        "Polarity": blob.sentiment.polarity,
        "Subjectivity": blob.sentiment.subjectivity
    })
    all_words.extend(words)

In [None]:
# Demo: print a panel of textstat readability scores for one sample string.

text = "Your full text or canto here."

# (label, textstat function name) pairs, in the order they are printed.
# The function is looked up by name just before it is called.
readability_checks = [
    ("Flesch Reading Ease:", "flesch_reading_ease"),
    ("Flesch-Kincaid Grade:", "flesch_kincaid_grade"),
    ("SMOG Index:", "smog_index"),
    ("Automated Readability Index:", "automated_readability_index"),
    ("Dale-Chall Score:", "dale_chall_readability_score"),
    ("Difficult Words:", "difficult_words"),
    ("Lexicon Count:", "lexicon_count"),
    ("Average Sentence Length:", "avg_sentence_length"),
]

for label, scorer_name in readability_checks:
    print(label, getattr(textstat, scorer_name)(text))

In [None]:
# Turn the collected metric records into a table and write it to
# "dante_metrics.csv" without the index column.

df = pd.DataFrame(data=metrics)
df.to_csv(path_or_buf="dante_metrics.csv", index=False)

In [None]:
# Render a single word cloud from the tokens accumulated in all_words.

text_combined = " ".join(all_words)
cloud_builder = WordCloud(width=1000, height=500, background_color="white")
wordcloud = cloud_builder.generate(text_combined)

# Draw the cloud on an explicit figure/axes pair with the axes hidden.
fig, ax = plt.subplots(figsize=(12, 6))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')
ax.set_title("Word Cloud of Dante's Inferno")
fig.tight_layout()
plt.show()

In [None]:
# Build a word -> emotions lookup from the NRC Emotion Lexicon
# (uses pandas, imported as pd).

# Location of the tab-separated lexicon file.
filepath = r"C:\Users\user\NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"

# Read the three columns, then keep only rows whose association flag is 1.
# NOTE(review): pandas' default NA parsing may turn literal entries such as
# "null" into NaN, which the groupby below then drops -- confirm whether the
# lexicon contains such words.
nrc = pd.read_csv(
    filepath,
    sep='\t',
    names=["word", "emotion", "association"],
).loc[lambda frame: frame['association'] == 1]

# Map every remaining word to the list of emotions it is associated with.
emotions_by_word = nrc.groupby('word')['emotion'].apply(list)
emotion_dict = emotions_by_word.to_dict()

# Show the first five entries as a quick sanity check.
print("Sample emotion associations:")
for word in list(emotion_dict)[:5]:
    print(f"{word} → {emotion_dict[word]}")

In [None]:
# Emotional frequency analysis: count how often each emotion from the lexicon
# is evoked by the words of the text, then plot the counts.
# Uses plt, Counter and re from the import cell at the top of the notebook
# and emotion_dict from the lexicon cell.

# Step 1: Load Dante text
with open(r"C:\Users\user\Desktop\datasets\dante_inferno.txt", "r", encoding="utf-8") as file:
    inferno = file.read()

# Step 2: Clean and tokenize
# Lower-case first: the pattern only matches a-z, so without this every word
# containing a capital letter (e.g. each sentence-initial word) is skipped.
words = re.findall(r'\b[a-z]{2,}\b', inferno.lower())

# Step 3: Match words to emotion dictionary
# A word contributes one entry per emotion it is associated with.
emotion_words = []
for word in words:
    if word in emotion_dict:
        emotion_words.extend(emotion_dict[word])

# Step 4: Count emotions
emotion_freq = Counter(emotion_words)

# Step 5: Plot emotion frequencies
plt.figure(figsize=(12, 6))
plt.bar(emotion_freq.keys(), emotion_freq.values(), color='skyblue')
plt.title("Emotion Frequency in Dante's Inferno")
plt.xlabel("Emotion")
plt.ylabel("Frequency")
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()