# Part 4 - Sentiment Analysis

In [None]:
#Import relevant libraries
import urllib.request
import re
import networkx as nx
import json
from networkx.readwrite import json_graph
import statistics
import matplotlib.pyplot as plt
import seaborn as sns
import statistics
import numpy as np
import csv
from networkx.algorithms.community import louvain_communities
from collections import Counter

In [None]:
# Retrieve the labMT sentiment data (word -> average happiness score)
def load_labmt_sentiment_from_url(url):
    """Download a tab-separated labMT word list and return its happiness scores.

    The resource is expected to start with a header row that contains at
    least the columns ``word`` and ``happiness_average``.

    Args:
        url: Location of the tab-separated file; anything accepted by
            ``urllib.request.urlopen``.

    Returns:
        dict mapping each stripped, lower-cased word to its
        ``happiness_average`` as a float. Rows that are truncated or whose
        score cannot be parsed as a number are skipped.

    Raises:
        KeyError: if a data row is read and the header lacks the ``word`` or
            ``happiness_average`` column.
        Any exception raised by ``urllib.request.urlopen`` propagates.
    """
    # The context manager closes the connection even if reading/decoding fails.
    with urllib.request.urlopen(url) as response:
        # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise end up glued to the first header name.
        lines = response.read().decode('utf-8-sig').splitlines()

    sentiment_dict = {}
    for row in csv.DictReader(lines, delimiter='\t'):
        # DictReader files surplus fields under the key None; ignore that
        # entry and strip whitespace from the real header names.
        row = {k.strip(): v for k, v in row.items() if k is not None}
        word = row['word']
        raw_score = row['happiness_average']
        if word is None or raw_score is None:
            # DictReader pads short rows with None: nothing usable here.
            continue
        try:
            score = float(raw_score)
        except ValueError:
            # Non-numeric score (e.g. a placeholder such as "--"): skip row.
            continue
        sentiment_dict[word.strip().lower()] = score
    return sentiment_dict

# Raw GitHub URL of the tab-separated labMT word list; point this at your own
# copy of the file if it lives somewhere else.
github_url = "https://raw.githubusercontent.com/AlexJHage/Rock-band-network/main/labMIT.txt"
# Download and parse the word list (the loader opens the URL when this runs).
labmt_dict = load_labmt_sentiment_from_url(github_url)


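In order to determine the sentiment for the graph, three functions are created:
1. calculate_labmt_sentiment()
- Looks up each token in the labMT dictionary and returns the average happiness score of the tokens that were found (or None if none were found). When neutralSentimentDel is non-zero, words with a neutral score between 4 and 6 are left out of the average.

2. tokenize()
- Lowercases the text and extracts the purely alphabetic words as tokens.

3. annotate_sentiment()
- Calls tokenize() so that the text of each Wikipedia page is tokenized, then calls calculate_labmt_sentiment() to determine the average sentiment of the text for each node. This sentiment value is then stored on the node as the attribute 'sentiment'.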

In [None]:
def calculate_labmt_sentiment(tokens, labmt_dict, neutralSentimentDel):
    """Return the mean labMT score of the tokens found in ``labmt_dict``.

    Args:
        tokens: Iterable of word tokens to score.
        labmt_dict: Mapping from word to happiness score.
        neutralSentimentDel: When equal to 0 every matched word counts;
            otherwise words scoring from 4 to 6 (inclusive) are left out.

    Returns:
        The average of the retained scores, or None when no score is retained.
    """
    keep_everything = neutralSentimentDel == 0
    # Scores of the tokens present in the dictionary, in token order.
    matched = (labmt_dict[token] for token in tokens if token in labmt_dict)
    retained = [
        value
        for value in matched
        if keep_everything or value < 4 or value > 6
    ]
    if not retained:
        return None
    return sum(retained) / len(retained)

def tokenize(text):
    """Split ``text`` into lower-case, purely alphabetic (a-z) word tokens.

    A run of letters only counts when it is delimited by word boundaries on
    both sides, so letters glued to digits or underscores are not returned.
    """
    lowered = text.lower()
    return [match.group() for match in re.finditer(r'\b[a-z]+\b', lowered)]

def annotate_sentiment(G, labmt_dict, neutralSentimentDel):
    """Store a 'sentiment' attribute on every node of ``G``.

    Each node's 'text' attribute (an empty string when missing) is tokenized
    and scored with ``calculate_labmt_sentiment``; the result, which may be
    None, is written to the node's 'sentiment' attribute. ``G`` is modified
    in place and nothing is returned.

    Args:
        G: Graph whose nodes are annotated.
        labmt_dict: Mapping from word to happiness score.
        neutralSentimentDel: Passed through to ``calculate_labmt_sentiment``.
    """
    for node in G.nodes():
        attributes = G.nodes[node]
        words = tokenize(attributes.get('text', ''))
        attributes['sentiment'] = calculate_labmt_sentiment(
            words, labmt_dict, neutralSentimentDel
        )

In [None]:
# Score every node; neutralSentimentDel=1 leaves neutral words out of the averages
annotate_sentiment(G, labmt_dict, neutralSentimentDel=1)

# Collect (node, sentiment) pairs for the nodes that received a score
sentiments = [
    (node, value)
    for node, value in G.nodes(data='sentiment')
    if value is not None
]
# Split the pairs into parallel tuples of names and scores
names, scores = zip(*sentiments)

In [None]:
# Summary statistics of the per-node sentiment scores
mean_sentiment = statistics.mean(scores)
median_sentiment = statistics.median(scores)
variance_sentiment = statistics.variance(scores)  # sample variance
percentile_25, percentile_75 = np.percentile(scores, [25, 75])

# Report the statistics, one per line
print(
    f"Mean sentiment: {mean_sentiment:.3f}",
    f"Median sentiment: {median_sentiment:.3f}",
    f"Variance: {variance_sentiment:.3f}",
    f"25th percentile: {percentile_25:.3f}",
    f"75th percentile: {percentile_75:.3f}",
    sep="\n",
)


In [None]:
# Histogram of the sentiment scores with a KDE curve on top
plt.figure(figsize=(10, 6))
ax = sns.histplot(scores, bins=30, kde=True, color='mediumseagreen', edgecolor='black')

# Mark the summary statistics with vertical lines (labels feed the legend)
ax.axvline(mean_sentiment, color='blue', linestyle='--', label=f'Mean: {mean_sentiment:.2f}')
ax.axvline(median_sentiment, color='red', linestyle='--', label=f'Median: {median_sentiment:.2f}')
ax.axvline(percentile_25, color='purple', linestyle=':', label=f'25th %ile: {percentile_25:.2f}')
ax.axvline(percentile_75, color='orange', linestyle=':', label=f'75th %ile: {percentile_75:.2f}')

# Titles, axis labels and legend
ax.set_title("Distribution of Wikipedia Page Sentiment")
ax.set_xlabel("Sentiment Score")
ax.set_ylabel("Number of Artists")
ax.legend()

plt.tight_layout()
plt.show()


In [None]:
# Find happiest and saddest artists:
# rank by sentiment (ascending) and take ten entries from each end.
sorted_sentiments = sorted(sentiments, key=lambda x: x[1])
saddest = sorted_sentiments[:10]
happiest = sorted_sentiments[-10:]

print("\n🎭 Saddest Artists:")
for name, score in saddest:
    print(f"{name}: {score:.2f}")

print("\n🎉 Happiest Artists:")
# happiest is in ascending order; reverse it so the top score is printed first
for name, score in reversed(happiest):
    print(f"{name}: {score:.2f}")

## Sentiment of communities

In [None]:
# Calculate the sentiment for the 7 largest communities

# Step 1: Select the 7 largest communities - these are the communities for
# which the TF-IDF analysis was made
sorted_communities = sorted(communities, key=len, reverse=True)
top_communities = sorted_communities[:7]

# Step 2: Calculate average sentiment and name each community
community_info = []

for i, community in enumerate(top_communities):
    # Sentiment scores of the members that have one. Kept in its own name so
    # the notebook-level `scores` (all artists) is not overwritten.
    community_scores = [
        G.nodes[n]['sentiment']
        for n in community
        if G.nodes[n].get('sentiment') is not None
    ]
    avg_sentiment = (
        sum(community_scores) / len(community_scores) if community_scores else None
    )

    # Name the community after its 3 members with the highest degree inside
    # the community's induced subgraph
    subgraph = G.subgraph(community)
    top_nodes = sorted(subgraph.degree, key=lambda x: x[1], reverse=True)[:3]
    top_band_names = [n for n, _ in top_nodes]

    # Store info
    community_info.append({
        "index": i,
        "name": ", ".join(top_band_names),
        "avg_sentiment": avg_sentiment,
        "size": len(community)
    })

# Step 3: Print community info (communities are numbered from 1, largest first)
print("\n🎼 Community Sentiment Overview:")
for info in community_info:
    print(f"Community {info['index'] + 1} ({info['name']}):")
    print(f"  Size: {info['size']}")
    if info['avg_sentiment'] is not None:
        print(f"  Average Sentiment: {info['avg_sentiment']:.3f}")
    else:
        print("  No sentiment data")

In [None]:
# Step 4: Identify happiest and saddest communities
# Only communities with an average sentiment can be ranked.
valid_communities = [c for c in community_info if c['avg_sentiment'] is not None]
sorted_by_sentiment = sorted(valid_communities, key=lambda x: x['avg_sentiment'])

# Three lowest and three highest averages (ascending order in both lists)
saddest = sorted_by_sentiment[:3]
happiest = sorted_by_sentiment[-3:]

print("\n😢 Saddest Communities:")
for c in saddest:
    print(f"Community {c['index'] + 1} ({c['name']}): {c['avg_sentiment']:.3f}")

print("\n😄 Happiest Communities:")
# Reverse so the community with the highest average is printed first
for c in reversed(happiest):
    print(f"Community {c['index'] + 1} ({c['name']}): {c['avg_sentiment']:.3f}")