# TextBlob

In [4]:
import os
from nltk.sentiment import SentimentIntensityAnalyzer
from collections import defaultdict
from nltk.tokenize import sent_tokenize
import numpy as np
from textblob import TextBlob

In [5]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/ankithkodali/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
def extract_extreme_sentences(directory_path, positive_threshold=0.5, negative_threshold=-0.5):
    # store sentences by sentiment type and their counts
    extreme_sentences = {'positive': [], 'negative': []}
    sentence_counts = {'positive': 0, 'negative': 0}
    
    # get each txt file in the directory
    for filename in os.listdir(directory_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(directory_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
                blob = TextBlob(content)
                
                for sentence in blob.sentences:
                    polarity = sentence.sentiment.polarity
                    if polarity > positive_threshold:
                        extreme_sentences['positive'].append(polarity)
                        sentence_counts['positive'] += 1
                    elif polarity < negative_threshold:
                        extreme_sentences['negative'].append(polarity)
                        sentence_counts['negative'] += 1
                        
    return extreme_sentences, sentence_counts


In [7]:
def analyze_podcasters():
    podcaster_sentiments = defaultdict(lambda: {'positive': [], 'negative': []})
    podcaster_counts = defaultdict(lambda: {'positive': 0, 'negative': 0})
    
    for podcaster_dir in os.listdir('.'):
        if os.path.isdir(podcaster_dir):
            sentiments, counts = extract_extreme_sentences(podcaster_dir)
            podcaster_sentiments[podcaster_dir]['positive'].extend(sentiments['positive'])
            podcaster_sentiments[podcaster_dir]['negative'].extend(sentiments['negative'])
            podcaster_counts[podcaster_dir]['positive'] += counts['positive']
            podcaster_counts[podcaster_dir]['negative'] += counts['negative']

    # Calculate average polarities and store counts
    rankings = {}
    for podcaster, scores in podcaster_sentiments.items():
        avg_positive = np.mean(scores['positive']) if scores['positive'] else 0
        avg_negative = np.mean(scores['negative']) if scores['negative'] else 0
        rankings[podcaster] = {
            'avg_positive': avg_positive, 
            'avg_negative': avg_negative, 
            'count_positive': podcaster_counts[podcaster]['positive'], 
            'count_negative': podcaster_counts[podcaster]['negative']
        }

    return rankings

In [8]:
# analyze all podcasters
rankings = analyze_podcasters()

# ranked results
print("Podcaster Sentiment Analysis:")
for podcaster, data in rankings.items():
    print(f"{podcaster}:")
    print(f"  Average Positive Polarity = {data['avg_positive']:.2f}, Count of Extreme Positive Sentences = {data['count_positive']}")
    print(f"  Average Negative Polarity = {data['avg_negative']:.2f}, Count of Extreme Negative Sentences = {data['count_negative']}")

Podcaster Sentiment Analysis:
Nick:
  Average Positive Polarity = 0.76, Count of Extreme Positive Sentences = 369
  Average Negative Polarity = -0.81, Count of Extreme Negative Sentences = 62
Cowherd:
  Average Positive Polarity = 0.73, Count of Extreme Positive Sentences = 199
  Average Negative Polarity = -0.70, Count of Extreme Negative Sentences = 53
Skip:
  Average Positive Polarity = 0.79, Count of Extreme Positive Sentences = 132
  Average Negative Polarity = -0.74, Count of Extreme Negative Sentences = 37
Shannon:
  Average Positive Polarity = 0.74, Count of Extreme Positive Sentences = 269
  Average Negative Polarity = -0.69, Count of Extreme Negative Sentences = 68
Simmons:
  Average Positive Polarity = 0.74, Count of Extreme Positive Sentences = 523
  Average Negative Polarity = -0.71, Count of Extreme Negative Sentences = 135
DanPatrick:
  Average Positive Polarity = 0.73, Count of Extreme Positive Sentences = 357
  Average Negative Polarity = -0.74, Count of Extreme Negati

# Vader

In [9]:
# Function to extract extreme sentiments and calculate percentages
def extract_extreme_sentences(directory_path, positive_threshold=0.5, negative_threshold=-0.5):
    sia = SentimentIntensityAnalyzer()
    extreme_sentences = {'positive': [], 'negative': []}
    sentence_counts = {'positive': 0, 'negative': 0, 'total': 0}
    
    for filename in os.listdir(directory_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(directory_path, filename)
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
                sentences = sent_tokenize(content)
                sentence_counts['total'] += len(sentences)
                for sentence in sentences:
                    vs = sia.polarity_scores(sentence)
                    polarity = vs['compound']
                    
                    if polarity > positive_threshold:
                        extreme_sentences['positive'].append(polarity)
                        sentence_counts['positive'] += 1
                    elif polarity < negative_threshold:
                        extreme_sentences['negative'].append(polarity)
                        sentence_counts['negative'] += 1
                        
    return extreme_sentences, sentence_counts

# analyze all podcasters in directory
def analyze_podcasters():
    podcaster_sentiments = defaultdict(lambda: {'positive': [], 'negative': []})
    podcaster_counts = defaultdict(lambda: {'positive': 0, 'negative': 0, 'total': 0})
    
    for podcaster_dir in os.listdir('.'):
        if os.path.isdir(podcaster_dir):
            sentiments, counts = extract_extreme_sentences(podcaster_dir)
            podcaster_sentiments[podcaster_dir]['positive'].extend(sentiments['positive'])
            podcaster_sentiments[podcaster_dir]['negative'].extend(sentiments['negative'])
            podcaster_counts[podcaster_dir]['positive'] += counts['positive']
            podcaster_counts[podcaster_dir]['negative'] += counts['negative']
            podcaster_counts[podcaster_dir]['total'] += counts['total']

    # Calculate average polarities and percentages
    rankings = {}
    for podcaster, scores in podcaster_sentiments.items():
        avg_positive = np.mean(scores['positive']) if scores['positive'] else 0
        avg_negative = np.mean(scores['negative']) if scores['negative'] else 0
        perc_positive = (podcaster_counts[podcaster]['positive'] / podcaster_counts[podcaster]['total']) * 100 if podcaster_counts[podcaster]['total'] else 0
        perc_negative = (podcaster_counts[podcaster]['negative'] / podcaster_counts[podcaster]['total']) * 100 if podcaster_counts[podcaster]['total'] else 0
        rankings[podcaster] = {
            'avg_positive': avg_positive,
            'perc_positive': perc_positive,
            'count_positive': podcaster_counts[podcaster]['positive'],
            'avg_negative': avg_negative,
            'perc_negative': perc_negative,
            'count_negative': podcaster_counts[podcaster]['negative']
        }

    return rankings

# analyze all podcasters
rankings = analyze_podcasters()

# ranked results with percentages
print("Podcaster Sentiment Analysis:")
for podcaster, data in rankings.items():
    print(f"{podcaster}:")
    print(f"  Average Positive Polarity = {data['avg_positive']:.2f}, Count of Extreme Positive Sentences = {data['count_positive']}, Percentage Positive = {data['perc_positive']:.2f}%")
    print(f"  Average Negative Polarity = {data['avg_negative']:.2f}, Count of Extreme Negative Sentences = {data['count_negative']}, Percentage Negative = {data['perc_negative']:.2f}%")

Podcaster Sentiment Analysis:
Nick:
  Average Positive Polarity = 0.67, Count of Extreme Positive Sentences = 853, Percentage Positive = 17.09%
  Average Negative Polarity = -0.63, Count of Extreme Negative Sentences = 205, Percentage Negative = 4.11%
Cowherd:
  Average Positive Polarity = 0.69, Count of Extreme Positive Sentences = 714, Percentage Positive = 21.14%
  Average Negative Polarity = -0.64, Count of Extreme Negative Sentences = 145, Percentage Negative = 4.29%
Skip:
  Average Positive Polarity = 0.68, Count of Extreme Positive Sentences = 413, Percentage Positive = 14.25%
  Average Negative Polarity = -0.67, Count of Extreme Negative Sentences = 161, Percentage Negative = 5.56%
Shannon:
  Average Positive Polarity = 0.66, Count of Extreme Positive Sentences = 849, Percentage Positive = 10.11%
  Average Negative Polarity = -0.63, Count of Extreme Negative Sentences = 280, Percentage Negative = 3.33%
Simmons:
  Average Positive Polarity = 0.69, Count of Extreme Positive Sente