In [35]:
import pandas as pd
import glob
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [36]:
# Directory that holds the cleaned comment CSV files used as input below
folder_path = "C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned"

# Show what is currently in that directory (also fails fast if the path is wrong)
folder_contents = os.listdir(folder_path)
print(folder_contents)


['raw10_cleaned.csv', 'raw10_vader.csv', 'raw1_cleaned.csv', 'raw1_vader.csv', 'raw2_cleaned.csv', 'raw2_vader.csv', 'raw3_cleaned.csv', 'raw3_vader.csv', 'raw4_cleaned.csv', 'raw4_vader.csv', 'raw5_cleaned.csv', 'raw5_vader.csv', 'raw6_cleaned.csv', 'raw6_vader.csv', 'raw7_cleaned.csv', 'raw7_vader.csv', 'raw8_cleaned.csv', 'raw8_vader.csv', 'raw9_cleaned.csv', 'raw9_vader.csv', 'sentiment_summary_vader.csv']


In [37]:
# Collect every 'raw<N>_cleaned.csv' that is actually present in the folder,
# ordered by its number N (raw1, raw2, ..., raw10, ...), instead of hard-coding
# the range 1..10.
# NOTE(review): an earlier glob-based attempt presumably matched nothing because
# the folder path contains '[NTU]', which glob interprets as a character class
# (glob.escape(folder_path) would be needed). os.listdir() does no pattern
# matching, so the brackets are harmless here.
file_prefix, file_suffix = 'raw', '_cleaned.csv'
numbered_names = []
for name in os.listdir(folder_path):
    if name.startswith(file_prefix) and name.endswith(file_suffix):
        number = name[len(file_prefix):-len(file_suffix)]
        # Only accept purely numeric middles, so 'raw_cleaned.csv' or
        # 'rawX_cleaned.csv' are ignored.
        if number.isdecimal():
            numbered_names.append((int(number), name))

# Sort numerically (not lexically) so raw10 comes after raw9, as before.
csv_files = [os.path.join(folder_path, name) for _, name in sorted(numbered_names)]

In [38]:
# Sanity check: report for each expected input file whether it is present on disk
print(f"Checking files in {folder_path}:")
for csv_path in csv_files:
    file_name = os.path.basename(csv_path)
    is_present = os.path.exists(csv_path)
    print(f"Does {file_name} exist? {is_present}")

Checking files in C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned:
Does raw1_cleaned.csv exist? True
Does raw2_cleaned.csv exist? True
Does raw3_cleaned.csv exist? True
Does raw4_cleaned.csv exist? True
Does raw5_cleaned.csv exist? True
Does raw6_cleaned.csv exist? True
Does raw7_cleaned.csv exist? True
Does raw8_cleaned.csv exist? True
Does raw9_cleaned.csv exist? True
Does raw10_cleaned.csv exist? True


In [41]:
# Initialize VADER sentiment analyzer once at module level; apply_vader() below
# reads this shared instance on every call instead of creating its own.
analyzer = SentimentIntensityAnalyzer()

# Function to apply VADER sentiment analysis to a text column
def apply_vader(text, threshold=0.05):
    """Score one text value with VADER and attach an overall sentiment label.

    Uses the module-level ``analyzer`` instance.

    Parameters
    ----------
    text : object
        A value taken from the text column. Anything that is not a non-blank
        string (e.g. None, NaN, "", "   ", numbers) is treated as carrying no
        sentiment and is never passed to the analyzer.
    threshold : float, default 0.05
        Cut-off applied to the compound score: ``compound >= threshold`` is
        "positive", ``compound <= -threshold`` is "negative", everything in
        between is "neutral". The boundaries are inclusive, following the
        classification rule given in the vaderSentiment documentation.

    Returns
    -------
    pd.Series
        Entries 'pos', 'neg', 'neu', 'compound' (floats) and
        'sentiment_label' (str), in that order.
    """
    # Guard clause: missing / blank / non-string values get an all-zero neutral row
    if not (isinstance(text, str) and text.strip()):
        return pd.Series({'pos': 0.0, 'neg': 0.0, 'neu': 0.0, 'compound': 0.0, "sentiment_label": "neutral"})

    scores = analyzer.polarity_scores(text)
    compound = scores['compound']

    # Assign sentiment based on compound score (inclusive boundaries, so a
    # score of exactly +/-threshold is not silently labelled neutral)
    if compound >= threshold:
        sentiment_label = "positive"
    elif compound <= -threshold:
        sentiment_label = "negative"
    else:
        sentiment_label = "neutral"

    return pd.Series({
        'pos': scores['pos'],
        'neg': scores['neg'],
        'neu': scores['neu'],
        'compound': compound,
        "sentiment_label": sentiment_label
    })

In [45]:
# Store summary statistics: one dict per successfully processed file
summary = []

for file in csv_files:
    # Read CSV
    df = pd.read_csv(file)

    # Ensure 'comment_body' column exists
    if 'comment_body' not in df.columns:
        print(f"Warning: 'comment_body' column not found in {file}. Skipping...")
        continue

    # A file with a header but no rows cannot be scored: Series.apply returns an
    # empty Series (not a DataFrame with pos/neg/neu/compound columns) for empty
    # input, so the df['pos'] lookups below would raise KeyError.
    if df.empty:
        print(f"Warning: no rows found in {file}. Skipping...")
        continue

    # Apply VADER sentiment analysis to 'comment_body'; because apply_vader
    # returns a Series per row, the result is a DataFrame of score columns
    # aligned on df's index.
    sentiment_scores = df['comment_body'].apply(apply_vader)
    df = pd.concat([df, sentiment_scores], axis=1)

    # Save output to a new CSV with '_vader' suffix (same folder as the input)
    output_file = file.replace('_cleaned.csv', '_vader.csv')
    df.to_csv(output_file, index=False)
    print(f"Processed and saved: {output_file}")

    # Compute summary statistics (mean pos, neg, neu, compound).
    # Rows with missing/blank comments were scored as 0.0 by apply_vader and
    # are therefore included in these means.
    summary.append({
        'File': os.path.basename(file),
        'Mean Positive Score': df['pos'].mean(),
        'Mean Negative Score': df['neg'].mean(),
        'Mean Neutral Score': df['neu'].mean(),
        'Mean Compound Score': df['compound'].mean()
    })

# Create and save summary DataFrame (one row per processed file)
summary_df = pd.DataFrame(summary)
summary_df.to_csv(os.path.join(folder_path, 'sentiment_summary_vader.csv'), index=False)
print("\nSummary of mean compound scores saved to 'sentiment_summary_vader.csv':")
print(summary_df)

Processed and saved: C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned\raw1_vader.csv
Processed and saved: C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned\raw2_vader.csv
Processed and saved: C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned\raw3_vader.csv
Processed and saved: C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned\raw4_vader.csv
Processed and saved: C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned\raw5_vader.csv
Processed and saved: C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned\raw6_vader.csv
Processed and saved: C:/Users/ewald/Documents/TU München/SS25 [NTU]/Machine Learning_Python/Final Project/VADER/raw_cleaned\raw7_vader.csv
Processed and saved: C:/Use