In [2]:
import os
import string

import boto3
import nltk
import pandas as pd
from botocore.config import Config
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer


# --- 1. Make sure the required NLTK resources are installed ---
# nltk.data.find() raises LookupError when a resource is missing, so that is
# the exception to catch. Each resource is checked independently so that a
# missing VADER lexicon does not prevent the stopwords corpus from being
# fetched (and vice versa). When both are present no network access happens.
for resource_path, package_name in (
    ('sentiment/vader_lexicon.zip', 'vader_lexicon'),
    ('corpora/stopwords', 'stopwords'),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        print(f"NLTK resource '{package_name}' not found. Downloading...")
        nltk.download(package_name)
        print(f"NLTK resource '{package_name}' downloaded.")

# --- 2. Initialize the Sentiment Intensity Analyzer ---
# Both objects are module-level on purpose: get_vader_sentiment() reads them.
sid = SentimentIntensityAnalyzer()
stop_words = set(stopwords.words('english'))

# --- 3. Define a function to get sentiment scores ---
def get_vader_sentiment(text, strip_stopwords=False):
    """Score one piece of text with VADER and label its overall sentiment.

    By default the text is passed to VADER unmodified. VADER's rules rely on
    capitalisation, punctuation, negations ("not", "no"), degree modifiers
    ("very", "too") and the contrastive "but"; NLTK's English stop-word list
    contains those very words, so lower-casing and removing stop words before
    scoring discards the signals the analyzer is built on (e.g. "not good"
    becomes "good").

    Parameters
    ----------
    text : object
        The text to score. Anything that is not a ``str`` (NaN, None,
        numbers, ...) is treated as missing.
    strip_stopwords : bool, default False
        When True, lower-case the text and drop stop words and
        punctuation-only tokens before scoring. Provided only to reproduce
        the earlier preprocessing; not recommended with VADER.

    Returns
    -------
    dict
        Keys ``'neg'``, ``'neu'``, ``'pos'``, ``'compound'`` (floats) and
        ``'overall_sentiment'``, which is ``'Positive'`` when compound >= 0.05,
        ``'Negative'`` when compound <= -0.05, ``'Neutral'`` otherwise, and
        ``'Neutral (N/A)'`` (with all scores 0.0) for missing input.
    """
    # NaN / None / non-string cells are not str instances, so this single
    # check covers them without calling pd.isna (which is ambiguous on
    # array-like values).
    if not isinstance(text, str):
        return {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compound': 0.0, 'overall_sentiment': 'Neutral (N/A)'}

    if strip_stopwords:
        # Legacy preprocessing, kept opt-in for comparison with older runs.
        tokens = [
            word for word in text.lower().split()
            if word not in stop_words and word not in string.punctuation
        ]
        text_to_score = " ".join(tokens)
    else:
        text_to_score = text

    scores = sid.polarity_scores(text_to_score)
    compound_score = scores['compound']

    # Classify overall sentiment based on compound score thresholds
    if compound_score >= 0.05:
        overall_sentiment = "Positive"
    elif compound_score <= -0.05:
        overall_sentiment = "Negative"
    else:
        overall_sentiment = "Neutral"

    return {
        'neg': scores['neg'],
        'neu': scores['neu'],
        'pos': scores['pos'],
        'compound': compound_score,
        'overall_sentiment': overall_sentiment
    }

# --- 4. Load the CSV file ---
# The path is bound to a name so the messages below can refer to it; the
# previous error handler formatted an undefined variable and would itself
# fail with NameError when the file was missing.
csv_file_path = r'C:\Users\91904\combined.csv'

try:
    df = pd.read_csv(csv_file_path)
except FileNotFoundError:
    print(f"Error: '{csv_file_path}' not found. Please ensure the CSV file is in the correct directory.")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the
    # kernel down, whereas the exception just stops this cell with a traceback.
    raise
else:
    print(f"Successfully loaded '{csv_file_path}'. Shape: {df.shape}")
    print("\nOriginal DataFrame head:")
    print(df.head())

# --- 5. Apply the sentiment analysis function to the 'short_description' column ---
# Work on a copy so the frame loaded above stays untouched and this cell can
# be re-run without stacking duplicate sentiment columns.
df_sentiment = df.copy()

print("\nApplying sentiment analysis to 'short_description' column...")
# Build the score columns from a list of dicts in one go. This avoids
# constructing a pd.Series per row (slow on large frames). Passing `columns`
# explicitly guarantees the five score columns exist even for an empty input.
sentiment_results = pd.DataFrame(
    [get_vader_sentiment(text) for text in df_sentiment['short_description']],
    index=df_sentiment.index,
    columns=['neg', 'neu', 'pos', 'compound', 'overall_sentiment'],
)

# Concatenate the new sentiment columns with the original DataFrame
df_sentiment = pd.concat([df_sentiment, sentiment_results], axis=1)

print("\nDataFrame with sentiment scores head:")
print(df_sentiment.head())

# --- 6. Persist the scored DataFrame as CSV (row index is not written) ---
df_sentiment.to_csv(
    r'C:\Users\91904\combinedsent.csv',
    index=False,
)

# --- 7. Summary of the results ---
# How many rows received each overall sentiment label.
print("\n--- Sentiment Distribution ---")
label_counts = df_sentiment['overall_sentiment'].value_counts()
print(label_counts)

# Mean compound score per category, highest first.
print("\n--- Average Compound Score by Category ---")
compound_by_category = df_sentiment.groupby('category')['compound']
category_means = compound_by_category.mean()
print(category_means.sort_values(ascending=False))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\91904\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!



Original DataFrame head:
                                                link  \
0  https://www.huffpost.com/entry/twitch-streamer...   
1  https://www.huffpost.com/entry/dodgers-basebal...   
2  https://www.huffpost.com/entry/biden-us-forces...   
3  https://www.huffpost.com/entry/ukraine-festiva...   
4  https://www.huffpost.com/entry/2022-wnba-final...   

                                            headline  category  \
0  Twitch Bans Gambling Sites After Streamer Scam...      TECH   
1  Maury Wills, Base-Stealing Shortstop For Dodge...    SPORTS   
2  Biden Says U.S. Forces Would Defend Taiwan If ...  POLITICS   
3  ‘Beautiful And Sad At The Same Time’: Ukrainia...  POLITICS   
4  Las Vegas Aces Win First WNBA Title, Chelsea G...    SPORTS   

                                   short_description             authors  \
0  One man's claims that he scammed people on the...        Ben Blanchet   
1  Maury Wills, who helped the Los Angeles Dodger...     Beth Harris, AP   
2  President

In [3]:

# Client configuration: up to 10 attempts in botocore's "standard" retry mode,
# 30 s to establish a connection and 120 s to wait for a response.
config = Config(
    retries = {
        'max_attempts': 10,
        'mode': 'standard'
    },
    connect_timeout=30,
    read_timeout=120
)

# SECURITY: credentials must never be written into the notebook. The key pair
# that used to be hardcoded here has been exposed and must be deactivated and
# rotated in AWS IAM.
# Credentials are read from the standard AWS environment variables. When they
# are unset both values are None and boto3 falls back to its default
# credential chain (shared credentials file, instance/role credentials, ...).
acc_key = os.environ.get("AWS_ACCESS_KEY_ID")
sec_key = os.environ.get("AWS_SECRET_ACCESS_KEY")
s3 = boto3.client('s3', config=config,aws_access_key_id=acc_key,aws_secret_access_key=sec_key)

# Upload the CSV produced by the sentiment-analysis cell.
# Note the object key ('combinedsenti.csv') differs from the local file name.
file = r'C:\Users\91904\combinedsent.csv'
s3.upload_file(file, 'news-senticonomy-bucket', 'combinedsenti.csv')
