In [6]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm import tqdm
import os

def analyze_sentiment(text_list, model, tokenizer):
    """Classify a batch of texts and return a label and polarity score for each.

    Args:
        text_list: Iterable of texts. Entries for which ``pd.notna`` is false
            are dropped before inference, so the returned list can be shorter
            than the input and is aligned with the *remaining* entries only.
        model: Sequence-classification model; called as ``model(**inputs)``
            and expected to expose ``.logits`` on its output and
            ``config.id2label`` mapping class index to label name.
        tokenizer: Callable that turns the list of strings into the keyword
            inputs for ``model``.

    Returns:
        A list of ``{'label': str, 'score': float}`` dicts. ``label`` is the
        name of the most probable class; ``score`` is
        ``P(positive) - P(negative)`` and therefore lies in ``[-1, 1]``.
        An empty list is returned when no usable text remains.
    """
    texts = [str(t) for t in text_list if pd.notna(t)]
    if not texts:
        return []

    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt", max_length=512)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # The class order is model-specific (ProsusAI/finbert's published config
    # maps index 0 to "positive", not "negative"), so resolve the indices from
    # the label names instead of assuming fixed positions.
    id2label = model.config.id2label
    index_by_label = {str(label).lower(): int(idx) for idx, label in id2label.items()}
    if "positive" in index_by_label and "negative" in index_by_label:
        positive_idx = index_by_label["positive"]
        negative_idx = index_by_label["negative"]
    else:
        # Labels are not named (e.g. "LABEL_0"): keep the historical
        # positional convention so such models behave as before.
        positive_idx, negative_idx = 2, 0

    results = []
    for p in probabilities:
        sentiment_score = p[positive_idx].item() - p[negative_idx].item()
        sentiment_label = id2label[torch.argmax(p).item()]
        results.append({'label': sentiment_label, 'score': sentiment_score})

    return results

def main():
    """Score the cleaned Reliance news and Reddit CSVs with FinBERT.

    Loads the model once, then for each input CSV that exists and has a
    ``text`` column, runs ``analyze_sentiment`` row by row and writes a copy
    with ``sentiment_label`` and ``sentiment_score`` columns added. Rows with
    missing text get ``'neutral'`` / ``0.0``. Problems are reported on stdout
    rather than raised.
    """
    # Registers ``progress_apply`` on pandas objects.
    tqdm.pandas()

    model_name = "ProsusAI/finbert"

    # (input CSV, output CSV) pairs, processed in this order.
    file_pairs = [
        ('cleaned_reliance_reliance_news.csv', 'reliance_news_with_sentiment.csv'),
        ('cleaned_reliance_reliancereddit.csv', 'reliance_reddit_with_sentiment.csv'),
    ]

    print("--- Starting Sentiment Analysis Process for Reliance Industries ---")

    try:
        print(f"Loading FinBERT model: {model_name}...")
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        print("Model loaded successfully.\n")

        def score_text(text):
            # One text per call; missing values are passed through as None.
            if pd.notna(text):
                return analyze_sentiment([text], model, tokenizer)[0]
            return None

        for source_path, target_path in file_pairs:
            print(f"Processing file: {source_path}")

            if not os.path.exists(source_path):
                print(f"Error: Input file '{source_path}' not found. Please ensure the cleaning script ran successfully.")
                continue

            frame = pd.read_csv(source_path)
            if 'text' not in frame.columns:
                print(f"Error: 'text' column not found in {source_path}. Skipping sentiment analysis for this file.")
                continue

            print("Analyzing sentiment... (this may take a while)")
            per_row = frame['text'].progress_apply(score_text)

            # Split the per-row dicts into two columns, with defaults for
            # rows that had no text.
            labels = []
            scores = []
            for outcome in per_row:
                if outcome:
                    labels.append(outcome['label'])
                    scores.append(outcome['score'])
                else:
                    labels.append('neutral')
                    scores.append(0.0)

            frame['sentiment_label'] = labels
            frame['sentiment_score'] = scores

            frame.to_csv(target_path, index=False)
            print(f"Sentiment analysis complete. Saved to {target_path}\n")

    except FileNotFoundError as err:
        print(f"Error: A required input file was not found - {err.filename}. Please ensure the cleaning script ran successfully.")
    except Exception as err:
        print(f"An unexpected error occurred during sentiment analysis: {err}")

    print("--- Sentiment Analysis Process Completed ---")

# Run the pipeline only when executed as a script or notebook cell, not on import.
if __name__ == "__main__":
    main()


--- Starting Sentiment Analysis Process for Reliance Industries ---
Loading FinBERT model: ProsusAI/finbert...


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Model loaded successfully.

Processing file: cleaned_reliance_reliance_news.csv
Analyzing sentiment... (this may take a while)


  9%|▉         | 16/173 [00:01<00:12, 12.86it/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

100%|██████████| 173/173 [00:12<00:00, 13.45it/s]


Sentiment analysis complete. Saved to reliance_news_with_sentiment.csv

Processing file: cleaned_reliance_reliancereddit.csv
Analyzing sentiment... (this may take a while)


100%|██████████| 907/907 [02:27<00:00,  6.16it/s]

Sentiment analysis complete. Saved to reliance_reddit_with_sentiment.csv

--- Sentiment Analysis Process Completed ---



