In [2]:
import pandas as pd
import torch
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load Fine-Tuned Model
model_path = "/Users/ritvikgupta1721/Documents/MINORII/TRAINEDBERT"
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create sentiment analysis pipeline using fine-tuned model
sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

Device set to use cpu


In [4]:
def get_sentiment(text):
    """Analyze sentiment using the fine-tuned BERT model with proper truncation."""
    
    # Ensure input is a string and handle NaN values
    if not isinstance(text, str):
        text = str(text)  # Convert any non-string input to string
    
    # Truncate to 512 characters (if text is too long)
    text = text[:512]

    # Run through model pipeline
    result = sentiment_pipeline(text)[0]
    
    label_map = {"LABEL_0": "negative", "LABEL_1": "neutral", "LABEL_2": "positive"}
    sentiment = label_map.get(result['label'], "unknown")
    confidence = result['score']
    
    return sentiment, confidence

In [5]:
def analyze_text_input():
    """Takes user input (paragraph) and returns sentiment analysis."""
    text = input("Enter your text: ")
    sentiment, confidence = get_sentiment(text)
    print(f"Sentiment: {sentiment} (Confidence: {confidence*100:.2f}%)")

In [6]:

def analyze_csv(file_path):
    """Reads a CSV file and analyzes sentiment for the most relevant text column."""

    # Load CSV file
    df = pd.read_csv(file_path)
    
    # Identify potential text columns (excluding numeric ones)
    text_columns = [col for col in df.columns if df[col].dtype == 'O']  # 'O' means object (string)
    
    if not text_columns:
        print("Error: No text column found for sentiment analysis.")
        return
    
    print(f"Detected text columns: {text_columns}")

    # If multiple text columns exist, concatenate them
    df["combined_text"] = df[text_columns].astype(str).agg(" ".join, axis=1)

    sentiments = []
    confidences = []

    for text in tqdm(df["combined_text"], desc="Analyzing Sentiments"):
        sentiment, confidence = get_sentiment(text)
        sentiments.append(sentiment)
        confidences.append(confidence)

    df["Predicted Sentiment"] = sentiments
    df["Confidence"] = confidences

    # Save results
    output_file_path = "/Users/ritvikgupta1721/Documents/MINORII/FEATURES/SENTIMENT/OUTPUT/sentiment_analysis_results.csv"
    df.to_csv(output_file_path, index=False)

    print(f"Sentiment analysis completed! Results saved to {output_file_path}")

    # Display results using Pandas
    print(df[["combined_text", "Predicted Sentiment", "Confidence"]].head())



In [7]:
def main():
    """User selects input type for sentiment analysis."""
    choice = input("Choose input type (1: Text Input, 2: CSV File): ")
    if choice == '1':
        analyze_text_input()
    elif choice == '2':
        file_path = input("Enter CSV file path: ")
        analyze_csv(file_path)
    else:
        print("Invalid choice. Please enter 1 or 2.")

In [12]:
# Execute main if running as script
if __name__ == "__main__":
    main()

Detected text columns: ['Emails', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25', 'Unnamed: 26', 'Unnamed: 27', 'Unnamed: 28', 'Unnamed: 29', 'Unnamed: 30', 'Unnamed: 31', 'Unnamed: 32', 'Unnamed: 33', 'Unnamed: 34', 'Unnamed: 35', 'Unnamed: 36', 'Unnamed: 37', 'Unnamed: 38', 'Unnamed: 39', 'Unnamed: 40', 'Unnamed: 41', 'Unnamed: 42', 'Unnamed: 43', 'Unnamed: 44', 'Unnamed: 45', 'Unnamed: 46', 'Unnamed: 47', 'Unnamed: 48', 'Unnamed: 49', 'Unnamed: 50', 'Unnamed: 51', 'Unnamed: 52', 'Unnamed: 53', 'Unnamed: 54', 'Unnamed: 55', 'Unnamed: 56', 'Unnamed: 57', 'Unnamed: 58', 'Unnamed: 59', 'Unnamed: 60', 'Unnamed: 61', 'Unnamed: 62', 'Unnamed: 63', 'Unnamed: 64', 'Unnamed: 65', 

Analyzing Sentiments: 100%|██████████| 5734/5734 [06:22<00:00, 15.01it/s]


Sentiment analysis completed! Results saved to /Users/ritvikgupta1721/Documents/MINORII/FEATURES/SENTIMENT/OUTPUT/sentiment_analysis_results.csv
                                       combined_text Predicted Sentiment  \
0  Subject: naturally irresistible your corporate...            positive   
1  Subject: the stock trading gunslinger  fanny i...            negative   
2  Subject: unbelievable new homes made easy  im ...            positive   
3  Subject: 4 color printing special  request add...            positive   
4  Subject: do not have money , get software cds ...            positive   

   Confidence  
0    0.999997  
1    0.895580  
2    0.999992  
3    0.999971  
4    0.987762  
