In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import os
import pandas as pd

# Restore the tokenizer and the sequence-classification model from the same
# saved checkpoint ("fine-tuned-bert-imdb"), so the text preprocessing matches
# the weights that will consume it.
tokenizer = AutoTokenizer.from_pretrained("fine-tuned-bert-imdb")
model = AutoModelForSequenceClassification.from_pretrained("fine-tuned-bert-imdb")

# Define a function to perform sentiment analysis
def classify_comments(comments):
    """Predict a class id for each comment in a batch.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        comments: A list of comment strings (one batch). The whole batch is
            tokenized and run through the model in a single forward pass, so
            the caller is responsible for keeping it small enough for memory.

    Returns:
        A 1-D integer tensor on the CPU with one entry per comment: the index
        of the highest logit for that comment. An empty list yields an empty
        tensor.
    """
    # Nothing to tokenize for an empty batch; short-circuit instead of handing
    # the tokenizer an empty list.
    if isinstance(comments, (list, tuple)) and len(comments) == 0:
        return torch.empty(0, dtype=torch.long)

    # padding=True pads to the longest comment in this batch; truncation=True
    # cuts comments that exceed the model's maximum input length.
    inputs = tokenizer(comments, padding=True, truncation=True, return_tensors="pt")

    # Keep inputs on the same device as the weights so this works whether or
    # not the model has been moved to a GPU.
    # NOTE(review): assumes `model` exposes `.device` (transformers PreTrainedModel).
    inputs = inputs.to(model.device)

    # Inference only: disabling autograd avoids building a gradient graph that
    # would otherwise hold activations in memory for every batch.
    with torch.no_grad():
        outputs = model(**inputs)

    predictions = torch.argmax(outputs.logits, dim=-1)
    return predictions.cpu()

# Folder that is scanned for CSV files to classify
directory = '/content'

# Human-readable names looked up by predicted class id (list position)
# NOTE(review): assumes the checkpoint uses id 0 = negative, 1 = positive — confirm
labels = [
    'negative',
    'positive',
]

In [None]:
# Process each CSV file in the directory one at a time
# Number of comments sent through the model per forward pass; keeps peak
# memory bounded regardless of file size.
batch_size = 32

# sorted() gives a deterministic processing order; os.listdir() order is arbitrary.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.csv'):
        continue

    file_path = os.path.join(directory, filename)
    df = pd.read_csv(file_path)

    # Remove a leading '="' and a trailing '"' from each column name.
    df.columns = df.columns.str.replace(r'^="|"$', '', regex=True)
    print("Cleaned Columns found:", df.columns.tolist())

    # Guard clauses: skip files that were already labelled or that have no
    # comment column. Skipped files never bind `comments`/`results`, so all
    # cleanup of those names happens only on the processed path below.
    if 'prediction' in df.columns:
        print(f"'prediction' column already exists in {filename}, skipping file.")
        continue
    if 'Comment Text' not in df.columns:
        print(f"'Comment Text' column not found in {filename}")
        continue

    # Empty CSV cells are read as NaN (a float), which the tokenizer cannot
    # handle. Classify only rows that actually contain text.
    has_text = df['Comment Text'].notna()
    comments = df.loc[has_text, 'Comment Text'].astype(str).tolist()

    # Classify comments in batches to manage memory usage
    results = []
    for start in range(0, len(comments), batch_size):
        batch_comments = comments[start:start + batch_size]
        predictions = classify_comments(batch_comments)
        # int() turns each predicted class id into a plain list index.
        results.extend(labels[int(class_id)] for class_id in predictions)

    # Index-aligned assignment: rows without comment text get an empty prediction.
    df['prediction'] = pd.Series(results, index=df.index[has_text], dtype=object)

    # Overwrite the source CSV with the cleaned headers and the new column.
    df.to_csv(file_path, index=False)

    # Print comments and their sentiments
    for comment, result in zip(comments, results):
        print(f"Comment: {comment}")
        print(f"Sentiment: {result}")

    # Release this file's data before loading the next one.
    del df, comments, results
    torch.cuda.empty_cache()  # If using a GPU
