In [1]:
import sqlite3
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Enable pandas progress_apply
tqdm.pandas()

In [3]:
# Connect to your SQLite database and load data
conn = sqlite3.connect("Auto_Reply_v3.db")
queries = [
    'SELECT "translation" AS Review, "Developer Reply Text" AS Reply FROM All_Reviews',
    'SELECT "translation" AS Review, "Reply" AS Reply FROM "Auto Reply Hadith"',
    'SELECT "translation" AS Review, "Reply" AS Reply FROM Auto_Reply_Reviews',
    'SELECT "translation" AS Review, "Reply" AS Reply FROM Auto_Reply_Reviews_Al_Quran'
]
dfs = [pd.read_sql_query(q, conn) for q in queries]
df = pd.concat(dfs, ignore_index=True)
conn.close()


In [4]:
# Check CUDA availability and select device
device = 0 if torch.cuda.is_available() else -1
if device == 0:
    print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("CUDA is not available. Using CPU.")

CUDA is available. Using GPU: NVIDIA GeForce RTX 4070 Ti SUPER


In [5]:
# Model for multi-class emotion classification
model_name = 'bhadresh-savani/bert-base-uncased-emotion'
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_analyzer = pipeline(
    'text-classification',
    model=model_name,
    tokenizer=tokenizer,
    device=device,
    return_all_scores=True
)
print(f"Sentiment pipeline will run on: {'GPU' if device==0 else 'CPU'} (device index {device})")

# Helper: standardize output lists

def analyze_sentiment(text):
    if not isinstance(text, str) or not text.strip():
        return None
    output = sentiment_analyzer(
        text,
        truncation=True,
        max_length=512
    )
    # pipeline may return list of list when batching; handle both
    scores = output[0] if isinstance(output, list) and isinstance(output[0], list) else output
    best = max(scores, key=lambda x: x['score'])
    return best['label']

# Apply sentiment analysis with progress bar and save to CSV
print("Starting sentiment analysis on reviews...")
df['sentiment'] = df['Review'].progress_apply(analyze_sentiment)

# Save results
df.to_csv('8.multipleSentiment.csv', index=False)
print(f"Processed {len(df)} rows. Results saved to 8.multipleSentiment.csv")

Device set to use cuda:0


Sentiment pipeline will run on: GPU (device index 0)
Starting sentiment analysis on reviews...


  0%|          | 8/70460 [00:00<14:43, 79.77it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 70460/70460 [08:10<00:00, 143.56it/s]


Processed 70460 rows. Results saved to 8.multipleSentiment.csv
