In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score ,precision_score , recall_score , f1_score

import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification 
from transformers import pipeline

In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Run the pretrained sentiment classifier over the first rows of the reviews CSV and
# collect the ground-truth and predicted labels for the metric cells below.

# Load the CSV file
csv_file_path = "reviews_data.csv"
# Batch processing parameters (adjust the batch size based on your system's memory capacity)
batch_size = 32
num_batches = 12  # only the first num_batches * batch_size rows are evaluated
max_length = 512  # reviews are truncated to this many tokens before inference
# nrows stops parsing after the rows we need instead of reading the whole file
df = pd.read_csv(csv_file_path, nrows=num_batches * batch_size)

# Load the tokenizer and model
model_name = "Camelia7v/bert-sentiment-analysis-model-40k-samples"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Create a sentiment analysis pipeline
sentiment_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Lists to store actual and predicted sentiments
actual_sentiments = []
predicted_sentiments = []

# Process the data in batches
for i in range(0, len(df), batch_size):
    batch_df = df.iloc[i:i+batch_size]

    # Extract texts from the batch
    batch_texts = batch_df['processed Reviews'].tolist()

    # Get the actual sentiments from the dataset
    batch_actual_sentiments = batch_df['sentiment'].tolist()

    # Perform sentiment analysis on the batch.
    # The pipeline tokenizes internally, so truncation has to be requested here:
    # encoding the texts separately has no effect on what the pipeline feeds the model,
    # and without truncation an over-long review can fail at inference time.
    batch_results = sentiment_pipeline(batch_texts, truncation=True, max_length=max_length)

    # Get the predicted sentiments from the pipeline
    batch_predicted_sentiments = [result['label'] for result in batch_results]
    # Labels are parsed as "<prefix>_<int>" (e.g. "LABEL_1" -> 1) -- assumes the model
    # uses that naming scheme; TODO confirm against the model's id2label config
    predicted_numeric_sentiments = [int(label.split('_')[1]) for label in batch_predicted_sentiments]

    actual_sentiments.extend(batch_actual_sentiments)
    predicted_sentiments.extend(predicted_numeric_sentiments)



In [None]:
# Overall classification metrics computed from the collected actual/predicted labels.
accuracy = accuracy_score(actual_sentiments, predicted_sentiments)

# average='weighted': per-class scores are averaged, weighted by each class's support
precision = precision_score(actual_sentiments, predicted_sentiments, average='weighted')
recall = recall_score(actual_sentiments, predicted_sentiments, average='weighted')
f1 = f1_score(actual_sentiments, predicted_sentiments, average='weighted')
# Report the accuracy value itself (previously the precision was printed under this label)
print(f"accuracy: {accuracy:.4f}")

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

In [None]:
# Calculate F1 score for each class separately
# labels=[k] restricts the computation to class k, so each call yields that class's F1.
# The results are bound to the same names the print statements read, so this cell
# runs on a fresh kernel and reports F1 rather than a value left over from another cell.
f1_class_0 = f1_score(actual_sentiments, predicted_sentiments, labels=[0], average='weighted')
f1_class_1 = f1_score(actual_sentiments, predicted_sentiments, labels=[1], average='weighted')

print(f"F1 Score for Class 0: {f1_class_0:.4f}")
print(f"F1 Score for Class 1: {f1_class_1:.4f}")

In [None]:
# Calculate precision for each class separately
# labels=[k] restricts the computation to class k, so each call yields that class's precision.
# Variable names and printed labels say "precision" because that is the metric computed here.
precision_class_0 = precision_score(actual_sentiments, predicted_sentiments, labels=[0], average='weighted')
precision_class_1 = precision_score(actual_sentiments, predicted_sentiments, labels=[1], average='weighted')

print(f"Precision for Class 0: {precision_class_0:.4f}")
print(f"Precision for Class 1: {precision_class_1:.4f}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
# Draw the confusion matrix of actual vs. predicted sentiments as an annotated heatmap
class_names = ['Class 0', 'Class 1']
cm = confusion_matrix(actual_sentiments, predicted_sentiments)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,   # write the count inside every cell
    fmt="d",      # counts are integers
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# Horizontal bar chart of the overall accuracy / precision / recall / F1 values
metrics_values = [accuracy, precision, recall, f1]
metrics_names = ['Accuracy', 'Precision', 'Recall', 'F1 Score']

fig, ax = plt.subplots(figsize=(10, 6))
# Scores go on the x-axis and metric names on the y-axis, giving horizontal bars
sns.barplot(x=metrics_values, y=metrics_names, palette="viridis", ax=ax)
ax.set(title='Overall Metrics', xlabel='Score')
plt.show()