In [5]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -q
!pip install transformers pandas scikit-learn openvino-dev[onnx] gradio plotly numpy tqdm --upgrade -q
print("Libraries installed successfully!")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.2/23.2 MB[0m [31m87.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m875.6/875.6 kB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m110.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m663.9/663.9 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.9/417.9 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m168.4/168.4 MB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.1/58.1 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m128.2/128.2 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [6]:
import pandas as pd

In [1]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv('/content/Reviews.csv', encoding='latin1', on_bad_lines='skip')
print(f"Dataset loaded with {len(df)} rows!")

# Use the full dataset (no sampling) for maximum accuracy
# Enhanced text cleaning function
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'[^A-Za-z0-9\s.,!?]', '', text)  # Keep some punctuation for context
    text = re.sub(r'\s+', ' ', text)  # Normalize spaces
    return text.strip() or "no content"

df['Text'] = df['Text'].apply(clean_text)
df = df[df['Text'] != "no content"]  # Remove empty reviews

# Binary sentiment: 1 (positive, score >= 4), 0 (negative, score <= 2)
df['Sentiment'] = df['Score'].apply(lambda x: 1 if x >= 4 else 0 if x <= 2 else -1)
df = df[df['Sentiment'] != -1]  # Drop neutral reviews

# Balance dataset
df_positive = df[df['Sentiment'] == 1].sample(n=min(len(df[df['Sentiment'] == 0]), len(df[df['Sentiment'] == 1])), random_state=42)
df_negative = df[df['Sentiment'] == 0]
df = pd.concat([df_positive, df_negative])
print(f"Balanced dataset size: {len(df)}")

# Split into train, validation, and test sets (80-10-10)
train_texts, temp_texts, train_labels, temp_labels = train_test_split(
    df['Text'].tolist(), df['Sentiment'].tolist(), test_size=0.2, random_state=42
)
val_texts, test_texts, val_labels, test_labels = train_test_split(
    temp_texts, temp_labels, test_size=0.5, random_state=42
)
print(f"Training samples: {len(train_texts)}, Validation samples: {len(val_texts)}, Test samples: {len(test_texts)}")

Dataset loaded with 6263 rows!
Balanced dataset size: 1848
Training samples: 1478, Validation samples: 185, Test samples: 185


In [2]:
from transformers import BertTokenizer
import torch

# Check GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}, GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

# Use BERT (larger model) instead of DistilBERT
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize with a higher max length for better context
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=256)
val_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=256)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=256)
print("Tokenization complete!")

# Create PyTorch datasets
class ReviewDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = ReviewDataset(train_encodings, train_labels)
val_dataset = ReviewDataset(val_encodings, val_labels)
test_dataset = ReviewDataset(test_encodings, test_labels)
print("Datasets created!")

Using device: cuda, GPU: Tesla T4


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Tokenization complete!
Datasets created!


In [3]:
from transformers import BertForSequenceClassification, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm.auto import tqdm
import numpy as np

# Load BERT model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2).to(device)

# Data loaders with adaptive batch size
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)  # Larger batch size if memory allows
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

# Optimizer and scheduler with fine-tuned parameters
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)  # Add weight decay for regularization
total_steps = len(train_loader) * 10  # 10 epochs for deeper training
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=1000, num_training_steps=total_steps)

# Training loop with early stopping
num_epochs = 50  # Allow extensive training
best_accuracy = 0
patience = 5
no_improve = 0

for epoch in range(num_epochs):
    model.train()
    print(f"Epoch {epoch + 1}/{num_epochs}")
    progress_bar = tqdm(train_loader, desc="Training")
    total_loss = 0
    for batch in progress_bar:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        progress_bar.set_postfix({'loss': loss.item()})

    avg_train_loss = total_loss / len(train_loader)
    print(f"Average Training Loss: {avg_train_loss:.4f}")

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            predictions = torch.argmax(outputs.logits, dim=-1)
            correct += (predictions == batch['labels']).sum().item()
            total += batch['labels'].size(0)
    accuracy = correct / total
    print(f"Validation Accuracy: {accuracy:.4f}")

    # Early stopping and model saving
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        model.save_pretrained("best_model")
        tokenizer.save_pretrained("best_model")
        print("Best model saved!")
        no_improve = 0
    else:
        no_improve += 1
        if no_improve >= patience:
            print("Early stopping triggered!")
            break

# Evaluate on test set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        predictions = torch.argmax(outputs.logits, dim=-1)
        correct += (predictions == batch['labels']).sum().item()
        total += batch['labels'].size(0)
test_accuracy = correct / total
print(f"Final Test Accuracy: {test_accuracy:.4f}")

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.6984
Validation Accuracy: 0.6811
Best model saved!
Epoch 2/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.6079
Validation Accuracy: 0.7946
Best model saved!
Epoch 3/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.3832
Validation Accuracy: 0.9135
Best model saved!
Epoch 4/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.2269
Validation Accuracy: 0.8865
Epoch 5/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.1450
Validation Accuracy: 0.9351
Best model saved!
Epoch 6/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0901
Validation Accuracy: 0.8973
Epoch 7/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0448
Validation Accuracy: 0.9351
Epoch 8/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0264
Validation Accuracy: 0.9297
Epoch 9/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0171
Validation Accuracy: 0.9297
Epoch 10/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0342
Validation Accuracy: 0.9405
Best model saved!
Epoch 11/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0100
Validation Accuracy: 0.9297
Epoch 12/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0028
Validation Accuracy: 0.9297
Epoch 13/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0042
Validation Accuracy: 0.9297
Epoch 14/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0054
Validation Accuracy: 0.9297
Epoch 15/50


Training:   0%|          | 0/93 [00:00<?, ?it/s]

Average Training Loss: 0.0005
Validation Accuracy: 0.9297
Early stopping triggered!
Final Test Accuracy: 0.9459


In [4]:
!pip install openvino-dev[onnx]

Collecting openvino-dev[onnx]
  Downloading openvino_dev-2024.6.0-17404-py3-none-any.whl.metadata (15 kB)
Collecting networkx<=3.1.0 (from openvino-dev[onnx])
  Downloading networkx-3.1-py3-none-any.whl.metadata (5.3 kB)
Collecting numpy<2.0.0,>=1.16.6 (from openvino-dev[onnx])
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openvino-telemetry>=2023.2.1 (from openvino-dev[onnx])
  Downloading openvino_telemetry-2025.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting openvino==2024.6.0 (from openvino-dev[onnx])
  Downloading openvino-2024.6.0-17404-cp311-cp311-manylinux2014_x86_64.whl.metadata (8.3 kB)
Collecting fastjsonschema<2.18,>=2.15.1 (from openvino-dev[onnx])
  Downloading fastjsonschema-2.17.1-py3-none-any.whl.metadata (2.0 kB)
Collecting onnx<=1.17.0,>=1.8.1 (from openvino-dev[onnx])
  Dow

In [5]:
from openvino.runtime import Core
from transformers import AutoModelForSequenceClassification

# Load the trained model
model = AutoModelForSequenceClassification.from_pretrained("best_model")
tokenizer = BertTokenizer.from_pretrained("best_model")

# Export to ONNX
dummy_input = tokenizer("This is a test", return_tensors="pt", padding=True, truncation=True, max_length=256)
torch.onnx.export(
    model,
    (dummy_input['input_ids'], dummy_input['attention_mask']),
    "sentiment_model.onnx",
    input_names=['input_ids', 'attention_mask'],
    output_names=['logits'],
    dynamic_axes={'input_ids': {0: 'batch_size'}, 'attention_mask': {0: 'batch_size'}, 'logits': {0: 'batch_size'}}
)

# Convert ONNX to OpenVINO IR
!mo --input_model sentiment_model.onnx --output_dir ./openvino_model --data_type FP16
print("Model converted to OpenVINO IR format!")

[ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release.
In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. 
Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html
usage: main.py [options]
main.py: error: unrecognized arguments: --data_type FP16
Model converted to OpenVINO IR format!


In [6]:
!pip install gradio plotly numpy tqdm --upgrade -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.8/14.8 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import plotly.graph_objects as go
import numpy as np
from tqdm import tqdm
import pandas as pd
import plotly.io as pio

# Ensure Plotly renders correctly in the environment
pio.renderers.default = "iframe"  # Use iframe renderer for better compatibility in Colab/Jupyter

# Load the best model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AutoModelForSequenceClassification.from_pretrained("best_model").to(device)
tokenizer = AutoTokenizer.from_pretrained("best_model")
model.eval()
print(f"Model loaded on {device}")

# Text cleaning function (from Step 2)
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'[^A-Za-z0-9\s.,!?]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip() or "no content"

# Batch processing function
def process_batch(texts, batch_size=32):
    sentiments = []
    confidences = []
    for i in tqdm(range(0, len(texts), batch_size), desc="Processing"):
        batch_texts = texts[i:i + batch_size]
        inputs = tokenizer(batch_texts, return_tensors="pt", truncation=True, padding=True, max_length=256).to(device)
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.softmax(outputs.logits, dim=-1).cpu().numpy()
        for prob in probs:
            sentiment = "Positive" if prob[1] > prob[0] else "Negative"
            confidence = max(prob)
            sentiments.append(sentiment)
            confidences.append(confidence)
    return sentiments, confidences

# Function to generate specific recommendations
def generate_recommendations(metrics, sentiments, confidences):
    """Generate specific, actionable recommendations based on sentiment metrics"""
    recommendations = []

    # Calculate additional metrics for deeper insights
    pos_confidence = np.mean([c for s, c in zip(sentiments, confidences) if s == "Positive"]) if "Positive" in sentiments else 0
    neg_confidence = np.mean([c for s, c in zip(sentiments, confidences) if s == "Negative"]) if "Negative" in sentiments else 0

    # Strong Positive Sentiment
    if metrics['positive_pct'] > 70 and metrics['avg_confidence'] > 0.7:
        recommendations.append(f"✅ High positive sentiment ({metrics['positive_pct']:.1f}%) with strong confidence ({pos_confidence:.3f}). Highlight specific positive feedback in marketing campaigns, such as product quality or customer service, to attract more customers.")

    # Strong Negative Sentiment
    elif metrics['positive_pct'] < 30 and metrics['avg_confidence'] > 0.7:
        recommendations.append(f"⚠️ High negative sentiment ({metrics['negative_pct']:.1f}%) with strong confidence ({neg_confidence:.3f}). Investigate common negative themes in reviews (e.g., product issues, delivery delays) and prioritize improvements in those areas.")

    # Balanced Sentiment
    elif 40 <= metrics['positive_pct'] <= 60:
        recommendations.append(f"⚖️ Balanced sentiment (Positive: {metrics['positive_pct']:.1f}%, Negative: {metrics['negative_pct']:.1f}%). Conduct a deeper analysis to understand mixed feedback. Consider surveying customers to identify specific pain points and strengths.")

    # Low Confidence
    if metrics['avg_confidence'] < 0.6:
        recommendations.append(f"🔍 Low average confidence ({metrics['avg_confidence']:.3f}). Some predictions may be unreliable. Manually review a sample of reviews with low confidence scores to validate the model's output and improve training data.")

    # High Confidence but Uneven Distribution
    elif metrics['avg_confidence'] > 0.8 and (metrics['positive_pct'] > 60 or metrics['negative_pct'] > 60):
        if metrics['positive_pct'] > metrics['negative_pct']:
            recommendations.append(f"📈 High confidence ({metrics['avg_confidence']:.3f}) in positive sentiment. Leverage this by sharing positive reviews on social media or in testimonials to build trust with potential customers.")
        else:
            recommendations.append(f"📉 High confidence ({metrics['avg_confidence']:.3f}) in negative sentiment. Act quickly by addressing customer complaints publicly (e.g., on social media) and offering solutions to rebuild trust.")

    # Default Recommendation
    if not recommendations:
        recommendations.append(f"➡️ Sentiment is moderately distributed (Positive: {metrics['positive_pct']:.1f}%, Negative: {metrics['negative_pct']:.1f}%). Monitor trends over time and consider gathering more detailed feedback through targeted surveys to uncover underlying issues or opportunities.")

    return "\n".join(recommendations)

# Batch analysis function with graph and specific recommendations
def accurate_batch_analysis(sample_size, csv_path):
    # Handle CSV path (use default if none uploaded)
    if csv_path is None:
        csv_path = '/content/Reviews.csv'

    try:
        df = pd.read_csv(csv_path, encoding='utf-8', on_bad_lines='skip')
    except Exception as e:
        return None, f"Error loading CSV: {str(e)}"

    df['Text'] = df['Text'].apply(clean_text)
    df = df[df['Text'] != "no content"]
    sample_size = min(int(sample_size), len(df))
    if sample_size <= 0:
        return None, "Error: Sample size must be greater than 0"

    df_sample = df.sample(n=sample_size, random_state=42)
    sentiments, confidences = process_batch(df_sample['Text'].tolist())

    # Calculate counts and metrics
    pos_count = sentiments.count("Positive")
    neg_count = sentiments.count("Negative")
    total = len(sentiments)

    metrics = {
        'positive_pct': (pos_count / total) * 100 if total > 0 else 0,
        'negative_pct': (neg_count / total) * 100 if total > 0 else 0,
        'avg_confidence': np.mean(confidences) if confidences else 0,
        'confidence_std': np.std(confidences) if confidences else 0,
        'total_reviews': total
    }

    # Create a simple horizontal bar graph
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=[pos_count, neg_count],
        y=['Positive', 'Negative'],
        orientation='h',
        marker_color=['#00cc96', '#ef553b'],  # Green for Positive, Red for Negative
        text=[f"{pos_count}", f"{neg_count}"],
        textposition='auto'
    ))
    fig.update_layout(
        title="Sentiment Distribution",
        xaxis_title="Count",
        yaxis_title="Sentiment",
        height=300,
        width=500,
        margin=dict(l=50, r=50, t=50, b=50),
        template="plotly_white"
    )

    # Generate report with specific recommendations
    report = f"""Analysis Report:
    - Total Reviews: {metrics['total_reviews']}
    - Positive: {metrics['positive_pct']:.1f}%
    - Negative: {metrics['negative_pct']:.1f}%
    - Avg Confidence: {metrics['avg_confidence']:.3f} (±{metrics['confidence_std']:.3f})

    Recommendations:
    {generate_recommendations(metrics, sentiments, confidences)}
    """

    # Debugging: Ensure figure is valid
    print("Figure created successfully")
    return fig, report

# Gradio interface with fallback
with gr.Blocks(title="Sentiment Analysis") as interface:
    gr.Markdown("# Sentiment Analysis")

    with gr.Row():
        with gr.Column():
            sample_size = gr.Slider(
                minimum=100,
                maximum=10000,
                value=1000,
                step=100,
                label="Sample Size"
            )
            csv_upload = gr.File(
                label="Upload CSV (optional)",
                file_types=[".csv"],
                type="filepath"
            )
            analyze_btn = gr.Button("Analyze", variant="primary")

        with gr.Column():
            plot_output = gr.Plot(label="Sentiment Distribution")
            report_output = gr.Textbox(label="Report", lines=10)
            # Fallback: Display graph as HTML if Plot fails
            html_output = gr.HTML(label="Fallback Graph (if Plot fails)")

    # Function to generate HTML fallback
    def create_html_fallback(fig):
        return fig.to_html(include_plotlyjs="cdn")

    # Connect button to function
    analyze_btn.click(
        fn=accurate_batch_analysis,
        inputs=[sample_size, csv_upload],
        outputs=[plot_output, report_output]
    ).then(
        fn=create_html_fallback,
        inputs=plot_output,
        outputs=html_output
    )

# Launch with debug mode
interface.launch(share=True, debug=True)

Model loaded on cuda
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://0d60c72682fdc47696.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Processing: 100%|██████████| 4/4 [00:01<00:00,  2.62it/s]


Figure created successfully


Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2137, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1663, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^

Figure created successfully


Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2137, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1663, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^

Figure created successfully


Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2137, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1663, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^

Figure created successfully


Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2137, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1663, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^

Figure created successfully


Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 2137, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/gradio/blocks.py", line 1663, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^