In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification, 
    Trainer, 
    TrainingArguments,
    BertTokenizer,
    BertForSequenceClassification
)
from datasets import Dataset, load_dataset
from sklearn.model_selection import train_test_split
import gradio as gr
import gdown
import os
from PIL import Image
import pytesseract
# Tell pytesseract where the Tesseract executable lives. This is a hard-coded
# Windows install location.
# NOTE(review): adjust (or make configurable) when running on another machine/OS.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Tone labels handled by ToneAnalyzer. The position of each entry in this list
# is the numeric class index: training labels are mapped to their index here,
# and predictions are decoded by indexing back into this list. Reordering or
# resizing it therefore changes what a given class index means.
TONE_CATEGORIES = [
    'Technical / Informative',
    'Enthusiastic / Inspirational',
    'Casual / Conversational',
    'Professional / Neutral',
    'Promotional / Persuasive'
]

def extract_text_from_image(image) -> str:
    """
    Run OCR on an image and return the recognised text.

    ``image`` is either a path string (opened with ``Image.open``) or an
    already-loaded image object. Images that are not in RGB mode are
    converted before being passed to ``pytesseract.image_to_string``.

    Returns the OCR output with surrounding whitespace stripped. This is a
    best-effort helper: any exception raised along the way is printed and
    an empty string is returned instead.
    """
    try:
        source = Image.open(image) if isinstance(image, str) else image
        rgb_image = source if source.mode == 'RGB' else source.convert('RGB')
        return pytesseract.image_to_string(rgb_image).strip()
    except Exception as e:
        print(f"Error in text extraction: {str(e)}")
        return ""

def download_and_load_dataset(file_id='1-7jNMWQQkBziKqYnpDOfjkNg-mmCtRtd',
                              output_path='website_tones.parquet'):
    """
    Download the tone dataset from Google Drive (if needed) and load it.

    Args:
        file_id: Google Drive file ID taken from the shared link. Defaults to
            the dataset this project was written against.
        output_path: Local path of the parquet file. If it already exists the
            download is skipped and the existing file is read.

    Returns:
        The parquet file's contents as a pandas DataFrame.

    Raises:
        FileNotFoundError: If the file is still missing after the download
            attempt (e.g. the link is not accessible).
    """
    # Download the file only if there is no local copy yet
    if not os.path.exists(output_path):
        url = f'https://drive.google.com/uc?id={file_id}'
        gdown.download(url, output_path, quiet=False)

        # Fail with an explicit message here rather than with a less
        # informative error from the parquet reader below.
        if not os.path.exists(output_path):
            raise FileNotFoundError(
                f"Download from {url} did not produce '{output_path}'"
            )

    # Read the parquet file
    return pd.read_parquet(output_path)

class ToneAnalyzer:
    """
    Text classifier that assigns one of ``TONE_CATEGORIES`` to a piece of text.

    Wraps a tokenizer and a sequence-classification model loaded through the
    ``Auto*`` classes, and offers fine-tuning plus single-text prediction.
    """

    def __init__(self, model_name="xlm-roberta-base", num_labels=len(TONE_CATEGORIES)):
        """
        Load the tokenizer and model and move the model to the best device.

        Args:
            model_name: Model identifier or local directory passed to
                ``from_pretrained`` (XLM-RoBERTa base by default).
            num_labels: Number of output classes; defaults to the number of
                tone categories.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels
        )
        # Use the GPU when one is available, otherwise fall back to CPU
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.tone_categories = TONE_CATEGORIES

    def prepare_dataset(self, texts, labels=None):
        """
        Tokenize texts and wrap them in a ``Dataset``.

        Args:
            texts: List of input strings.
            labels: Optional list of tone category names, one per text. Each
                must be an entry of ``self.tone_categories``; an unknown name
                raises ``KeyError``.

        Returns:
            A ``Dataset`` with ``input_ids`` and ``attention_mask`` columns,
            plus a numeric ``labels`` column when ``labels`` is given.
        """
        encodings = self.tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt"
        )

        columns = {
            'input_ids': encodings['input_ids'],
            'attention_mask': encodings['attention_mask'],
        }

        if labels is not None:
            # Convert string labels to numerical indices (position in the
            # category list is the class index)
            label_map = {cat: idx for idx, cat in enumerate(self.tone_categories)}
            columns['labels'] = [label_map[label] for label in labels]

        return Dataset.from_dict(columns)

    def fine_tune(self, train_texts, train_labels, validation_texts=None, validation_labels=None,
                 batch_size=8, num_epochs=1, learning_rate=2e-5, warmup_ratio=0.1):
        """
        Fine-tune the model and save it to ``./tone_analyzer_finetuned``.

        Args:
            train_texts: Training strings.
            train_labels: Tone category names matching ``train_texts``.
            validation_texts: Optional validation strings.
            validation_labels: Optional validation category names. When either
                validation argument is missing, 20% of the training data is
                held out for validation instead (fixed seed 42).
            batch_size: Per-device batch size for training and evaluation.
            num_epochs: Number of training epochs.
            learning_rate: Peak learning rate.
            warmup_ratio: Fraction of the total training steps used for
                learning-rate warmup.
        """
        print("Preparing training dataset...")
        train_dataset = self.prepare_dataset(train_texts, train_labels)

        if validation_texts is not None and validation_labels is not None:
            print("Preparing validation dataset...")
            validation_dataset = self.prepare_dataset(validation_texts, validation_labels)
        else:
            print("Splitting into train/validation sets...")
            train_indices, val_indices = train_test_split(
                range(len(train_dataset)),
                test_size=0.2,
                random_state=42
            )
            validation_dataset = train_dataset.select(val_indices)
            train_dataset = train_dataset.select(train_indices)

        print("Setting up training arguments...")
        training_args = TrainingArguments(
            output_dir="./tone_analyzer_results",
            num_train_epochs=num_epochs,
            per_device_train_batch_size=batch_size,
            per_device_eval_batch_size=batch_size,
            # Warmup is expressed as a ratio rather than a fixed step count:
            # a fixed 500-step warmup can be longer than the whole run on a
            # small dataset, in which case the learning rate never gets close
            # to `learning_rate` and the model barely trains.
            warmup_ratio=warmup_ratio,
            weight_decay=0.01,
            logging_dir="./tone_analyzer_logs",
            logging_steps=50,
            learning_rate=learning_rate,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            report_to="none"
        )

        print("Starting training...")
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=validation_dataset
        )

        trainer.train()

        print("Saving fine-tuned model...")
        self.model.save_pretrained("./tone_analyzer_finetuned")
        self.tokenizer.save_pretrained("./tone_analyzer_finetuned")
        print("Fine-tuning complete!")

    def predict_single(self, text):
        """
        Predict the tone of a single text.

        Args:
            text: The string to classify (truncated to 512 tokens).

        Returns:
            The entry of ``self.tone_categories`` with the highest predicted
            probability.
        """
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt"
        ).to(self.device)

        self.model.eval()
        with torch.no_grad():
            outputs = self.model(**inputs)
            # Single input, so take the first (only) row of probabilities
            probabilities = torch.softmax(outputs.logits, dim=1)[0]

        top_tone_index = torch.argmax(probabilities).item()
        return self.tone_categories[top_tone_index]

class SentimentAnalyzer:
    """
    BERT-based sentiment classifier with Negative / Neutral / Positive outputs.

    The position of each name in ``SENTIMENT_LABELS`` is the class index the
    model is trained on and the index used to decode predictions.
    """

    # Class index -> human-readable sentiment name
    SENTIMENT_LABELS = ('Negative', 'Neutral', 'Positive')

    def __init__(self, model_name="bert-base-uncased", num_labels=3):
        """
        Load the BERT tokenizer and model and move the model to the best device.

        Args:
            model_name: Model identifier or local directory passed to
                ``from_pretrained``.
            num_labels: Number of output classes.
        """
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels
        )
        # Use the GPU when one is available, otherwise fall back to CPU
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
        self.sentiment_labels = list(self.SENTIMENT_LABELS)

    @classmethod
    def _imdb_label_to_class_index(cls, imdb_label):
        """
        Map a binary IMDB label to this model's class index.

        IMDB label 1 is treated as a positive review and anything else as
        negative (the same split the training code has always used). The
        result is the index of 'Positive' / 'Negative' in
        ``SENTIMENT_LABELS``, so trained class indices decode to the matching
        name in ``predict_sentiment``.
        """
        name = 'Positive' if imdb_label == 1 else 'Negative'
        return cls.SENTIMENT_LABELS.index(name)

    def prepare_dataset(self, texts, labels=None):
        """
        Tokenize texts and wrap them in a ``Dataset``.

        Args:
            texts: List of input strings.
            labels: Optional list of numeric class indices, one per text.

        Returns:
            A ``Dataset`` with ``input_ids`` and ``attention_mask`` columns,
            plus a ``labels`` column when ``labels`` is given.
        """
        encodings = self.tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt"
        )

        columns = {
            'input_ids': encodings['input_ids'],
            'attention_mask': encodings['attention_mask'],
        }
        if labels is not None:
            columns['labels'] = labels

        return Dataset.from_dict(columns)

    def fine_tune(self, num_epochs=1, learning_rate=1e-5):
        """
        Fine-tune the model on the IMDB dataset and save it to
        ``./sentiment_analyzer_finetuned``.

        The IMDB 'train' split is used for training and the 'test' split for
        per-epoch evaluation. IMDB only has two classes, so the 'Neutral'
        class receives no training examples.

        Args:
            num_epochs: Number of training epochs.
            learning_rate: Peak learning rate.
        """
        # Load IMDB dataset
        imdb_dataset = load_dataset('imdb')

        # Prepare train and validation datasets. Labels are mapped onto the
        # indices of SENTIMENT_LABELS so a positive review is trained as
        # 'Positive' (index 2), not as index 1, which decodes to 'Neutral'.
        train_texts = list(imdb_dataset['train']['text'])
        train_labels = [
            self._imdb_label_to_class_index(label)
            for label in imdb_dataset['train']['label']
        ]

        val_texts = list(imdb_dataset['test']['text'])
        val_labels = [
            self._imdb_label_to_class_index(label)
            for label in imdb_dataset['test']['label']
        ]

        train_dataset = self.prepare_dataset(train_texts, train_labels)
        val_dataset = self.prepare_dataset(val_texts, val_labels)

        # Training arguments
        training_args = TrainingArguments(
            output_dir="./sentiment_analyzer_results",
            num_train_epochs=num_epochs,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir="./sentiment_analyzer_logs",
            logging_steps=50,
            learning_rate=learning_rate,
            evaluation_strategy="epoch",
            save_strategy="epoch",
            load_best_model_at_end=True,
            report_to="none"
        )

        # Initialize Trainer
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset
        )

        # Train the model
        trainer.train()

        # Save the fine-tuned model
        self.model.save_pretrained("./sentiment_analyzer_finetuned")
        self.tokenizer.save_pretrained("./sentiment_analyzer_finetuned")
        print("Fine-tuning complete!")

    def predict_sentiment(self, text):
        """
        Predict the sentiment of a single text.

        Args:
            text: The string to classify (truncated to 512 tokens).

        Returns:
            The entry of ``self.sentiment_labels`` for the most probable class.
        """
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding=True,
            max_length=512,
            return_tensors="pt"
        ).to(self.device)

        self.model.eval()
        with torch.no_grad():
            outputs = self.model(**inputs)
            predictions = torch.softmax(outputs.logits, dim=1)
            predicted_class = torch.argmax(predictions, dim=1).item()

        return self.sentiment_labels[predicted_class]

def train_models():
    """
    Fine-tune the tone model and then the sentiment model.

    The tone model is trained on the downloaded dataset's 'sentences' column
    with 'tone_heuristic' as labels; the sentiment model then runs its own
    fine-tuning with default settings. Progress is printed along the way.
    """
    # --- Tone model ---
    print("Downloading and loading tone dataset...")
    tone_frame = download_and_load_dataset()

    print("Initializing Tone Analyzer...")
    tone_model = ToneAnalyzer()

    print("Starting tone fine-tuning process...")
    sentences = tone_frame['sentences'].tolist()
    tone_labels = tone_frame['tone_heuristic'].tolist()
    tone_model.fine_tune(
        train_texts=sentences,
        train_labels=tone_labels,
        batch_size=8,
        num_epochs=1,
        learning_rate=2e-5,
    )

    # --- Sentiment model ---
    print("Initializing Sentiment Analyzer...")
    sentiment_model = SentimentAnalyzer()

    print("Starting sentiment fine-tuning process...")
    sentiment_model.fine_tune()

def create_gradio_interface():
    """
    Build the Gradio interface for conditional sentiment / tone analysis.

    Text-only input gets sentiment analysis; image-only input is OCR'd and
    the extracted text is classified for tone. Supplying both or neither
    returns a message asking for exactly one input.

    The fine-tuned models are loaded from ``./tone_analyzer_finetuned`` and
    ``./sentiment_analyzer_finetuned`` the first time each is needed and are
    reused for later requests. OCR relies on the Tesseract path configured
    at module import.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    # Cache of already-loaded analyzers. Loading transformer weights from disk
    # on every request is slow, so each model is loaded at most once.
    loaded = {}

    def get_tone_analyzer():
        if 'tone' not in loaded:
            loaded['tone'] = ToneAnalyzer(model_name="./tone_analyzer_finetuned")
        return loaded['tone']

    def get_sentiment_analyzer():
        if 'sentiment' not in loaded:
            loaded['sentiment'] = SentimentAnalyzer(
                model_name="./sentiment_analyzer_finetuned"
            )
        return loaded['sentiment']

    def analyze_content(text_input, image_input):
        # Whitespace-only text counts as "no text"; the image is checked
        # against None explicitly instead of relying on object truthiness.
        text = (text_input or "").strip()
        has_text = bool(text)
        has_image = image_input is not None

        if has_text and has_image:
            return "Please provide either text or an image, but not both simultaneously."
        if not has_text and not has_image:
            return "Please provide either text or an image to analyze."

        # Text only: sentiment analysis
        if has_text:
            sentiment = get_sentiment_analyzer().predict_sentiment(text)
            return f"Sentiment: {sentiment}"

        # Image only: OCR followed by tone detection
        tone_analyzer = get_tone_analyzer()
        try:
            extracted_text = extract_text_from_image(image_input)

            if not extracted_text:
                return "No text could be extracted from the image."

            tone = tone_analyzer.predict_single(extracted_text)
            return f"Detected Tone: {tone}"

        except Exception as e:
            return f"Error processing image: {str(e)}"

    examples = [
        ["This is a great product that solves many problems.", None],
    ]
    # Placeholder path: replace with an actual example image. It is only
    # registered when the file exists so a missing sample cannot break the UI.
    sample_image = "path/to/sample/image.jpg"
    if os.path.exists(sample_image):
        examples.append([None, sample_image])

    iface = gr.Interface(
        fn=analyze_content,
        inputs=[
            gr.Textbox(
                lines=5,
                placeholder="Enter text for sentiment analysis...",
                label="Input Text (Optional)"
            ),
            gr.Image(
                type="pil",
                label="Upload Image for Tone Detection (Optional)"
            )
        ],
        outputs=[
            gr.Textbox(
                label="Analysis Result",
                lines=3
            )
        ],
        title="Conditional Sentiment & Tone Analyzer",
        description="Analyze sentiment for text or detect tone from images.",
        examples=examples
    )
    return iface

# Script entry point: fine-tune both models, then serve the Gradio UI that
# loads the fine-tuned checkpoints written by the training step.
if __name__ == "__main__":
    # First, train the models
    print("Starting the training process...")
    train_models()
    
    # Then launch the Gradio interface
    print("Launching Gradio interface...")
    iface = create_gradio_interface()
    # NOTE(review): share=True presumably asks Gradio for a publicly reachable
    # share link — confirm that exposing the app this way is intended.
    iface.launch(share=True)


Starting the training process...
Downloading and loading tone dataset...
Initializing Tone Analyzer...


Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting tone fine-tuning process...
Preparing training dataset...
Splitting into train/validation sets...
Setting up training arguments...
Starting training...




  0%|          | 0/120 [00:00<?, ?it/s]

{'loss': 1.6067, 'grad_norm': 4.705992698669434, 'learning_rate': 4.0000000000000003e-07, 'epoch': 0.08}
{'loss': 1.6116, 'grad_norm': 3.6831698417663574, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.17}
{'loss': 1.6042, 'grad_norm': 5.108077049255371, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.25}
{'loss': 1.6087, 'grad_norm': 3.2815654277801514, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.33}
{'loss': 1.6242, 'grad_norm': 3.5876388549804688, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.42}
{'loss': 1.5908, 'grad_norm': 3.1172780990600586, 'learning_rate': 2.4000000000000003e-06, 'epoch': 0.5}
{'loss': 1.5924, 'grad_norm': 4.031172275543213, 'learning_rate': 2.8000000000000003e-06, 'epoch': 0.58}
{'loss': 1.5985, 'grad_norm': 4.316586494445801, 'learning_rate': 3.2000000000000003e-06, 'epoch': 0.67}
{'loss': 1.5883, 'grad_norm': 3.533423900604248, 'learning_rate': 3.6000000000000003e-06, 'epoch': 0.75}
{'loss': 1.5832, 'grad_norm': 2.4801716804504395, 'le

  0%|          | 0/30 [00:00<?, ?it/s]

{'eval_loss': 1.5753663778305054, 'eval_runtime': 368.7418, 'eval_samples_per_second': 1.296, 'eval_steps_per_second': 0.081, 'epoch': 1.0}
{'train_runtime': 7292.707, 'train_samples_per_second': 0.262, 'train_steps_per_second': 0.016, 'train_loss': 1.5963021914164226, 'epoch': 1.0}
Saving fine-tuned model...
Fine-tuning complete!
Initializing Sentiment Analyzer...


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Starting sentiment fine-tuning process...




  0%|          | 0/1563 [00:00<?, ?it/s]

{'loss': 0.9675, 'grad_norm': 7.191839218139648, 'learning_rate': 2.0000000000000002e-07, 'epoch': 0.01}
{'loss': 0.9703, 'grad_norm': 6.925292015075684, 'learning_rate': 4.0000000000000003e-07, 'epoch': 0.01}
{'loss': 0.9807, 'grad_norm': 5.493546009063721, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.02}
{'loss': 0.9541, 'grad_norm': 9.554792404174805, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.03}
{'loss': 0.935, 'grad_norm': 7.233910083770752, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.03}
{'loss': 0.9242, 'grad_norm': 4.8051371574401855, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.04}
{'loss': 0.8791, 'grad_norm': 7.0243000984191895, 'learning_rate': 1.4000000000000001e-06, 'epoch': 0.04}
{'loss': 0.8502, 'grad_norm': 6.498027324676514, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.05}
{'loss': 0.8381, 'grad_norm': 5.533255577087402, 'learning_rate': 1.8000000000000001e-06, 'epoch': 0.06}
{'loss': 0.8187, 'grad_norm': 7.102581024169922, 'learni