In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer
from gramformer import Gramformer
import transformers
import torch
import spacy
import time

  from .autonotebook import tqdm as notebook_tqdm


## Ignore warnings

In [2]:
import warnings

# Suppress every Python warning for the rest of the session. This is a global
# switch: it also hides deprecation notices, so remove it when debugging.
warnings.simplefilter("ignore")

# Raise the transformers library's own log threshold to ERROR.
transformers.logging.set_verbosity_error()

## Optimized Gramformer for sentence correction

In [3]:
class OptimizedT5Corrector:
    """Grammar corrector built on the ``prithivida/grammar_error_correcter_v1`` T5 checkpoint.

    The tokenizer and model are loaded once in ``__init__``. ``correct`` then
    runs beam-search generation on one piece of text and reports how long the
    call took.
    """

    def __init__(self, debug=True):
        """Load the tokenizer and model and prepare the model for inference.

        Args:
            debug: When true, print whether ``torch.compile`` could be applied.
        """
        # Load model and tokenizer directly
        self.model_name = "prithivida/grammar_error_correcter_v1"
        self.tokenizer = T5Tokenizer.from_pretrained(self.model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(self.model_name)

        # Inference only: put the module in evaluation mode.
        self.model.eval()

        # Wrap the model with torch.compile when this PyTorch build offers it.
        # NOTE(review): torch.compile is lazy, so this try/except only covers
        # errors raised while creating the wrapper, not backend failures on the
        # first forward pass. It also looks like ``generate`` is resolved on the
        # wrapped eager module, in which case generation would bypass the
        # compiled forward entirely -- profile before relying on a speedup.
        if hasattr(torch, 'compile'):
            try:
                self.model = torch.compile(self.model)
                if debug:
                    print("Successfully applied torch.compile optimization")
            except Exception as e:
                if debug:
                    print(f"Could not apply torch.compile: {e}")

        # Reuse cached key/value states between decoding steps. This trades
        # extra memory for faster generation; it is not a memory optimization.
        self.model.config.use_cache = True

    def correct(self, sentence, max_length=128):
        """Return the corrected text together with the elapsed time.

        Args:
            sentence: Text to correct. It is prefixed with ``"gec: "`` before
                tokenization.
            max_length: Forwarded to ``generate`` as ``max_length``.

        Returns:
            dict with ``"corrected_sentence"`` (decoded first generated
            sequence) and ``"inference_time_seconds"`` (float). A blank or
            whitespace-only string is returned unchanged with a time of 0.0
            and never reaches the model.
        """
        # Nothing to correct: skip tokenization and beam search altogether.
        if isinstance(sentence, str) and not sentence.strip():
            return {
                "corrected_sentence": sentence,
                "inference_time_seconds": 0.0
            }

        # perf_counter is monotonic and high resolution, so the measured
        # interval cannot be skewed by system clock adjustments.
        start_time = time.perf_counter()

        with torch.inference_mode():
            # Prepare input - the "gec:" prefix is important for the model
            input_text = f"gec: {sentence}"
            input_ids = self.tokenizer(input_text, return_tensors="pt").input_ids

            outputs = self.model.generate(
                input_ids=input_ids,
                max_length=max_length,
                num_beams=2,  # Reduced from 5 for speed
                early_stopping=True,
                use_cache=True  # Enable KV caching for faster generation
            )

            # Only the first returned sequence is decoded.
            corrected_sentence = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        inference_time = time.perf_counter() - start_time

        return {
            "corrected_sentence": corrected_sentence,
            "inference_time_seconds": inference_time
        }

## Polarity (negation) detection with spaCy

In [4]:
def analyze_sentence_type(nlp, text):
    """Label ``text`` as "negation" or "affirmation" from its dependency parse.

    Args:
        nlp: Callable pipeline; ``nlp(text)`` must return an iterable of tokens
            that expose a ``dep_`` attribute (e.g. a loaded spaCy model).
        text: Text to analyse.

    Returns:
        dict with ``'sentence'`` (the input text), ``'sentence_type'``
        ("negation" if any token has the ``"neg"`` dependency label, otherwise
        "affirmation"), and the elapsed time in seconds under both
        ``'inference_time'`` (original key) and ``'inference_time_seconds'``
        (the key the other analysis helpers in this notebook use).
    """
    # perf_counter is monotonic and high resolution, which matters for the
    # millisecond-scale interval measured here.
    start_time = time.perf_counter()

    # Parse the sentence using spaCy
    doc = nlp(text)

    # Check for negation by looking for the 'neg' dependency tag
    has_negation = any(token.dep_ == "neg" for token in doc)

    # Determine sentence type
    sentence_type = "negation" if has_negation else "affirmation"

    inference_time = time.perf_counter() - start_time

    return {
        'sentence': text,
        'sentence_type': sentence_type,
        'inference_time': inference_time,
        'inference_time_seconds': inference_time
    }

## Subjectivity detection with a pre-trained model

In [5]:
def classify_sentence(model, tokenizer, sentence):
    """Classify ``sentence`` as "fact" or "opinion" with a two-class model.

    Args:
        model: Sequence-classification model. It is moved to the selected
            device and put in evaluation mode on every call; calling it must
            return an object with a ``logits`` tensor.
        tokenizer: Tokenizer matching ``model``; its output must support
            ``.to(device)`` and ``**`` unpacking.
        sentence: Text to classify.

    Returns:
        dict with ``"sentence"``, ``"objective_score"``,
        ``"subjective_score"``, ``"classification"`` ("fact" only when the
        objective score is strictly greater, otherwise "opinion") and
        ``"inference_time_seconds"``.
    """
    # Set device (GPU if available, otherwise CPU)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()

    # Device placement above is excluded from the measurement; tokenization is
    # included. perf_counter is monotonic, unlike time.time().
    start_time = time.perf_counter()

    # Tokenize input and move to device
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True).to(device)

    # Perform inference and turn the logits into class probabilities
    with torch.inference_mode():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Class index 0 is read as subjective/opinion and index 1 as objective/fact.
    # NOTE(review): believed to match the label order of the
    # lighteternal/fact-or-opinion-xlmr-el checkpoint used in this notebook;
    # check model.config.id2label or the model card before passing another model.
    subj_score = probs[0][0].item()
    obj_score = probs[0][1].item()
    classification = "fact" if obj_score > subj_score else "opinion"

    inference_time = time.perf_counter() - start_time

    # Prepare results
    result = {
        "sentence": sentence,
        "objective_score": obj_score,
        "subjective_score": subj_score,
        "classification": classification,
        "inference_time_seconds": inference_time
    }

    return result

## Emotion classification with DistilBERT

In [6]:
class EnhancedEmotionClassifier:
    """Single-label emotion tagger on top of a sequence-classification checkpoint.

    ``classify`` returns the highest-scoring label for a text. Label names are
    taken from the loaded checkpoint's config, so ``self.labels`` follows the
    class order of whichever ``model_name`` is loaded.
    """

    def __init__(self, model_name="joeddav/distilbert-base-uncased-go-emotions-student"):
        """Load tokenizer and model for ``model_name`` and build the label list.

        Args:
            model_name: Checkpoint identifier passed to ``from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.model.eval()

        # The checkpoint's config is the source of truth for index -> label.
        # A hand-maintained list can silently disagree with it (for example on
        # where "neutral" sits) and is wrong for any other ``model_name``.
        self.labels = self._labels_from_config(self.model.config)

    @staticmethod
    def _labels_from_config(config):
        """Return label names ordered by class index, read from ``config.id2label``."""
        # Keys are normalised to int so that string keys (as stored in JSON)
        # sort numerically rather than lexicographically.
        id2label = {int(index): name for index, name in config.id2label.items()}
        return [id2label[index] for index in sorted(id2label)]

    def classify(self, text):
        """Return the top emotion for ``text``.

        Args:
            text: Text to classify.

        Returns:
            dict with ``"emotion"`` (label of the highest score), ``"score"``
            (sigmoid of that label's logit) and ``"inference_time_seconds"``.
            The timer starts after tokenization, so only the forward pass and
            score extraction are measured.
        """
        # Tokenize and prepare input
        inputs = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True)

        # perf_counter is monotonic and high resolution, unlike time.time().
        start_time = time.perf_counter()

        # Perform inference
        with torch.inference_mode():
            outputs = self.model(**inputs)

        # Scores for the first (only) item in the batch
        scores = torch.sigmoid(outputs.logits)[0]  # Use sigmoid for multi-label

        # Get the highest-scoring emotion
        top_score, top_index = torch.max(scores, dim=0)
        top_emotion = self.labels[top_index.item()]

        return {
            "emotion": top_emotion,
            "score": top_score.item(),
            "inference_time_seconds": time.perf_counter() - start_time
        }

## Sentence classification

In [9]:
# Load the shared models once, before any analysis step runs
nlp = spacy.load("en_core_web_sm")
subjectivity_model_name = "lighteternal/fact-or-opinion-xlmr-el"
subjectivity_tokenizer = AutoTokenizer.from_pretrained(subjectivity_model_name)
subjectivity_model = AutoModelForSequenceClassification.from_pretrained(subjectivity_model_name)
emotion_classifier = EnhancedEmotionClassifier()

# Input with deliberate spelling/grammar mistakes for the corrector to fix
test_sentence = "I just finishd read the report. I am glad it contens the information we expected!"

# Step 1 - grammar correction; every later step works on the corrected text
print("Checking for grammar correction...")
corrector = OptimizedT5Corrector(debug=False)
correction = corrector.correct(test_sentence)
print(f"Corrected sentence: {correction['corrected_sentence']}")
print(f"Inference time: {correction['inference_time_seconds']:.6f} s")
corrected = correction['corrected_sentence']

# Step 2 - polarity (negation vs. affirmation)
print("\nChecking polarity...")
polarity = analyze_sentence_type(nlp, corrected)
print(f"Polarity of the sentence: {polarity['sentence_type']}")
print(f"Inference time: {polarity['inference_time']:.6f} s")

# Step 3 - subjectivity; report the score of whichever class won
print("\nChecking subjectivity...")
subjectivity = classify_sentence(subjectivity_model, subjectivity_tokenizer, corrected)
score = (
    subjectivity['objective_score']
    if subjectivity['classification'] == "fact"
    else subjectivity['subjective_score']
)
print(f"Sentence subjectivity: {subjectivity['classification']} (Score: {score:.6f})")
print(f"Inference time: {subjectivity['inference_time_seconds']:.6f} s")

# Step 4 - emotion
print("\nChecking emotions...")
sentiment = emotion_classifier.classify(corrected)
print(f"Classified emotion: {sentiment['emotion']} (Score: {sentiment['score']:.6f})")
print(f"Inference time: {sentiment['inference_time_seconds']:.6f} s")

Checking for grammar correction...
Corrected sentence: I just finished reading the report. I am glad it contains the information we expected!
Inference time: 2.144632 s

Checking polarity...
Polarity of the sentence: affirmation
Inference time: 0.015109 s

Checking subjectivity...
Sentence subjectivity: opinion (Score: 0.997128)
Inference time: 0.168141 s

Checking emotions...
Classified emotion: gratitude (Score: 0.914095)
Inference time: 0.089006 s
