In [2]:
# %pip (rather than !pip) installs into the environment of the running kernel;
# the version is pinned to the release this notebook was last run with.
%pip install memory_profiler==0.61.0

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl.metadata (20 kB)
Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0


In [13]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
import re
from typing import List, Tuple, Generator
import logging
from pathlib import Path
import joblib
import gc
from memory_profiler import profile

# Timestamped, INFO-level logging for everything in this cell; `logger` is the
# module-level logger used by the class and functions defined below.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class MemoryEfficientEmotionPredictor:
    """Streaming text-emotion classifier.

    Samples are read from a text file in batches, turned into hashed
    bag-of-words vectors with a stateless ``HashingVectorizer`` and fed to an
    ``SGDClassifier`` through ``partial_fit``, so neither the whole data set
    nor a vocabulary has to be held in memory at once.

    Expected line format of the training file: ``[<one-hot flags>] <text>``,
    where the flags are whitespace-separated numbers whose position selects
    the entry of ``self.emotions``.
    """

    # Patterns are compiled once at class creation instead of being looked up
    # again for every line / every call.
    _NON_ALNUM_RE = re.compile(r'[^a-z0-9\s]')
    _WHITESPACE_RE = re.compile(r'\s+')
    _SAMPLE_RE = re.compile(r'\[(.*?)\](.*)')

    def __init__(self, n_features=2**16, random_state=42):
        """Create the vectorizer and the (still unfitted) classifier.

        Args:
            n_features: Width of the hashed feature space.
            random_state: Seed forwarded to ``SGDClassifier`` so repeated
                training runs on the same file give the same model. Pass
                ``None`` to get non-deterministic shuffling.
        """
        # Order matters: the index of the "1" in a one-hot label selects the
        # emotion at the same index of this list.
        self.emotions = ["joy", "fear", "anger", "sadness", "disgust", "shame", "guilt"]
        # "shame" and "guilt" intentionally map to the same emoji here.
        self.emoji_map = {"joy":"😂", "fear":"😱", "anger":"😠", "sadness":"😢",
                         "disgust":"😒", "shame":"😳", "guilt":"😳"}
        # Use HashingVectorizer instead of DictVectorizer to save memory
        self.vectorizer = HashingVectorizer(n_features=n_features,
                                          alternate_sign=False,
                                          norm=None)
        # Use SGDClassifier for online learning.
        # NOTE: train() only calls partial_fit(), which performs a single pass
        # over the given batch per call; max_iter/tol only take effect if
        # fit() is ever used on this estimator.
        self.model = SGDClassifier(loss='log_loss',
                                 max_iter=100,
                                 tol=1e-3,
                                 n_jobs=-1,
                                 random_state=random_state)

    def preprocess_text(self, text: str) -> str:
        """Normalise raw text for hashing.

        Lower-cases the text, replaces every character that is not ``a-z``,
        ``0-9`` or whitespace with a space, collapses whitespace runs into a
        single space and strips the ends.
        """
        cleaned = self._NON_ALNUM_RE.sub(' ', text.lower())
        return self._WHITESPACE_RE.sub(' ', cleaned).strip()

    def data_generator(self, file_path: str, batch_size: int = 1000
                       ) -> Generator[Tuple[List[str], List[str]], None, None]:
        """Lazily yield ``(texts, labels)`` batches read from ``file_path``.

        Lines that do not start with a ``[...]`` label are skipped. Every
        yielded batch holds ``batch_size`` samples except possibly the last.

        Args:
            file_path: UTF-8 text file with one ``[flags] text`` sample per line.
            batch_size: Maximum number of samples per batch; must be >= 1.

        Yields:
            Tuples ``(texts, labels)`` of equally long lists: preprocessed
            texts and their emotion names.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1 (raised when the
                generator is first advanced).
        """
        # A non-positive size would yield an empty first batch, which only
        # fails later inside partial_fit with an unrelated-looking error.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

        current_batch: List[str] = []
        current_labels: List[str] = []

        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                match = self._SAMPLE_RE.match(line.strip())
                if not match:
                    continue  # header, blank or otherwise malformed line

                label, text = match.groups()
                current_batch.append(self.preprocess_text(text))
                current_labels.append(self._convert_label(label))

                if len(current_batch) >= batch_size:
                    yield current_batch, current_labels
                    # Fresh lists: the consumer may still hold the old ones.
                    current_batch, current_labels = [], []

            if current_batch:  # Yield remaining data
                yield current_batch, current_labels

    def _convert_label(self, label_str: str) -> str:
        """Convert a one-hot label string to an emotion name.

        Args:
            label_str: Whitespace-separated numbers, e.g. ``"0. 1. 0. 0. 0. 0. 0."``.

        Returns:
            The emotion at the position of the first ``1``; ``"neutral"`` when
            no entry equals 1 or the string cannot be interpreted (the latter
            case is also logged as an error).
        """
        try:
            flags = [int(float(token)) for token in label_str.split()]
            for position, flag in enumerate(flags):
                if flag == 1:
                    return self.emotions[position]
            return "neutral"
        # ValueError: non-numeric token or NaN; OverflowError: infinity;
        # IndexError: a "1" beyond the known emotions.
        except (ValueError, OverflowError, IndexError) as e:
            logger.error(f"Label conversion error: {str(e)}")
            return "neutral"

    def train(self, file_path: str, batch_size: int = 1000):
        """Train the classifier incrementally, one batch at a time.

        Args:
            file_path: Training file, see ``data_generator`` for the format.
            batch_size: Number of samples vectorised and fitted per step.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        logger.info("Starting training with batch processing...")

        batches_seen = 0
        for i, (batch_texts, batch_labels) in enumerate(self.data_generator(file_path, batch_size)):
            # Transform features using HashingVectorizer
            X_batch = self.vectorizer.transform(batch_texts)

            # Partial fit for online learning; the full class list has to be
            # announced on the first call because a batch may not contain
            # every emotion.
            # NOTE(review): _convert_label can emit "neutral", which is not
            # among the declared classes - confirm how such rows should be
            # treated by the classifier.
            if i == 0:
                self.model.partial_fit(X_batch, batch_labels, classes=self.emotions)
            else:
                self.model.partial_fit(X_batch, batch_labels)

            batches_seen = i + 1
            logger.info(f"Processed batch {i+1} with {len(batch_texts)} samples")
            # Release the batch matrix before the next one is built.
            del X_batch
            gc.collect()

        if batches_seen == 0:
            # Without this the method would return silently and leave an
            # unfitted model behind.
            logger.warning(f"No training samples found in {file_path}; the model was not fitted")

    def predict(self, text: str) -> Tuple[str, str]:
        """Predict the emotion of a single text.

        Args:
            text: Raw input sentence.

        Returns:
            ``(emotion, emoji)``; the emoji is ``""`` for emotions without an
            entry in ``emoji_map``.
        """
        features = self.vectorizer.transform([self.preprocess_text(text)])

        # str(): return a plain Python string rather than a numpy scalar.
        emotion = str(self.model.predict(features)[0])

        # No explicit gc.collect() here: the one-row matrix is freed by
        # reference counting on return, and a full collection per call only
        # adds latency.
        return emotion, self.emoji_map.get(emotion, "")

    def save_model(self, path: str):
        """Persist the classifier to ``<path>/model.joblib`` (compressed).

        Only the classifier is written; the vectorizer is not saved and is
        re-created by ``__init__``.

        Args:
            path: Target directory; created (with parents) if missing.
        """
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)
        joblib.dump(self.model, path / 'model.joblib', compress=3)
        logger.info(f"Model saved to {path}")

    def load_model(self, path: str):
        """Load the classifier from ``<path>/model.joblib``.

        If the loaded model was trained with a different number of hashed
        features than this instance's vectorizer, the vectorizer is adjusted
        to the model so that ``predict`` does not fail on a shape mismatch.

        Args:
            path: Directory previously passed to ``save_model``.
        """
        path = Path(path)
        # SECURITY: joblib.load unpickles arbitrary objects - only load model
        # files from a trusted source.
        self.model = joblib.load(path / 'model.joblib')

        model_width = getattr(self.model, 'n_features_in_', None)
        if model_width is not None and int(model_width) != self.vectorizer.n_features:
            logger.warning(
                f"Loaded model expects {int(model_width)} features but the vectorizer "
                f"was configured with {self.vectorizer.n_features}; adjusting the vectorizer"
            )
            self.vectorizer.set_params(n_features=int(model_width))

        logger.info(f"Model loaded from {path}")

def main(file_path: str = 'Emotion Prediction.csv', model_dir: str = 'emotion_model'):
    """Train, save and demo the emotion predictor.

    Trains on ``file_path``, saves the model to ``model_dir``, prints
    predictions for a few fixed sentences and then enters an interactive
    prompt loop. Any error is logged (with traceback) instead of propagating.

    Args:
        file_path: Training data file passed to ``predictor.train``.
        model_dir: Directory passed to ``predictor.save_model``.
    """
    # Initialize predictor with memory optimization
    predictor = MemoryEfficientEmotionPredictor()

    try:
        # Train model with batch processing
        predictor.train(file_path, batch_size=1000)

        # Save model
        predictor.save_model(model_dir)

        # Smoke-test predictions on a few fixed sentences
        test_sentences = [
            "I am crying",
            "I have a fear of dogs",
            "My dog died yesterday",
            "I don't love you anymore..!"
        ]

        # No explicit gc.collect() per prediction: the per-call temporaries
        # are small and are released by reference counting.
        for sentence in test_sentences:
            emotion, emoji = predictor.predict(sentence)
            print(f"\nText: {sentence}")
            print(f"Emotion: {emotion} {emoji}")

        # Interactive prediction
        print("\nEnter sentences for emotion prediction (type 'q' to quit)")
        while True:
            try:
                text = input("\nEnter text: ").strip()
            except EOFError:
                # stdin is closed (e.g. non-interactive execution): treat it
                # like the user quitting rather than as a failure.
                break

            if text.lower() == 'q':
                break
            if not text:
                # Nothing to classify; ask again instead of predicting on "".
                continue

            emotion, emoji = predictor.predict(text)
            print(f"Predicted emotion: {emotion} {emoji}")

    except Exception as e:
        # logger.exception keeps the traceback, which a bare message loses.
        logger.exception(f"Error in main execution: {str(e)}")
    finally:
        # Clean up resources
        del predictor
        gc.collect()

# Run the demo only when this code is executed as the top-level module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()


Text: I am crying
Emotion: sadness 😢

Text: I have a fear of dogs
Emotion: fear 😱

Text: My dog died yesterday
Emotion: sadness 😢

Text: I don't love you anymore..!
Emotion: joy 😂

Enter sentences for emotion prediction (type 'q' to quit)

Enter text: I have met my grandmother
Predicted emotion: sadness 😢

Enter text:  I was involved in a traffic accident.
Predicted emotion: fear 😱

Enter text: q
