# In [1]:
import urllib.request
import json
import pickle
import numpy as np

class AzureEmotionPredictor:
    """
    Handles emotion prediction via Azure endpoint, decoding, and post-processing.
    Ensures sub-emotion is valid for the predicted main emotion.

    Typical use is ``AzureEmotionPredictor(api_key, endpoint_url).predict(text)``,
    which returns a dict with the keys ``"emotion"``, ``"sub_emotion"`` and
    ``"intensity"``.
    """

    # Maps every known sub-emotion label to the main emotion it belongs to.
    # Each instance gets its own copy as ``self.emotion_mapping``.
    EMOTION_MAPPING = {
        "curiosity": "happiness",
        "neutral": "neutral",
        "annoyance": "anger",
        "confusion": "surprise",
        "disappointment": "sadness",
        "excitement": "happiness",
        "surprise": "surprise",
        "realization": "surprise",
        "desire": "happiness",
        "approval": "happiness",
        "disapproval": "disgust",
        "embarrassment": "fear",
        "admiration": "happiness",
        "anger": "anger",
        "optimism": "happiness",
        "sadness": "sadness",
        "joy": "happiness",
        "fear": "fear",
        "remorse": "sadness",
        "gratitude": "happiness",
        "disgust": "disgust",
        "love": "happiness",
        "relief": "happiness",
        "grief": "sadness",
        "amusement": "happiness",
        "caring": "happiness",
        "nervousness": "fear",
        "pride": "happiness",
    }

    # Number of attempts predict() makes before giving up.
    MAX_RETRIES = 5

    def __init__(self, api_key, endpoint_url, encoders_dir="models/encoders"):
        """
        Initialize with API key, endpoint URL, and encoder directory.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            endpoint_url: URL of the scoring endpoint that receives the POST.
            encoders_dir: Directory holding ``emotion_encoder.pkl``,
                ``sub_emotion_encoder.pkl`` and ``intensity_encoder.pkl``.

        Raises:
            RuntimeError: If any of the encoder files cannot be loaded.
        """
        self.api_key = api_key
        self.endpoint_url = endpoint_url
        self.encoders_dir = encoders_dir
        # Per-instance copy so callers may adjust the mapping without
        # affecting other instances.
        self.emotion_mapping = dict(self.EMOTION_MAPPING)
        self._load_encoders()

    def _load_encoders(self):
        """
        Load label encoders for emotion, sub_emotion, and intensity.

        Sets ``self.emotion_encoder``, ``self.sub_emotion_encoder`` and
        ``self.intensity_encoder`` from the pickle files in ``encoders_dir``.

        Raises:
            RuntimeError: If a file is missing or cannot be unpickled.
        """
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point encoders_dir at files from a trusted source.
        try:
            for name in ("emotion", "sub_emotion", "intensity"):
                with open(f"{self.encoders_dir}/{name}_encoder.pkl", "rb") as f:
                    setattr(self, f"{name}_encoder", pickle.load(f))
        except Exception as e:
            raise RuntimeError(f"Failed to load encoders: {e}") from e

    def get_prediction(self, text):
        """
        Send a request to the Azure endpoint and return the raw response.

        Args:
            text: Input text, sent as the JSON body ``{"text": text}``.

        Returns:
            The JSON-decoded response body.

        Raises:
            RuntimeError: If the endpoint answers with an HTTP error status.
        """
        body = json.dumps({"text": text}).encode("utf-8")
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'Authorization': 'Bearer ' + self.api_key
        }
        req = urllib.request.Request(self.endpoint_url, body, headers)
        try:
            # Context manager guarantees the connection is released.
            with urllib.request.urlopen(req) as response:
                result = response.read()
        except urllib.error.HTTPError as error:
            raise RuntimeError(
                f"Request failed: {error.code} {error.read().decode('utf8', 'ignore')}"
            ) from error
        return json.loads(result.decode("utf-8"))

    @staticmethod
    def _is_logits(value):
        """Return True when *value* is a sequence of scores rather than a single index."""
        return isinstance(value, (list, tuple, np.ndarray))

    @staticmethod
    def _logit_vector(value):
        """
        Return the innermost first row of *value* as a NumPy array.

        Accepts a flat vector (``[a, b, c]``) as well as batch-nested input
        (``[[a, b, c]]``); for nested input only the first item is used.
        """
        row = value
        while len(row) > 0 and isinstance(row[0], (list, tuple, np.ndarray)):
            row = row[0]
        return np.asarray(row)

    def _class_index(self, value):
        """Turn logits (argmax) or an already-computed index into an int class index."""
        if self._is_logits(value):
            return int(np.argmax(self._logit_vector(value)))
        return int(value)

    def decode_and_postprocess(self, raw_predictions):
        """
        Decode raw predictions and post-process sub-emotion to ensure consistency.

        Args:
            raw_predictions: Mapping with the keys ``"emotion"``,
                ``"sub_emotion"`` and ``"intensity"``. Each value is either a
                class index or a logit vector (flat or nested in a batch list).

        Returns:
            Dict with the decoded ``"emotion"``, ``"sub_emotion"`` and
            ``"intensity"`` labels.

        Raises:
            ValueError: If ``raw_predictions`` is missing keys or has values
                that cannot be interpreted as logits or indices.
        """
        try:
            emotion_idx = self._class_index(raw_predictions["emotion"])
            intensity_idx = self._class_index(raw_predictions["intensity"])

            sub_raw = raw_predictions["sub_emotion"]
            if self._is_logits(sub_raw):
                # Keep the full vector: the best *valid* class is chosen below.
                sub_emotion_logits = self._logit_vector(sub_raw)
                sub_emotion_idx = None
            else:
                sub_emotion_logits = None
                sub_emotion_idx = int(sub_raw)
        except Exception as e:
            raise ValueError(f"Malformed raw predictions: {e}") from e

        # Decode main emotion
        emotion = self.emotion_encoder.inverse_transform([emotion_idx])[0]

        # Indices of the sub-emotions that belong to the predicted main emotion.
        sub_emotion_classes = self.sub_emotion_encoder.classes_
        valid_indices = [
            i for i, label in enumerate(sub_emotion_classes)
            if self.emotion_mapping.get(label) == emotion
        ]

        if sub_emotion_logits is not None:
            if valid_indices:
                # Pick the valid sub-emotion with the highest logit.
                best_idx = valid_indices[int(np.argmax(sub_emotion_logits[valid_indices]))]
                sub_emotion = sub_emotion_classes[best_idx]
            else:
                # No sub-emotion maps to this emotion: use the overall best.
                sub_emotion = self.sub_emotion_encoder.inverse_transform(
                    [int(np.argmax(sub_emotion_logits))]
                )[0]
        else:
            # Only an index was given: decode it, and if it contradicts the
            # main emotion fall back to the first valid sub-emotion (if any).
            sub_emotion = self.sub_emotion_encoder.inverse_transform([sub_emotion_idx])[0]
            if self.emotion_mapping.get(sub_emotion) != emotion and valid_indices:
                sub_emotion = sub_emotion_classes[valid_indices[0]]

        # Decode intensity
        intensity = self.intensity_encoder.inverse_transform([intensity_idx])[0]

        return {
            "emotion": emotion,
            "sub_emotion": sub_emotion,
            "intensity": intensity
        }

    def predict(self, text: str) -> dict:
        """
        Full workflow: get prediction, decode, and post-process.
        Handles double-encoded JSON from the API.

        Every failure (request error, unparsable payload, non-success status,
        malformed predictions) is retried up to ``MAX_RETRIES`` times.

        Args:
            text: The text to classify.

        Returns:
            Dict with ``"emotion"``, ``"sub_emotion"`` and ``"intensity"``.

        Raises:
            RuntimeError: If no attempt produced a successful, decodable
                response; the last underlying error is chained as the cause.
        """
        last_error = None

        for _ in range(self.MAX_RETRIES):
            try:
                response = self.get_prediction(text)

                # The API wraps its JSON payload in a JSON string, so a second
                # parse is needed; an already-decoded payload is used as-is.
                if isinstance(response, (str, bytes, bytearray)):
                    payload_dict = json.loads(response)
                else:
                    payload_dict = response
                if not isinstance(payload_dict, dict):
                    raise ValueError(f"Unexpected response payload: {payload_dict!r}")

                # Check the status before trusting the predictions.
                if payload_dict.get("status") != "success":
                    raise RuntimeError(
                        f"API did not return a success status. Full response: {payload_dict}"
                    )

                return self.decode_and_postprocess(payload_dict.get("raw_predictions"))
            except Exception as e:  # retry boundary: remembered and re-raised below
                last_error = e

        raise RuntimeError(
            f"Prediction failed after {self.MAX_RETRIES} attempts: {last_error}"
        ) from last_error


# Example usage:
# Credentials are read from the environment so that no secret is stored in
# source control. Any key that was previously committed here should be
# treated as compromised and rotated.
# The guard keeps importing this module free of network side effects.
if __name__ == "__main__":
    import os

    api_key = os.environ.get("AZURE_EMOTION_API_KEY")
    if not api_key:
        raise SystemExit("Set the AZURE_EMOTION_API_KEY environment variable before running this example.")

    predictor = AzureEmotionPredictor(
        api_key=api_key,
        # The endpoint can be overridden; the default is the original example URL.
        endpoint_url=os.environ.get(
            "AZURE_EMOTION_ENDPOINT_URL",
            'http://194.171.191.227:30526/api/v1/endpoint/deberta-emotion-clf-endpoint/score',
        ),
        encoders_dir="../models/encoders")
    result = predictor.predict("i'm really admiring what you do")
    print(result)

# Output of the example above:
# {'emotion': 'happiness', 'sub_emotion': 'admiration', 'intensity': 'moderate'}
