# In [18]:
# Standard library
import asyncio
import hashlib
import json
import logging
import os
import pickle
import sqlite3
import sys
import time
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from enum import Enum
from pathlib import Path
from threading import Lock
from typing import Dict, List, Optional, Tuple, Union

# Core ML and NLP libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE
import spacy
from langdetect import detect, detect_langs

from transformers import (
    AutoTokenizer, AutoModel, AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup, pipeline
)
from torch.optim import AdamW

# Infrastructure and monitoring
import redis
import boto3
# NOTE: this rebinds the name `Counter`, shadowing collections.Counter imported above.
from prometheus_client import Counter, Histogram, Gauge, start_http_server
import pydantic
from pydantic import BaseModel, Field
from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

# Async and concurrency
import aiohttp
from redis.asyncio import Redis

# Specialized libraries
import plotly.graph_objects as go
import plotly.express as px
from textblob import TextBlob
import openai  # For GPT integration
import anthropic  # For Claude integration

# Database
import pymongo
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, Boolean, Text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# In [22]:
# CONFIGURATION & DATA MODELS
# declarative_base is imported from sqlalchemy.orm here (its location since SQLAlchemy 1.4)
from sqlalchemy.orm import declarative_base  # Updated import
class SentimentLabel(Enum):
    """Sentiment classes, each bound to an integer id.

    The ids run from 0 (NEGATIVE) through 1 (NEUTRAL) to 2 (POSITIVE).
    """

    NEGATIVE = 0
    NEUTRAL = 1
    POSITIVE = 2

def _default_priority_categories() -> List[str]:
    """Return a fresh default list for ``PipelineConfig.priority_categories``."""
    return ["organic", "baby food", "dietary supplements"]


def _default_alert_thresholds() -> Dict[str, float]:
    """Return a fresh default mapping for ``PipelineConfig.alert_thresholds``."""
    return {
        "accuracy_drop": 0.05,
        "latency_p95": 200,  # milliseconds
        "error_rate": 0.01,
    }


@dataclass
class PipelineConfig:
    """Single settings object holding every tunable of the pipeline.

    All fields have defaults, so ``PipelineConfig()`` is a valid configuration.
    The list and dict defaults are built per instance by factory functions, so
    mutating one instance's collection never affects another instance.
    """

    # --- Model ---
    foundation_model: str = "roberta-large"
    custom_model_path: str = "models/amazon-food-sentiment-v2"
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"

    # --- Processing ---
    max_sequence_length: int = 512
    batch_size: int = 64
    num_workers: int = 8

    # --- Business ---
    confidence_threshold: float = 0.85
    human_review_threshold: float = 0.7
    priority_categories: List[str] = field(
        default_factory=_default_priority_categories
    )

    # --- Infrastructure ---
    redis_url: str = "redis://localhost:6379"
    model_registry_url: str = "s3://amazon-ml-models/food-sentiment/"
    feature_store_url: str = "s3://amazon-feature-store/"
    db_url: str = "sqlite:///sentiment_pipeline.db"

    # --- Monitoring ---
    metrics_port: int = 8000
    alert_thresholds: Dict[str, float] = field(
        default_factory=_default_alert_thresholds
    )

class ReviewInput(BaseModel):
    """Input schema for review processing.

    ``rating`` is validated to lie between 1 and 5 inclusive. When no
    ``timestamp`` is supplied it defaults to the current UTC time as a naive
    ``datetime`` (no ``tzinfo``), the same shape ``datetime.utcnow()`` produced.
    """
    review_id: str
    user_id: str
    product_id: str
    text: str
    rating: int = Field(ge=1, le=5)
    product_category: Optional[str] = None
    reviewer_history: Optional[Dict] = None
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC "now"
    # and drop tzinfo so the default stays a naive UTC value, as before.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
    language: Optional[str] = None
    is_verified_purchase: bool = True

class SentimentOutput(BaseModel):
    """Response schema describing the sentiment result for one review.

    Every field is required; none declares a default.
    """

    # Identifier of the review this result belongs to.
    review_id: str

    # Predicted label, its confidence, and the per-label score mapping.
    sentiment: str
    confidence: float
    sentiment_scores: Dict[str, float]

    # Business-facing signals.
    business_impact_score: float
    requires_human_review: bool

    # Extracted text features and free-form insight/metadata mappings.
    key_phrases: List[str]
    product_specific_insights: Dict[str, Union[str, float]]
    processing_metadata: Dict[str, Union[str, float, int]]

# Database Models
# Declarative base class that the ORM model classes in this file inherit from.
Base = declarative_base()

class ReviewRecord(Base):
    """ORM mapping for the ``reviews`` table: one row per stored review."""

    __tablename__ = "reviews"

    # Integer primary key.
    id = Column(Integer, primary_key=True)

    # Identifiers: review_id is unique and indexed; user_id and product_id are indexed.
    review_id = Column(String, unique=True, index=True)
    user_id = Column(String, index=True)
    product_id = Column(String, index=True)

    # Review content and its attributes.
    text = Column(Text)
    rating = Column(Integer)
    product_category = Column(String)
    timestamp = Column(DateTime)
    is_verified_purchase = Column(Boolean)
    
class SentimentRecord(Base):
    """ORM mapping for the ``sentiment_results`` table.

    ``review_id`` is indexed but is not declared as a foreign key or as unique.
    ``created_at`` is filled in at insert time with the current UTC time as a
    naive ``datetime`` when no value is supplied.
    """

    __tablename__ = "sentiment_results"

    id = Column(Integer, primary_key=True)
    review_id = Column(String, index=True)

    # Predicted label, its confidence, and one score column per label.
    sentiment = Column(String)
    confidence = Column(Float)
    positive_score = Column(Float)
    neutral_score = Column(Float)
    negative_score = Column(Float)

    business_impact_score = Column(Float)
    requires_human_review = Column(Boolean)

    # Bookkeeping about the run that produced this row.
    processing_time = Column(Float)
    model_version = Column(String)
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC "now"
    # and drop tzinfo so the stored value stays naive UTC, matching the plain
    # DateTime column and rows written earlier.
    created_at = Column(
        DateTime,
        default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
    )