In [None]:
!pip install transformers torch scikit-learn pandas fastapi uvicorn "python-multipart" "uvicorn[standard]"

In [None]:
!pip install faiss-cpu sentence-transformers

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
import torch
from sklearn.metrics import accuracy_score, f1_score
import os

# --- 1. Data Simulation ---
# In a real project, replace this with your actual dataset loading.
# The dataset should have at least two columns: 'text' (the ticket) and 'label' (the category).
def generate_synthetic_data(num_samples=1000):
    """Build a synthetic support-ticket DataFrame with exactly ``num_samples`` rows.

    Ten fixed (text, label) templates are repeated in order until the requested
    number of rows is reached, so a size that is not a multiple of ten is
    honoured instead of being rounded down.

    Args:
        num_samples: Number of rows to generate. Zero or a negative value
            yields an empty frame.

    Returns:
        pandas.DataFrame with two columns: 'text' (the ticket) and 'label'
        (the category).
    """
    templates = [
        ("I need help with my recent bill. It seems too high.", 'Billing'),
        ("My service is not working. The internet is down.", 'Technical Support'),
        ("I would like to request a refund for my order #12345.", 'Refunds'),
        ("My account is locked and I can't log in.", 'Account Access'),
        ("How do I reset my password?", 'Account Access'),
        ("Can you help me with a billing inquiry?", 'Billing'),
        ("There's a bug in your software. It keeps crashing.", 'Technical Support'),
        ("I want to know the status of my refund.", 'Refunds'),
        ("The app is slow on my phone.", 'Technical Support'),
        ("Please cancel my subscription.", 'Billing'),
    ]

    # Cycle through the templates; for multiples of ten this reproduces the
    # same rows, in the same order, as repeating the whole list.
    rows = [templates[i % len(templates)] for i in range(num_samples)]
    return pd.DataFrame(rows, columns=['text', 'label'])

# --- 2. Data Preparation ---

class SupportTicketDataset(Dataset):
    """Dataset that pairs per-example encodings with their label ids.

    ``encodings`` is a mapping from field name to a sequence indexable by
    example position; ``labels`` is a sequence of label ids of the same length.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The label sequence defines how many examples exist.
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, including a 'labels' entry."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

def prepare_data(df):
    """Encode labels, split into train/validation, and tokenize the texts.

    The input frame is only read: label ids are derived into a local list
    instead of being written back as a new column, so re-running the cell or
    reusing ``df`` elsewhere is not affected by this call.

    Args:
        df: DataFrame with a 'text' column (ticket text) and a 'label' column
            (category name).

    Returns:
        Tuple ``(train_dataset, val_dataset, label_map, tokenizer)`` where the
        datasets are ``SupportTicketDataset`` instances, ``label_map`` maps each
        category name to an integer id (in order of first appearance), and
        ``tokenizer`` is the 'bert-base-uncased' tokenizer used for encoding.
    """
    label_names = df['label'].unique().tolist()
    label_map = {label: i for i, label in enumerate(label_names)}
    label_ids = df['label'].map(label_map).tolist()

    # Stratified 80/20 split with a fixed seed so the split is reproducible.
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        df['text'].tolist(),
        label_ids,
        test_size=0.2,
        random_state=42,
        stratify=label_ids
    )

    # BERT Tokenizer; each split is padded to its own longest sequence.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    train_encodings = tokenizer(train_texts, truncation=True, padding=True)
    val_encodings = tokenizer(val_texts, truncation=True, padding=True)

    train_dataset = SupportTicketDataset(train_encodings, train_labels)
    val_dataset = SupportTicketDataset(val_encodings, val_labels)

    return train_dataset, val_dataset, label_map, tokenizer

# --- 3. Model Training ---

def compute_metrics(p):
    """Compute accuracy and weighted F1 from a Trainer evaluation result.

    Args:
        p: Object exposing ``predictions`` (per-class scores, one row per
            example) and ``label_ids`` (true class ids).

    Returns:
        Dict with keys 'accuracy' and 'f1'.
    """
    # Imported here because this cell's import block does not bring in numpy;
    # without it the first evaluation pass fails with NameError on a fresh kernel.
    import numpy as np

    preds = np.argmax(p.predictions, axis=1)
    acc = accuracy_score(p.label_ids, preds)
    f1 = f1_score(p.label_ids, preds, average='weighted')
    return {'accuracy': acc, 'f1': f1}

def train_bert_model(train_dataset, val_dataset, label_map, tokenizer=None):
    """Fine-tune 'bert-base-uncased' for ticket classification and save the artifacts.

    Writes the fine-tuned model, the tokenizer files and a ``labels.json``
    (class id -> category name) into ``./bert_classifier`` so that the
    directory can be loaded on its own for inference.

    Args:
        train_dataset: Dataset used for training.
        val_dataset: Dataset used for per-epoch evaluation.
        label_map: Mapping from category name to integer class id.
        tokenizer: Optional tokenizer to save next to the model. When omitted,
            the stock 'bert-base-uncased' tokenizer is saved.

    Returns:
        Tuple ``(trainer, label_map)``.
    """
    import inspect
    import math

    num_epochs = 3
    train_batch_size = 16

    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_map))

    # Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`;
    # pick whichever keyword the installed version accepts.
    accepted = inspect.signature(TrainingArguments.__init__).parameters
    eval_key = 'eval_strategy' if 'eval_strategy' in accepted else 'evaluation_strategy'

    # Keep warmup to roughly 10% of the run (single-device estimate), capped at
    # the original 500 steps. A fixed 500 exceeds the total step count on small
    # datasets, in which case the learning rate never reaches its peak.
    total_steps = math.ceil(len(train_dataset) / train_batch_size) * num_epochs
    warmup_steps = min(500, total_steps // 10)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=num_epochs,
        per_device_train_batch_size=train_batch_size,
        per_device_eval_batch_size=64,
        warmup_steps=warmup_steps,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        save_strategy="epoch",
        load_best_model_at_end=True,
        **{eval_key: "epoch"}
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # Save the fine-tuned model
    model_path = "./bert_classifier"
    model.save_pretrained(model_path)

    # Save the tokenizer alongside the model; loading a tokenizer from this
    # directory fails if only the model weights are present.
    if tokenizer is None:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    tokenizer.save_pretrained(model_path)
    print(f"BERT model saved to {model_path}")

    # Save the label mapping for inference as {"<class id>": "<category name>"}
    labels = {v: k for k, v in label_map.items()}
    pd.Series(labels).to_json(os.path.join(model_path, 'labels.json'))

    return trainer, label_map

# Script entry point for the classification cell: build the synthetic data,
# fine-tune the classifier, then report validation metrics.
if __name__ == "__main__":
    df_tickets = generate_synthetic_data()
    train_ds, val_ds, label_mapping, tokenizer = prepare_data(df_tickets)
    
    # Train the model (the returned label map is the one passed in, so it is discarded)
    trainer, _ = train_bert_model(train_ds, val_ds, label_mapping)
    
    # Evaluate on the validation set given to the Trainer
    results = trainer.evaluate()
    print("\nFinal Evaluation Results:")
    print(results)
    
    # The printed metrics include the accuracy and weighted F1 from compute_metrics.
    # NOTE: the synthetic data repeats ten fixed templates, so the validation split
    # contains texts that also appear in training; these scores do not indicate how
    # the model generalizes to real tickets.

In [None]:
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sklearn.metrics import f1_score
from openai import OpenAI

# --- 1. Knowledge Base Simulation ---
def create_knowledge_base():
    """Return a new list of canned support articles used as the retrieval corpus."""
    password_reset = "To reset your password, go to the login page and click 'Forgot Password'. Follow the instructions sent to your email."
    billing = "Billing inquiries can be handled by our billing department. Please provide your account number for faster service."
    troubleshooting = "For technical issues, first try restarting your device. If the problem persists, please describe the issue in detail."
    refunds = "Refunds are processed within 5-7 business days after the request has been approved. You will receive an email confirmation."
    locked_account = "Your account might be locked due to multiple failed login attempts. Please wait 15 minutes or contact support."
    subscription = "To change your subscription, log in to your account, navigate to 'Manage Subscription', and select a new plan."
    known_bug = "We are sorry to hear you're experiencing a bug. Our engineers are working on a fix for this issue."

    articles = [
        password_reset,
        billing,
        troubleshooting,
        refunds,
        locked_account,
        subscription,
        known_bug,
    ]
    return articles

# --- 2. RAG System Components ---
class RAGSystem:
    """Minimal retrieval-augmented reply drafter.

    Documents are embedded with a SentenceTransformer and stored in a flat L2
    FAISS index; the closest document(s) to a query are inserted into a prompt
    for a small causal language model that drafts the reply.
    """

    # Number of tokens the LLM may generate per reply (kept small for the demo).
    MAX_NEW_TOKENS = 50
    # Context window assumed for the demo model (GPT-2 family: 1024 positions).
    MAX_CONTEXT_TOKENS = 1024

    def __init__(self, knowledge_base):
        """Embed ``knowledge_base`` (a non-empty list of strings) and load the LLM.

        Raises:
            ValueError: If ``knowledge_base`` is empty.
        """
        if not knowledge_base:
            raise ValueError("knowledge_base must contain at least one document")

        # Using a Sentence Transformer to create embeddings
        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
        self.knowledge_base = knowledge_base
        self.embeddings = self.encoder.encode(knowledge_base)

        # Build a FAISS index for efficient similarity search
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(np.array(self.embeddings).astype('float32'))

        # Using a small open-source LLM for demonstration
        # In a real project, you might use a more powerful model like Llama 3 or GPT-4 via an API
        model_name = "distilgpt2"
        self.llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.llm_model = AutoModelForCausalLM.from_pretrained(model_name)

        # NOTE: For this example, we'll use a local LLM. A more common approach is using an API from a provider like OpenAI.
        # client = OpenAI(api_key="YOUR_API_KEY")

    def retrieve(self, query, k=1):
        """Return up to ``k`` knowledge-base documents closest to ``query``.

        ``k`` is clamped to the corpus size, and ``-1`` ids (which FAISS uses to
        pad missing neighbours) are dropped so they can never be interpreted as
        "last document" by Python's negative indexing.
        """
        k = min(k, len(self.knowledge_base))
        if k <= 0:
            return []

        query_embedding = self.encoder.encode([query])
        _, neighbor_ids = self.index.search(np.array(query_embedding).astype('float32'), k)
        return [self.knowledge_base[i] for i in neighbor_ids[0] if i >= 0]

    def generate_response(self, query, context):
        """Generate a reply for ``query`` grounded in ``context``.

        Args:
            query: The customer's message.
            context: Retrieved text, either a single string or a list/tuple of
                strings (as returned by ``retrieve``).

        Returns:
            The newly generated text only (the prompt is not included), stripped
            of surrounding whitespace.
        """
        # Join multiple passages so the prompt contains plain text rather than
        # the repr of a Python list.
        if isinstance(context, (list, tuple)):
            context = "\n".join(context)

        # This is the RAG prompt template
        prompt = f"Use the following information to draft a helpful customer support reply:\n\nContext: {context}\n\nCustomer inquiry: {query}\n\nReply:"

        # Leave room for the generated tokens inside the model's context window.
        inputs = self.llm_tokenizer(
            prompt,
            return_tensors='pt',
            max_length=self.MAX_CONTEXT_TOKENS - self.MAX_NEW_TOKENS,
            truncation=True
        )

        # Reduce inference time by generating a shorter response for this demo
        outputs = self.llm_model.generate(
            **inputs,
            max_new_tokens=self.MAX_NEW_TOKENS,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            pad_token_id=self.llm_tokenizer.eos_token_id
        )

        # Decode only the tokens after the prompt. Splitting the full text on
        # "Reply:" breaks when the marker was truncated away or also appears in
        # the customer's message.
        prompt_length = inputs['input_ids'].shape[1]
        generated_ids = outputs[0][prompt_length:]
        return self.llm_tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    def auto_draft_reply(self, customer_query):
        """Retrieve the closest document for ``customer_query`` and draft a reply from it."""
        retrieved_context = self.retrieve(customer_query)
        reply = self.generate_response(customer_query, retrieved_context)
        return reply

# --- 3. Evaluation ---
# F1-score for generative models is a tricky metric to measure automatically.
# It often involves comparing the generated response to a reference response.
# A more robust approach uses an LLM-as-a-judge framework, but for this demo,
# we'll use a simple token-level F1-score comparison.

def evaluate_f1_score(predicted, reference):
    """Token-level F1 between a generated reply and a reference reply.

    Both texts are lower-cased and split on whitespace. Overlap is counted as a
    multiset intersection, so a repeated word contributes once per matched
    occurrence and numerator and denominators use the same unit. Identical
    texts therefore always score 1.0.

    Args:
        predicted: Generated text.
        reference: Reference text.

    Returns:
        F1 in [0.0, 1.0]; 0.0 when the texts share no token (including when
        either is empty).
    """
    from collections import Counter

    pred_tokens = predicted.lower().split()
    ref_tokens = reference.lower().split()

    # Simple F1-score calculation (can be misleading for text generation)
    overlap = sum((Counter(pred_tokens) & Counter(ref_tokens)).values())
    if overlap == 0:
        return 0.0

    precision = overlap / len(pred_tokens)
    recall = overlap / len(ref_tokens)

    return 2 * (precision * recall) / (precision + recall)

# Script entry point for the RAG cell: draft a reply for each test query and
# score it against a hand-written reference with token-level F1.
if __name__ == "__main__":
    # Generation samples tokens (do_sample=True), so fix the torch seed to make
    # the drafted replies and their scores repeatable between runs.
    torch.manual_seed(42)

    kb = create_knowledge_base()
    rag = RAGSystem(kb)

    # Test cases with reference answers
    test_cases = [
        {
            'query': "My account is locked. How do I unlock it?",
            'reference_reply': "Your account is likely locked due to too many failed login attempts. Please wait 15 minutes or contact support for further assistance."
        },
        {
            'query': "I want to get a refund for my purchase.",
            'reference_reply': "Refunds are typically processed within 5-7 business days after the request is approved. You will get an email notification once it's complete."
        }
    ]

    f1_scores = []

    print("--- RAG Reply Auto-Drafting Demo ---")
    for case in test_cases:
        query = case['query']
        reference = case['reference_reply']

        # Auto-draft the reply
        generated_reply = rag.auto_draft_reply(query)

        # Calculate F1-score
        f1 = evaluate_f1_score(generated_reply, reference)
        f1_scores.append(f1)

        print(f"\nQuery: {query}")
        print(f"Reference Reply: {reference}")
        print(f"Auto-Drafted Reply: {generated_reply}")
        print(f"F1-Score: {f1:.4f}")

    avg_f1 = np.mean(f1_scores)
    print(f"\nAverage F1-Score: {avg_f1:.4f}")
    # Note: token-overlap F1 against a single reference is a rough proxy for reply
    # quality, and a small demo model is not expected to score highly on it. A
    # target such as 90% F1 would realistically need a stronger model and a
    # human-in-the-loop or LLM-as-a-judge evaluation.

In [None]:
import os
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import json
import uvicorn
from starlette.responses import JSONResponse
import logging

# Set up logging: INFO level on the root logger, plus a module-level logger
# used by the loading code and the endpoints below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Model Paths ---
# Relative paths: resolved against the process's working directory at startup.
BERT_MODEL_PATH = "./bert_classifier"
RAG_MODEL_PATH = "./rag_system"  # Placeholder; not referenced elsewhere in the visible code

# --- API Initialization ---
# The FastAPI application object that the route decorators below attach to.
app = FastAPI(
    title="Customer Support Automation API",
    description="BERT for ticket classification & RAG for reply generation.",
    version="1.0.0"
)

# --- Load BERT Model ---
# Loaded once at import time. All three names are defined up front so that they
# exist (as "not loaded") even when loading fails part-way through.
bert_tokenizer = None
bert_model = None
label_map = {}
try:
    logger.info("Loading BERT model for classification...")
    bert_tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH)
    bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_PATH)
    with open(os.path.join(BERT_MODEL_PATH, 'labels.json'), 'r') as f:
        # labels.json is stored as {"<class id>": "<category name>"}. JSON keys
        # are always strings, so convert them back to int to match the integer
        # class id produced by argmax at inference time. (Inverting the mapping
        # here would make every lookup by class id fail.)
        label_map = {int(class_id): label for class_id, label in json.load(f).items()}
    logger.info("BERT model loaded successfully.")
except Exception as e:
    logger.error(f"Error loading BERT model: {e}")
    # Reset everything so a partially loaded state is never served.
    bert_tokenizer = None
    bert_model = None
    label_map = {}

# --- Placeholder for RAG system (as it's more complex to load) ---
# A real deployment would load the full retrieval + generation system here.
# This demo substitutes a stateless, keyword-matching responder.
class RAGReply:
    """Stateless stand-in that picks a canned reply by keyword."""

    def auto_draft_reply(self, query):
        """Return the canned reply for the first keyword found in ``query`` (case-insensitive)."""
        lowered = query.lower()
        # Checked in order: "billing" takes precedence over "refund".
        keyword_replies = (
            ("billing", "For billing inquiries, please provide your account details."),
            ("refund", "Refunds are processed within 5-7 business days."),
        )
        for keyword, canned_reply in keyword_replies:
            if keyword in lowered:
                return canned_reply
        return "We are reviewing your request and will get back to you shortly."

rag_replies = RAGReply()


# --- API Models for Input/Output ---
# Request body shared by the /classify and /draft_reply endpoints.
class TicketInput(BaseModel):
    # Raw text of the customer's support ticket.
    ticket_text: str

# Response body of the /classify endpoint.
class ClassificationOutput(BaseModel):
    # Category name looked up from the predicted class id.
    predicted_class: str
    # Softmax probability assigned to the predicted class.
    confidence: float

# Response body of the /draft_reply endpoint.
class ReplyOutput(BaseModel):
    # Suggested reply text for the ticket.
    drafted_reply: str

# --- Endpoints ---
# GET /: liveness probe returning a constant status payload.
@app.get("/")
def health_check():
    status_payload = {"status": "healthy"}
    return status_payload

# POST /classify: predict the category of a ticket and report the softmax
# probability of that category. Responds with HTTP 500 and a message when the
# model was not loaded or when inference raises.
@app.post("/classify", response_model=ClassificationOutput)
def classify_ticket(ticket: TicketInput):
    # Guard clause: nothing to run if model loading failed at startup.
    if not bert_model:
        model_missing = {"message": "BERT model not loaded. Check server logs."}
        return JSONResponse(status_code=500, content=model_missing)

    try:
        encoded = bert_tokenizer(
            ticket.ticket_text,
            return_tensors="pt",
            truncation=True,
            padding=True,
        )

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            class_probs = torch.softmax(bert_model(**encoded).logits, dim=1)

        top_class_id = class_probs.argmax().item()
        top_confidence = class_probs[0][top_class_id].item()

        return {
            "predicted_class": label_map[top_class_id],
            "confidence": top_confidence
        }

    except Exception as e:
        logger.error(f"Error during classification: {e}")
        failure = {"message": "Internal server error during classification."}
        return JSONResponse(status_code=500, content=failure)

# POST /draft_reply: return a suggested reply for the ticket text, or HTTP 500
# with a message if drafting raises.
@app.post("/draft_reply", response_model=ReplyOutput)
def draft_reply(ticket: TicketInput):
    try:
        reply_text = rag_replies.auto_draft_reply(ticket.ticket_text)
    except Exception as e:
        logger.error(f"Error during reply drafting: {e}")
        failure = {"message": "Internal server error during reply drafting."}
        return JSONResponse(status_code=500, content=failure)

    return {"drafted_reply": reply_text}

# Local development entry point.
# "main:app" is an import string (module "main", attribute "app"), so this
# presumably expects the code to be saved as main.py and started from its own
# directory — TODO confirm against the project layout.
# host="0.0.0.0" listens on all interfaces; reload=True restarts the server on
# code changes and is meant for development use.
if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)

In [None]:
%%writefile Dockerfile
# The cell magic above writes this cell's content to a file named "Dockerfile";
# without it the notebook kernel would try to execute the Dockerfile as Python.

# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies (compiler toolchain for packages that build from source)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file into the working directory
# NOTE(review): requirements.txt is not created anywhere in this notebook; it must
# exist in the build context and list the API's runtime dependencies.
COPY requirements.txt .

# Install the dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code into the container
COPY . .

# Expose the port the app runs on
EXPOSE 8000

# Run the application using Uvicorn
# NOTE(review): "app.main:app" resolves to /app/app/main.py inside the image, while
# the API code starts itself with the import string "main:app". Confirm which
# layout is intended and make the two agree.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]