In [2]:
import pandas as pd
import numpy as np

In [4]:
# NOTE(review): absolute, machine-specific location — the app/main.py cell further
# down loads the same file name through a project-relative "data/..." path.
raw_csv_path = r"C:\Users\Kaushal\Desktop\AI-ticket-resolution\data\software_ticket_resolution_70k.csv"
df = pd.read_csv(raw_csv_path)

In [5]:
# Preview the leading rows (up to 25) to sanity-check the column layout.
df.head(25)

Unnamed: 0,ticket_id,title,description,tech_stack,error_logs,priority,resolution,status
0,TCK-000001,API returns 500 error,The application crashes when handling concurre...,Docker Kubernetes,OutOfMemoryError,Low,Optimized database queries and increased conne...,Resolved
1,TCK-000002,API returns 500 error,Service becomes unavailable after new release.,FastAPI SQLAlchemy MySQL,413 Request Entity Too Large,Medium,Increased worker count and optimized async han...,Resolved
2,TCK-000003,FastAPI app crashes under load,Users are seeing internal server errors interm...,Node.js Express MongoDB,OutOfMemoryError,High,Updated server upload size limits.,Resolved
3,TCK-000004,FastAPI app crashes under load,Service becomes unavailable after new release.,Node.js Express MongoDB,503 Service Unavailable,Medium,Limited worker concurrency and fixed memory leak.,Resolved
4,TCK-000005,Slow API response time,Uploads above size limit fail unexpectedly.,Docker Kubernetes,RuntimeError: Event loop is closed,Low,Adjusted token expiry and synchronized auth se...,Resolved
5,TCK-000006,CI pipeline failing intermittently,Container exits immediately after startup.,Django REST PostgreSQL,OutOfMemoryError,Medium,Fixed unhandled exception and added proper err...,Resolved
6,TCK-000007,API returns 500 error,Valid users are logged out unexpectedly.,FastAPI SQLAlchemy MySQL,ModuleNotFoundError,Medium,Updated server upload size limits.,Resolved
7,TCK-000008,Docker container fails to start,The application crashes when handling concurre...,Flask Gunicorn Nginx,503 Service Unavailable,Low,Adjusted token expiry and synchronized auth se...,Resolved
8,TCK-000009,API returns 500 error,Service becomes unavailable after new release.,Docker Kubernetes,413 Request Entity Too Large,Medium,Updated server upload size limits.,Resolved
9,TCK-000010,Memory leak in background worker,Users are seeing internal server errors interm...,FastAPI Uvicorn PostgreSQL,OutOfMemoryError,Medium,Fixed unhandled exception and added proper err...,Resolved


In [13]:
# Number of distinct ticket ids filed against each tech stack.
tickets_per_stack = df.groupby("tech_stack")["ticket_id"].nunique()
tickets_per_stack

tech_stack
Django REST PostgreSQL        8989
Docker Kubernetes             8859
FastAPI Redis Celery          8850
FastAPI SQLAlchemy MySQL      8789
FastAPI Uvicorn PostgreSQL    8694
Flask Gunicorn Nginx          8594
Node.js Express MongoDB       8560
Spring Boot MySQL             8665
Name: ticket_id, dtype: int64

In [16]:
# Per-column tally of missing values (isna is the same check as isnull).
df.isna().sum()

ticket_id      0
title          0
description    0
tech_stack     0
error_logs     0
priority       0
resolution     0
status         0
dtype: int64

In [1]:
# One-time setup: uncomment and run only if the tf-keras package is missing from this kernel.
# %pip install tf-keras


## Embedding (embedder.py)

In [19]:
# app/embedder.py

from sentence_transformers import SentenceTransformer
from typing import List
import pandas as pd

class TicketEmbedder:
    """
    Thin wrapper around a SentenceTransformer model that turns ticket text
    into dense vectors.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """
        Initializes embedding model.

        Args:
            model_name (str): Model identifier handed to SentenceTransformer.
        """
        self.model = SentenceTransformer(model_name)

    @staticmethod
    def prepare_text(row: pd.Series) -> str:
        """
        Combine relevant ticket fields into a single text block.

        Args:
            row (pd.Series): One ticket; must expose 'title', 'description',
                'error_logs' and 'resolution'.

        Returns:
            str: Multi-line text with one labelled line per field. The
            indentation inside the template is part of the returned string.
        """
        return f"""
        Title: {row['title']}
        Description: {row['description']}
        Error Logs: {row['error_logs']}
        Resolution: {row['resolution']}
        """

    def embed(self, texts, show_progress_bar: bool = False) -> "np.ndarray":
        """
        Generate embeddings for raw strings.

        Args:
            texts: A single string or a sequence of strings.
            show_progress_bar (bool): Forwarded to the model's encode call.

        Returns:
            np.ndarray: Whatever the model's encode call returns for the
            list of texts (requested as a numpy array).

        Raises:
            ValueError: If there is nothing to embed.
        """
        # A bare string would otherwise be treated as one sentence and come
        # back without the leading "number of texts" axis.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        if not texts:
            raise ValueError("No text found for embedding")

        return self.model.encode(
            texts,
            show_progress_bar=show_progress_bar,
            convert_to_numpy=True
        )

    def embed_tickets(self, df: pd.DataFrame) -> "np.ndarray":
        """
        Generate embeddings for all tickets.

        Args:
            df (pd.DataFrame): Tickets with the columns read by prepare_text.

        Returns:
            np.ndarray: One embedding row per ticket, in frame order.

        Raises:
            ValueError: If the frame has no rows.
        """
        # DataFrame.apply(axis=1) on a frame without rows hands back a
        # DataFrame, which has no .tolist(); fail with a clear message instead.
        if len(df) == 0:
            raise ValueError("No tickets found for embedding")

        texts = df.apply(self.prepare_text, axis=1).tolist()
        return self.embed(texts, show_progress_bar=True)





## FAISS index (faiss_index.py)

In [None]:
# app/faiss_index.py

import faiss
import numpy as np
from typing import Tuple


class TicketFAISSIndex:
    """
    FAISS index for semantic search over support tickets.

    Vectors are ranked by inner product (faiss.IndexFlatIP). With
    normalize=True (the default) every stored and query vector is scaled to
    unit length before it reaches FAISS, so the score is the cosine
    similarity. Pass normalize=False to rank by the raw inner product.
    """

    def __init__(self, embedding_dim: int, normalize: bool = True):
        """
        Initialize FAISS index.

        Args:
            embedding_dim (int): Dimension of embedding vectors (e.g. 384)
            normalize (bool): L2-normalize vectors on add and on search.
        """
        self.embedding_dim = embedding_dim
        self.normalize = normalize
        self.index = faiss.IndexFlatIP(embedding_dim)

    def _prepare(self, vectors: np.ndarray) -> np.ndarray:
        """
        Return a float32, C-contiguous (and optionally unit-length) version
        of a 2D array without modifying the caller's data.
        """
        prepared = np.ascontiguousarray(vectors, dtype=np.float32)
        if self.normalize:
            norms = np.linalg.norm(prepared, axis=1, keepdims=True)
            # All-zero rows stay zero instead of turning into NaN.
            norms[norms == 0] = 1.0
            # Division allocates a new array, so the input is never mutated.
            prepared = prepared / norms
        return prepared

    def add_embeddings(self, embeddings: np.ndarray):
        """
        Add ticket embeddings to FAISS index.

        Args:
            embeddings (np.ndarray): Shape (num_tickets, embedding_dim)

        Raises:
            ValueError: If the array is not 2D or has the wrong width.
        """
        embeddings = np.asarray(embeddings)

        if embeddings.ndim != 2:
            raise ValueError("Embeddings must be a 2D numpy array")

        if embeddings.shape[1] != self.embedding_dim:
            raise ValueError(
                f"Expected embedding dimension {self.embedding_dim}, "
                f"got {embeddings.shape[1]}"
            )

        if embeddings.shape[0] == 0:
            # Nothing to store.
            return

        self.index.add(self._prepare(embeddings))

    def search(
        self,
        query_embedding: np.ndarray,
        top_k: int = 5
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Search for top-k most similar tickets.

        Args:
            query_embedding (np.ndarray): Shape (embedding_dim,) or
                (num_queries, embedding_dim)
            top_k (int): Number of results to return per query (>= 1)

        Returns:
            indices (np.ndarray): Indices of matched tickets
            scores (np.ndarray): Similarity scores

        Raises:
            ValueError: If top_k is not positive or the query has the wrong
                shape.
        """
        if top_k < 1:
            raise ValueError("top_k must be a positive integer")

        query_embedding = np.asarray(query_embedding)

        if query_embedding.ndim == 1:
            query_embedding = query_embedding.reshape(1, -1)

        if query_embedding.ndim != 2:
            raise ValueError("Query embedding must be a 1D or 2D numpy array")

        if query_embedding.shape[1] != self.embedding_dim:
            raise ValueError(
                f"Expected query embedding dimension {self.embedding_dim}, "
                f"got {query_embedding.shape[1]}"
            )

        # FAISS hands back (scores, indices); callers of this class get the
        # pair in (indices, scores) order.
        scores, indices = self.index.search(self._prepare(query_embedding), top_k)
        return indices, scores

    def __len__(self) -> int:
        """
        Number of vectors in the index.
        """
        return self.index.ntotal


## rag_pipeline.py

In [1]:
# app/rag_pipeline.py
import pandas as pd
from app.embedder import TicketEmbedder
from app.faiss_index import TicketFAISSIndex


class RAGPipeline:
    """
    Retrieval pipeline over historical tickets.

    On construction every ticket in the CSV is embedded and added to a
    TicketFAISSIndex. resolve_ticket embeds a query the same way and returns
    the stored resolution of the closest ticket.
    """

    # Returned whenever the search yields no usable hit.
    NO_MATCH_MESSAGE = "No similar tickets found."

    def __init__(self, data_path: str):
        """
        Load the ticket CSV, embed it and build the search index.

        Args:
            data_path (str): Path of a CSV with 'title', 'description',
                'error_logs' and 'resolution' columns.

        Raises:
            ValueError: If the CSV contains no rows.
        """
        print("Loading data from:", data_path)

        self.embedder = TicketEmbedder()

        df = pd.read_csv(data_path)

        texts = (
            df["title"].fillna("") + " " +
            df["description"].fillna("") + " " +
            df["error_logs"].fillna("")
        ).tolist()

        if len(texts) == 0:
            raise ValueError("No text found for embedding")

        embeddings = self._encode(texts)

        print("Embeddings shape:", embeddings.shape)

        self.index = TicketFAISSIndex(embedding_dim=embeddings.shape[1])
        self.index.add_embeddings(embeddings)

        self.texts = texts  # store for retrieval
        # Row i of the index corresponds to resolutions[i].
        self.resolutions = df["resolution"].fillna("").tolist()

    def _encode(self, texts):
        """
        Embed a list of strings with the embedder's sentence-transformer model.

        Vectors are unit-normalized so that the inner-product index ranks by
        cosine similarity; tickets and queries both go through this method so
        they are encoded identically.
        """
        return self.embedder.model.encode(
            texts,
            convert_to_numpy=True,
            normalize_embeddings=True
        )

    def resolve_ticket(self, query: str, top_k: int = 5) -> str:
        """
        Return the resolution of the stored ticket most similar to the query.

        Args:
            query (str): Free-text description of the problem.
            top_k (int): Number of neighbours requested from the index; the
                best-ranked valid one supplies the answer.

        Returns:
            str: The matched resolution, or NO_MATCH_MESSAGE when the query
            is blank or the search produces no valid hit.
        """
        if not query or not query.strip():
            return self.NO_MATCH_MESSAGE

        # The index searches vectors, not text, so the query is embedded first.
        query_embedding = self._encode([query])
        indices, _scores = self.index.search(query_embedding, top_k)

        # Single query -> first row. FAISS fills missing neighbours with -1
        # when fewer than top_k vectors are stored, so skip out-of-range ids.
        for hit in indices[0]:
            position = int(hit)
            if 0 <= position < len(self.resolutions):
                return self.resolutions[position]

        return self.NO_MATCH_MESSAGE





ModuleNotFoundError: No module named 'faiss'

## main.py

In [None]:
# app/main.py

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from app.rag_pipeline import RAGPipeline

# Initialize FastAPI app; the route decorators below register their handlers on it.
app = FastAPI(
    title="AI Ticket Resolution System",
    description="RAG-based support ticket resolution using FAISS and embeddings",
    version="1.0.0"
)

# Initialize RAG pipeline (runs once at startup).
# This is a module-level statement, so it executes when the module is imported:
# RAGPipeline.__init__ reads the CSV, embeds the tickets and builds the index
# before any request can be served.
# NOTE(review): the path is relative, so it is resolved against the process
# working directory — confirm the server is launched from the project root.
rag_pipeline = RAGPipeline(
    data_path="data/software_ticket_resolution_70k.csv"
)


# -------- Request & Response Models -------- #

# Request body accepted by POST /resolve.
class TicketQuery(BaseModel):
    # Free-text problem description; the /resolve handler rejects it when blank.
    query: str
    # Forwarded unchanged to RAGPipeline.resolve_ticket as its top_k argument.
    top_k: int = 5


# Response body of POST /resolve (declared as that route's response_model).
class TicketResponse(BaseModel):
    # Value returned by RAGPipeline.resolve_ticket for the submitted query.
    resolution: str


# -------- API Endpoints -------- #

@app.get("/")
def health_check():
    """
    Health check endpoint
    """
    # Constant payload: the route only confirms that the app answers requests.
    liveness = {
        "status": "running",
        "message": "AI Ticket Resolution API is live",
    }
    return liveness


@app.post("/resolve", response_model=TicketResponse)
def resolve_ticket(request: TicketQuery):
    """
    Resolve a support ticket using RAG
    """
    # Whitespace-only input carries nothing to search for.
    if not request.query.strip():
        raise HTTPException(status_code=400, detail="Query cannot be empty")

    # top_k is handed to the vector search as the number of neighbours to
    # fetch; a zero or negative count cannot be satisfied, so reject it here
    # as a client error instead of letting it surface as a server error.
    if request.top_k < 1:
        raise HTTPException(status_code=400, detail="top_k must be at least 1")

    resolution = rag_pipeline.resolve_ticket(
        query=request.query,
        top_k=request.top_k
    )

    return {"resolution": resolution}
