In [1]:
import Py_files.twcs_processor as processor
import pandas as pd
import json
from Notebooks.VectorDBStructure.query import query_similar
from Py_files.reranker import CrossEncoderReranker
from sklearn.preprocessing import StandardScaler
from typing import List, Dict

In [2]:
import os

import openai
from dotenv import load_dotenv

# Load environment variables from the .env file before anything reads them.
load_dotenv()

from Py_files.prompts import ENDBOT_PROMPT

# API key comes from the environment; it is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Client used by the final chat-completion cell.
client = openai.OpenAI(api_key=OPENAI_API_KEY)

In [3]:
# Example customer message that drives the rest of the notebook.
user_input = "My Echo keeps playing the same song over and over, how do I fix recommendations?"

In [4]:
# NOTE(review): user_input is overwritten with its cleaned form, so re-running
# this cell feeds already-cleaned text back into _clean_single. Later cells
# (reranking, the "prompt" column) read this cleaned value.
# Both helpers are underscore-prefixed and called on the class itself —
# presumably static methods; TODO confirm.
user_input = processor.TWCSProcessor._clean_single(user_input)
# presumably reshapes the cleaned text into the conversation format used downstream — TODO confirm
user_input_processed = processor.TWCSProcessor._convert_to_conversation(user_input)

In [5]:
# One-row frame holding the processed input under the 'cleaned_conversations' column
user_input_df = pd.DataFrame({"cleaned_conversations": [user_input_processed]})

In [6]:
# NOTE(review): mid-notebook import; consider moving it to the top import cell.
from Py_files.llm_extractor import LLMExtractor
# Wrap the one-row input frame; the four calls below run in this fixed order.
pipe = LLMExtractor(dataframe = user_input_df)

# Step 1 (per the extractor's log output): issue-types, products, services
df1 = pipe.extract_entities()

# Step 2 (per the log output): pack the entities into a single JSON field
df2 = pipe.process_entities_json()

# create RDF triples
df3 = pipe.extract_relationships()

# produce suggested resolutions; among the visible cells only df4 is read later
# (its 'entities', 'relationship' and 'resolution' columns)
df4 = pipe.extract_resolution()

2025-05-15 12:41:44,441 [INFO] LLMExtractor: Loaded data – 1 rows
2025-05-15 12:41:44,442 [INFO] LLMExtractor: STEP 1 – Extracting issue-types, products, services
LLM steps:   0%|          | 0/1 [00:00<?, ?it/s]2025-05-15 12:41:45,519 [INFO] httpx: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
LLM steps: 100%|██████████| 1/1 [00:01<00:00,  1.08s/it]
LLM steps:   0%|          | 0/1 [00:00<?, ?it/s]2025-05-15 12:41:46,278 [INFO] httpx: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
LLM steps: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s]
LLM steps:   0%|          | 0/1 [00:00<?, ?it/s]2025-05-15 12:41:47,122 [INFO] httpx: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
LLM steps: 100%|██████████| 1/1 [00:00<00:00,  1.18it/s]
2025-05-15 12:41:47,127 [INFO] LLMExtractor: STEP 2 – Packing entities into single JSON field
LLM steps: 100%|██████████| 1/1 [00:00<?, ?it/s]
2025-05-15 12:41:47,130 [

In [8]:
from Notebooks.VectorDBStructure.db_structure import DatabaseStructure

# Handle used here to fix relationships and in the next cell to build the embedding.
db = DatabaseStructure()

# Alias for the processed conversation text.
cleaned_conversations = user_input_processed

# Read the three extracted fields from the first row of the extractor output.
entities = df4["entities"].iloc[0]
relationship = df4["relationship"].iloc[0]
resolution = df4["resolution"].iloc[0]

fixed_relationships = db.fix_relationships(relationship, resolution)

2025-05-15 12:41:56,329 [INFO] datasets: PyTorch version 2.1.1+cu121 available.
2025-05-15 12:41:56,330 [INFO] datasets: TensorFlow version 2.12.1 available.
2025-05-15 12:41:56,434 [INFO] sentence_transformers.SentenceTransformer: Use pytorch device_name: cuda
2025-05-15 12:41:56,435 [INFO] sentence_transformers.SentenceTransformer: Load pretrained SentenceTransformer: all-MiniLM-L6-v2


In [9]:
# Build the embedding from the entities and the fixed relationships, then
# convert it to a plain list before it is passed to the similarity query.
embedding_vector = db.text_to_embedding(entities, fixed_relationships)
embedding = embedding_vector.tolist()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
# Number of neighbours fetched from Elastic. The reranker keeps the same number
# so that, normally, every retrieved hit also receives a cross-encoder score.
RETRIEVAL_K = 50

# Step 1: Initialize the reranker
reranker = CrossEncoderReranker(top_k=RETRIEVAL_K)

# Step 2: Query Elastic and get hits
hits = query_similar(embedding, k=RETRIEVAL_K)

# Step 3: Extract conversation candidates
candidates = [hit["_source"]["Conversation_History"]["conversation"] for hit in hits]

# Step 4: Rerank with cross-encoder
reranked = reranker.rerank(user_input, candidates)

# Step 5: Create a mapping: conversation -> (score, rank), rank is 1-based.
# The reranker output is assumed to be ordered best-first (its position is used
# as the rank). setdefault keeps the first, i.e. best-ranked, entry when the
# same conversation text occurs in more than one hit.
score_rank_map = {}
for rank, (conv, score) in enumerate(reranked, start=1):
    score_rank_map.setdefault(conv, (score, rank))

# Cross-encoder scores can be negative (the displayed results are all below
# zero), so a 0.0 placeholder would make an unranked hit look better than every
# reranked one. Fall back to the worst observed score instead; 0.0 is only used
# when the reranker returned nothing at all.
worst_score = min((pair[1] for pair in reranked), default=0.0)
unranked_default = (worst_score, None)

# Step 6: Construct final rows with rank
result_columns = [
    "prompt", "id", "similarity_score", "rerank_score", "rerank_rank",
    "ChatID", "Company_name", "Conversation_History", "Entities", "Relationships",
]
rows = []
for hit in hits:
    src = hit["_source"]
    conv = src["Conversation_History"]["conversation"]
    score, rank = score_rank_map.get(conv, unranked_default)  # not reranked if outside top_k

    rows.append({
        "prompt": user_input,
        "id": hit["_id"],
        "similarity_score": hit["_score"],
        "rerank_score": score,
        "rerank_rank": rank,
        "ChatID": src["ChatID"],
        "Company_name": src["Company_name"],
        "Conversation_History": conv,
        "Entities": json.dumps(src["Entities"]),
        "Relationships": json.dumps(src["Relationships"])
    })

# Step 7: Create DataFrame sorted by rerank_rank (unranked rows last).
# Passing the column list keeps the frame well-formed (and sortable) even when
# the query returned no hits.
reranked_qa = (
    pd.DataFrame(rows, columns=result_columns)
    .sort_values(by="rerank_rank", na_position="last")
    .reset_index(drop=True)
)


2025-05-15 12:41:59,315 [INFO] elastic_transport.transport: GET http://localhost:9200/chat_embeddings/_mapping [status:200 duration:0.022s]
2025-05-15 12:41:59,360 [INFO] elastic_transport.transport: POST http://localhost:9200/chat_embeddings/_search [status:200 duration:0.044s]


In [11]:
# Weights of the two standardised scores in the blended ranking score.
SIM_WEIGHT = 0.3
RERANK_WEIGHT = 0.7

# Standardise the vector-similarity and cross-encoder scores (missing values
# treated as 0) so that they can be combined on a common scale.
raw_scores = reranked_qa[["similarity_score", "rerank_score"]].fillna(0)
scaler = StandardScaler()
reranked_qa[["sim_norm", "rerank_norm"]] = scaler.fit_transform(raw_scores)

reranked_qa["hybrid_score"] = (
    SIM_WEIGHT * reranked_qa["sim_norm"] + RERANK_WEIGHT * reranked_qa["rerank_norm"]
)

In [12]:
# How many of the best hybrid-scored rows are inspected, and how many distinct
# ones are kept for the prompt.
CANDIDATE_POOL_SIZE = 10
MAX_RESULTS = 5

top_candidates = (
    reranked_qa
    .sort_values(by="hybrid_score", ascending=False)
    .head(CANDIDATE_POOL_SIZE)
)

# Keep the best-scored row for each distinct (Entities, Relationships) pair.
# Both columns hold JSON strings, so plain equality is the right comparison.
# drop_duplicates preserves row order, index labels and column dtypes, and —
# unlike rebuilding a frame from collected rows — still returns a frame with
# all columns when nothing survives, so later sort_values calls keep working.
top5_df = (
    top_candidates
    .drop_duplicates(subset=["Entities", "Relationships"], keep="first")
    .head(MAX_RESULTS)
    .copy()
)

In [13]:
# Display the deduplicated top candidates for inspection.
top5_df

Unnamed: 0,prompt,id,similarity_score,rerank_score,rerank_rank,ChatID,Company_name,Conversation_History,Entities,Relationships,sim_norm,rerank_norm,hybrid_score
0,My Echo keeps playing the same song over and o...,S6hsxZYBEfCvEamxqVZ5,1.734153,-2.163459,1,3482,AmazonHelp,Customer kids listening to disney soundtracks ...,"{""products"": [""Echo""], ""services"": [], ""issue_...",[],3.412532,4.331786,4.05601
1,My Echo keeps playing the same song over and o...,bqhsxZYBEfCvEamxo0mF,1.545723,-3.474696,2,189,SpotifyCares,Customer Can fix the android app so that it do...,"{""products"": [""android app"", ""Spotify"", ""Googl...",[],-0.80821,3.650772,2.313078
2,My Echo keeps playing the same song over and o...,YKhsxZYBEfCvEamxpk62,1.636411,-5.203551,3,1455,AmazonHelp,Customer echo app is virtually useless Wont le...,"{""products"": [""echo app"", ""Apple Music"", ""Amaz...","[{""subject"": ""echo app"", ""predicate"": ""hasIssu...",1.223162,2.752862,2.293952
4,My Echo keeps playing the same song over and o...,c6hsxZYBEfCvEamxq1kG,1.679368,-8.606672,5,4290,SpotifyCares,Customer Hi Spotify my saved songs list keeps ...,"{""products"": [], ""services"": [], ""issue_types""...",[],2.185383,0.985393,1.34539
3,My Echo keeps playing the same song over and o...,BqhsxZYBEfCvEamxq1yR,1.583697,-8.449755,4,4949,AmazonHelp,Customer got my Echo Dot today and its so cool...,"{""products"": [""Echo Dot""], ""services"": [""Echo ...",[],0.042402,1.06689,0.759544


In [14]:
def parse_conversation(text: str) -> List[Dict[str, str]]:
    """Split a flat transcript into a list of speaker-tagged messages.

    A line opens a new entry for a speaker when, after trimming surrounding
    whitespace, it starts with the standalone label ``Customer`` or ``Company``
    (any letter case), optionally followed directly by a colon. The label must
    be followed by whitespace, a colon, or the end of the line, so ordinary
    words such as "customers" or "company's" are not mistaken for labels.

    Any other non-empty line is kept as its own entry and attributed to the
    role of the previously parsed entry, or to ``Customer`` when nothing has
    been parsed yet.

    Args:
        text: Transcript with one utterance (or continuation) per line.

    Returns:
        Dicts with the keys ``"role"`` (``"Customer"`` or ``"Company"``) and
        ``"message"``, in input order. Blank lines and lines that contain only
        a label produce no entry.
    """
    speaker_labels = ("Customer", "Company")
    parsed: List[Dict[str, str]] = []

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        role = None
        msg = line

        for label in speaker_labels:
            head, rest = line[:len(label)], line[len(label):]
            # Require a boundary after the label so that a longer word sharing
            # the prefix is treated as message text, not as a speaker tag.
            at_boundary = not rest or rest[0].isspace() or rest[0] == ":"
            if head.lower() == label.lower() and at_boundary:
                role = label
                # Drop only a colon glued to the label ("Customer: ..."); any
                # later punctuation belongs to the message itself.
                if rest.startswith(":"):
                    rest = rest[1:]
                msg = rest.strip()
                break

        if role is None:
            # fallback: inherit the role of the last parsed entry
            role = parsed[-1]["role"] if parsed else "Customer"

        if msg:
            parsed.append({"role": role, "message": msg})

    return parsed



In [15]:
def _decode_json_field(raw, expected_types, default):
    """Return ``raw`` as a value of ``expected_types``, decoding JSON text if needed; else ``default``."""
    if isinstance(raw, expected_types):
        # Already decoded (e.g. the column holds real lists/dicts): keep it.
        return raw
    try:
        decoded = json.loads(raw)
    except (TypeError, ValueError):
        # TypeError: not str/bytes (None, NaN, ...); ValueError covers JSONDecodeError.
        return default
    return decoded if isinstance(decoded, expected_types) else default


def build_payload_per_qa(df_top5: pd.DataFrame, query: str) -> str:
    """Serialise the retrieved conversations and the user query into one JSON document.

    Args:
        df_top5: Frame with the columns ``Conversation_History``, ``Entities``,
            ``Relationships`` and ``Company_name``; one retrieved conversation
            per row.
        query: The user question; surrounding whitespace is stripped.

    Returns:
        A JSON string (2-space indent, non-ASCII preserved) with the keys
        ``"query"`` and ``"retrieved_answers"``. Each answer carries
        ``company_name``, ``conversation``, ``intents`` and ``relationships``.

    Per-row handling:
        * ``Conversation_History``: a string holding a JSON list/object is used
          decoded; any other string is split with ``parse_conversation``;
          non-string values are passed through unchanged.
        * ``Entities``: a dict, or JSON text decoding to a dict; otherwise ``{}``.
        * ``Relationships``: a list, or JSON text decoding to a list; otherwise ``[]``.
    """
    results = []
    for _, row in df_top5.iterrows():
        conv = row["Conversation_History"]

        if isinstance(conv, str):
            conversation = _decode_json_field(conv, (list, dict), None)
            if conversation is None:
                # Plain transcript text (or a JSON scalar): parse it line by line.
                conversation = parse_conversation(conv)
        else:
            conversation = conv

        results.append({
            "company_name": row["Company_name"],
            "conversation": conversation,
            "intents": _decode_json_field(row["Entities"], dict, {}),
            "relationships": _decode_json_field(row["Relationships"], list, []),
        })

    full_payload = {
        "query": query.strip(),
        "retrieved_answers": results
    }

    return json.dumps(full_payload, ensure_ascii=False, indent=2)


In [None]:
# Build the LLM payload from the deduplicated candidates, best hybrid score first.
# The query is taken from user_input (the cleaned text that also drove the
# reranking) instead of a hand-copied literal, so it cannot drift out of sync
# when the example question is changed.
payload = build_payload_per_qa(
    df_top5=top5_df.sort_values(by="hybrid_score", ascending=False).head(5),
    query=user_input
)

{
  "query": "My Echo keeps playing the same song over and over how do I fix recommendations",
  "retrieved_answers": [
    {
      "company_name": "AmazonHelp",
      "conversation": [
        {
          "role": "Customer",
          "message": "kids listening to disney soundtracks etc on Echo has completely ruined my recommendations Any solution"
        },
        {
          "role": "Company",
          "message": "The option isnt avail but were always looking for ways to improve Heres how to submit suggestions JO"
        }
      ],
      "intents": {
        "products": [
          "Echo"
        ],
        "services": [],
        "issue_types": [
          "recommendation issue"
        ]
      },
      "relationships": []
    },
    {
      "company_name": "SpotifyCares",
      "conversation": [
        {
          "role": "Customer",
          "message": "Can fix the android app so that it doesnt pause other android apps if Spotify is changing song on a completely different d

In [18]:
# System prompt first, then the JSON payload as the user turn.
chat_messages = [
    {"role": "system", "content": ENDBOT_PROMPT},
    {"role": "user", "content": payload},
]

response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=chat_messages,
    temperature=0,
    top_p=0.95,
)

# Text of the first returned choice is the cell's displayed result.
response.choices[0].message.content

2025-05-15 12:44:00,130 [INFO] httpx: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


"To fix your Echo's recommendation issues, you can submit suggestions to Amazon, as they are always looking for ways to improve. Unfortunately, there isn't a specific option available to resolve this directly."