In [1]:

!pip install sentence-transformers scikit-learn langchain faiss-cpu

# 🧾 Step 2: Load Dataset
import pandas as pd
df = pd.read_csv("sales_calls_1000.csv")
df.head()





[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Unnamed: 0,transcript,label
0,Customer asked detailed questions and made a c...,1
1,They said it was not the right time to purchas...,0
2,They asked about pricing and signed up at the ...,1
3,Customer objected to the price and declined th...,0
4,They liked the product demo and decided to go ...,1


In [2]:
# 🧪 Step 3: Create Contrastive Pairs
# Builds sentence pairs for similarity fine-tuning:
#   * label 0 -> a positive transcript paired with a randomly drawn negative transcript
#   * label 1 -> a positive transcript paired with the next positive transcript
#                (the last one wraps around to the first)
import random
from sentence_transformers import InputExample
from torch.utils.data import DataLoader

MAX_PAIRS = 300   # upper bound on the number of pairs of each kind
PAIR_SEED = 42    # fixed seed so the sampled negatives are identical on every run

positive = df[df['label'] == 1]['transcript'].tolist()
negative = df[df['label'] == 0]['transcript'].tolist()

if not positive or not negative:
    raise ValueError("Need at least one positive and one negative transcript to build pairs.")

# Clamp to the number of positives actually available; indexing a fixed 300
# would raise IndexError on a smaller dataset.
n_pairs = min(MAX_PAIRS, len(positive))

# Local RNG instance: reproducible sampling without touching the global `random` state.
pair_rng = random.Random(PAIR_SEED)

train_data = []
for pos in positive[:n_pairs]:
    neg = pair_rng.choice(negative)
    train_data.append({'text1': pos, 'text2': neg, 'label': 0})

for i in range(n_pairs):
    # Wrap around within the slice that is actually used.
    train_data.append({'text1': positive[i], 'text2': positive[(i + 1) % n_pairs], 'label': 1})

# The loss consumes float labels, hence the explicit cast.
train_samples = [InputExample(texts=[row['text1'], row['text2']], label=float(row['label'])) for row in train_data]
train_dataloader = DataLoader(train_samples, shuffle=True, batch_size=16)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
!pip install datasets




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
from datasets import Dataset


In [5]:
# 🧠 Step 4: Fine-Tune SentenceTransformer with Contrastive Loss
# The objective used here is CosineSimilarityLoss, applied to the 0/1-labelled
# sentence pairs served by `train_dataloader`. The fine-tuned model is written
# to the 'fine-tuned-sales-embed' directory.
import math
from sentence_transformers import SentenceTransformer, losses

NUM_EPOCHS = 1

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
train_loss = losses.CosineSimilarityLoss(model=model)

# Warm up over 10% of all training steps. A fixed value of 50 is larger than the
# whole run when the dataloader holds ~600 pairs at batch size 16 (38 steps/epoch),
# which would keep the learning rate in warmup for the entire training.
total_steps = len(train_dataloader) * NUM_EPOCHS
warmup_steps = math.ceil(total_steps * 0.1)

model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=NUM_EPOCHS,
    warmup_steps=warmup_steps,
    output_path='fine-tuned-sales-embed'
)


ImportError: Using the `Trainer` with `PyTorch` requires `accelerate>=0.26.0`: Please run `pip install transformers[torch]` or `pip install 'accelerate>=0.26.0'`

In [None]:
# 📊 Step 5: Evaluate Fine-tuned vs. Generic Embeddings
# Embeds every transcript with the fine-tuned model, trains a logistic-regression
# classifier on 80% of the embeddings and reports precision/recall/F1 on the other 20%.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fine-tuned model
model_finetuned = SentenceTransformer('fine-tuned-sales-embed')
X_finetuned = model_finetuned.encode(df['transcript'].tolist())
y = df['label'].tolist()

# NOTE(review): the pairs used for fine-tuning were drawn from this same dataframe,
# so some test transcripts were seen during fine-tuning — scores may be optimistic.

# Fixed random_state makes the split reproducible; splitting the same labels with the
# same seed always yields the same train/test partition, which keeps evaluations of
# different embeddings comparable. `stratify` preserves the class ratio in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X_finetuned, y, test_size=0.2, random_state=42, stratify=y
)
clf_finetuned = LogisticRegression().fit(X_train, y_train)
y_pred_finetuned = clf_finetuned.predict(X_test)

print("🔍 Fine-tuned Embedding Performance:")
print(classification_report(y_test, y_pred_finetuned))


In [None]:
# Generic model baseline
# Same evaluation protocol, but with the off-the-shelf MiniLM embeddings:
# embed all transcripts, fit logistic regression on 80%, report on the held-out 20%.
generic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
X_generic = generic_model.encode(df['transcript'].tolist())

# Fixed random_state makes the split reproducible; splitting the same labels with the
# same seed always yields the same train/test partition, which keeps evaluations of
# different embeddings comparable. `stratify` preserves the class ratio in both parts.
X_train_g, X_test_g, y_train_g, y_test_g = train_test_split(
    X_generic, y, test_size=0.2, random_state=42, stratify=y
)
clf_generic = LogisticRegression().fit(X_train_g, y_train_g)
y_pred_generic = clf_generic.predict(X_test_g)

print("⚖️ Generic Embedding Performance:")
print(classification_report(y_test_g, y_pred_generic))


In [None]:
# 🔗 Step 6: LangChain Integration with FAISS
# Wrap the fine-tuned model as a LangChain embedding function and index
# every transcript in an in-memory FAISS vector store.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

texts = df['transcript'].tolist()
embedding_model = HuggingFaceEmbeddings(model_name='fine-tuned-sales-embed')
vectorstore = FAISS.from_texts(texts, embedding_model)

# 🕵️ Step 7: Query Similar Transcripts
# Retrieve the three indexed transcripts closest to a free-text query.
query = "Customer was hesitant but asked about pricing and features."
results = vectorstore.similarity_search(query, k=3)

print("🎯 Top Matches for Query:")
for match in results:
    # One bullet line per retrieved document.
    print(f"- {match.page_content}")
