In [1]:
!pip install pandas numpy scikit-learn spacy flask fastapi uvicorn
!python -m spacy download en_core_web_sm

Collecting spacy
  Downloading spacy-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (27 kB)
Collecting flask
  Downloading flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.5 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB)
Collecting thinc<8.4.0

In [2]:
import pandas as pd
import json
import random

# Generate synthetic data
data = {
    "id": [],
    "text_snippet": [],
    "labels": []
}

competitors = ["CompetitorX", "CompetitorY", "CompetitorZ"]
features = ["analytics", "AI engine", "data pipeline"]
pricing_keywords = ["discount", "renewal cost", "budget", "pricing model"]

for i in range(1, 101):
    text = f"We love the {random.choice(features)}, but {random.choice(competitors)} has a cheaper subscription."
    labels = "Positive, Pricing Discussion, Objection"
    data["id"].append(i)
    data["text_snippet"].append(text)
    data["labels"].append(labels)

# Create DataFrame and save to CSV
df = pd.DataFrame(data)
df.to_csv('calls_dataset.csv', index=False)

# Create domain knowledge JSON
domain_knowledge = {
    "competitors": competitors,
    "features": features,
    "pricing_keywords": pricing_keywords
}

with open('domain_knowledge.json', 'w') as json_file:
    json.dump(domain_knowledge, json_file)

In [3]:
import re
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv('calls_dataset.csv')

# Preprocess text
def clean_text(text):
    text = re.sub(r'\W', ' ', text)  # Remove special characters
    text = text.lower()  # Convert to lowercase
    return text

df['cleaned_text'] = df['text_snippet'].apply(clean_text)

# Split data into training and validation sets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [9]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import classification_report

# Prepare labels
mlb = MultiLabelBinarizer()
y_train = mlb.fit_transform(train_df['labels'].str.split(', '))
y_test = mlb.transform(test_df['labels'].str.split(', '))

# Convert text data to numerical data
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(train_df['cleaned_text'])
X_test_vectorized = vectorizer.transform(test_df['cleaned_text'])

# Train a OneVsRestClassifier with Logistic Regression
model = OneVsRestClassifier(LogisticRegression(max_iter=1000))
model.fit(X_train_vectorized, y_train)

# Evaluate the model
y_pred = model.predict(X_test_vectorized)
print(classification_report(y_test, y_pred, target_names=mlb.classes_))

                    precision    recall  f1-score   support

         Objection       1.00      1.00      1.00        20
          Positive       1.00      1.00      1.00        20
Pricing Discussion       1.00      1.00      1.00        20

         micro avg       1.00      1.00      1.00        60
         macro avg       1.00      1.00      1.00        60
      weighted avg       1.00      1.00      1.00        60
       samples avg       1.00      1.00      1.00        60





In [11]:
import spacy

# Load the NER model
nlp = spacy.load("en_core_web_sm")

# Load domain knowledge
with open('domain_knowledge.json') as json_file:
    domain_knowledge = json.load(json_file)

def extract_entities(text):
    doc = nlp(text)
    entities = set()
    
    # Dictionary lookup
    for entity in domain_knowledge['competitors']:
        if entity.lower() in text.lower():
            entities.add(entity)
    
    # NER extraction
    for ent in doc.ents:
        if ent.label_ in ['ORG', 'PRODUCT']:
            entities.add(ent.text)
    
    return list(entities)

# Apply entity extraction
df['extracted_entities'] = df['text_snippet'].apply(extract_entities)

In [1]:
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    text = data['text']
    cleaned_text = clean_text(text)
    
    # Predict labels
    predicted_labels = model.predict([cleaned_text])
    extracted_entities = extract_entities(text)
    
    return jsonify({
        'predicted_labels': mlb.inverse_transform(predicted_labels),
        'extracted_entities': extracted_entities
    })

if __name__ == '__main__':
    app.run(debug=True, port=5001)  # Change the port here

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5001
[33mPress CTRL+C to quit[0m
 * Restarting with stat
Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 690, in initia

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [4]:
pip install -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [5]:
!pip install -r requirements.txt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [6]:
pip install --upgrade pip

Collecting pip
  Downloading pip-25.0-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m224.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.3.1
    Uninstalling pip-24.3.1:
      Successfully uninstalled pip-24.3.1
Successfully installed pip-25.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
!pip install -r requirements.txt