In [None]:
#classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, pipeline
import warnings
warnings.filterwarnings('ignore')

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Configuration
RANDOM_SEED = 42
BATCH_SIZE = 16
MAX_LEN = 64
EPOCHS = 10
LEARNING_RATE = 2e-5
NUM_CLASSES = 2

# Set random seeds
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Load dataset
df = pd.read_csv("/Users/User/CSProjects/CSC392_AI_agent/emphatic-AI-Winter2025/ambiguity_model/data/synthetic_data/ambiguous_prompts_dataset_distinct.csv")

# Split dataset
X_train, X_val, y_train, y_val = train_test_split(
    df['text'], df['label'],
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=df['label']
)

# Initialize BERT tokenizer

model_name = "microsoft/MiniLM-L12-H384-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create PyTorch Dataset
class AmbiguityDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts.iloc[idx])
        label = self.labels.iloc[idx]
        
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        
        return {
            'text': text,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'label': torch.tensor(label, dtype=torch.long)
        }

# Create data loaders
train_dataset = AmbiguityDataset(X_train, y_train, tokenizer, MAX_LEN)
val_dataset = AmbiguityDataset(X_val, y_val, tokenizer, MAX_LEN)

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE
)

# Create classifier model
class AmbiguityClassifier(nn.Module):
    def __init__(self, num_classes):
        super(AmbiguityClassifier, self).__init__()
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_classes)
    
    def forward(self, input_ids, attention_mask):
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        return outputs.logits

model = AmbiguityClassifier(NUM_CLASSES).to(device)

# Set up training
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss().to(device)

# Training loop
def train_model():
    best_accuracy = 0
    for epoch in range(EPOCHS):
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-' * 10)
        
        # Training phase
        model.train()
        train_loss = 0
        
        for batch in train_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            
            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        avg_train_loss = train_loss / len(train_loader)
        
        # Validation phase
        model.eval()
        val_loss = 0
        correct_preds = 0
        
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)
                
                outputs = model(input_ids, attention_mask)
                loss = loss_fn(outputs, labels)
                val_loss += loss.item()
                
                _, preds = torch.max(outputs, dim=1)
                correct_preds += torch.sum(preds == labels)
                
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = correct_preds.double() / len(val_dataset)
        
        print(f'Train loss: {avg_train_loss:.4f}')
        print(f'Val loss: {avg_val_loss:.4f}')
        print(f'Val accuracy: {val_accuracy:.4f}\n')
        
        if val_accuracy > best_accuracy:
            torch.save(model.state_dict(), 'best_model_state.bin')
            best_accuracy = val_accuracy
            
    print(f'Best validation accuracy: {best_accuracy:.4f}')

# Start training
train_model()

# Evaluation
def evaluate_model(model_path):
    model.load_state_dict(torch.load(model_path))
    model.eval()
    
    y_true = []
    y_pred = []
    
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            
            outputs = model(input_ids, attention_mask)
            _, preds = torch.max(outputs, dim=1)
            
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            
    print(classification_report(y_true, y_pred))

evaluate_model('best_model_state.bin')

# Example prediction
def predict_ambiguity(text):
    model.eval()
    encoding = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=MAX_LEN,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )
    
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    
    with torch.no_grad():
        outputs = model(input_ids, attention_mask)
        _, prediction = torch.max(outputs, dim=1)
        
    return 'Ambiguous' if prediction.item() == 1 else 'Clear'

# Test with sample input
test_text = "How do I fix my computer?"
print(f"Input: '{test_text}'\nPrediction: {predict_ambiguity(test_text)}")

print(df['label'].value_counts())


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at microsoft/MiniLM-L12-H384-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
----------
Train loss: 0.6695
Val loss: 0.6323
Val accuracy: 0.6970

Epoch 2/10
----------
Train loss: 0.6480
Val loss: 0.5300
Val accuracy: 0.6970

Epoch 3/10
----------
Train loss: 0.5213
Val loss: 0.4100
Val accuracy: 0.6970

Epoch 4/10
----------
Train loss: 0.3997
Val loss: 0.2793
Val accuracy: 1.0000

Epoch 5/10
----------
Train loss: 0.2686
Val loss: 0.1766
Val accuracy: 1.0000

Epoch 6/10
----------
Train loss: 0.1688
Val loss: 0.1177
Val accuracy: 1.0000

Epoch 7/10
----------
Train loss: 0.1114
Val loss: 0.0848
Val accuracy: 1.0000

Epoch 8/10
----------
Train loss: 0.0827
Val loss: 0.0649
Val accuracy: 1.0000

Epoch 9/10
----------
Train loss: 0.0640
Val loss: 0.0518
Val accuracy: 1.0000

Epoch 10/10
----------
Train loss: 0.0518
Val loss: 0.0425
Val accuracy: 1.0000

Best validation accuracy: 1.0000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        10

  

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModel
import torch

class ClarificationNeedDetector:
    def __init__(self, model_name="microsoft/MiniLM-L12-H384-uncased"):
        # Load tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
    
    def preprocess_text(self, text):
        """
        Preprocess input text using the tokenizer
        
        Args:
            text (str): Input query
        
        Returns:
            dict: Tokenizer outputs (input_ids, attention_mask)
        """
        # Tokenize text
        encoding = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=64,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        return encoding
    
    def extract_features(self, texts):
        """
        Extract features using the transformer model

        Args:
            texts (list): List of input queries
        
        Returns:
            np.ndarray: Extracted features (embeddings)
        """
        features = []

        for text in texts:
            # Preprocess the text
            encoding = self.preprocess_text(text)
            input_ids = encoding["input_ids"].to(self.device)
            attention_mask = encoding["attention_mask"].to(self.device)

            # Get embeddings from the transformer model
            with torch.no_grad():
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()  
                
            features.append(embeddings.flatten())

        return np.array(features)
    
    def train_clarification_model(self, training_data, labels):
        """
        Train a classification model to predict clarification need
        
        Args:
            training_data (list): Training queries
            labels (list): Binary labels (need clarification or not)
        
        Returns:
            Pipeline: Trained ML pipeline
        """
        # Extract features
        X = self.extract_features(training_data)
        y = np.array(labels)
        
        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        ) # adding the stratifier parameter makes sure that the class distribution matches the dataset
        
        # Create ML pipeline
        pipeline = Pipeline([
            ('classifier', RandomForestClassifier(
                n_estimators=100, 
                max_depth=10,
                min_samples_split=5,
                class_weight="balanced",
                random_state=42 
            ))
        ])

        # Fit model
        pipeline.fit(X_train, y_train)

        cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring="f1")

        print(f"Cross-validation F1 scores: {cv_scores}")
        print(f"Mean F1 score: {cv_scores.mean()}")
        
        # Evaluate val set
        y_val_pred = pipeline.predict(X_test)
        print("Validation Set Performance:")
        print(classification_report(y_val, y_val_pred))
    
        return pipeline
    
    def predict_clarification_need(self, query, model):
        """
        Predict whether a query needs clarification
        
        Args:
            query (str): Input query
            model (Pipeline): Trained classification model
        
        Returns:
            bool: Whether clarification is needed
        """
        features = self.extract_features([query])
        prediction = model.predict(features)
        return bool(prediction[0])
    
    def evaluate_model(self, test_data, test_labels, model):
        """
        Evaluate the classification model on test data.

        Args:
            test_data (list): List of test queries.
            test_labels (list): Corresponding binary labels.
            model (Pipeline): Trained classification model (RandomForestClassifier).

        Returns:
            None
        """
        # Extract features for test data
        X_test = self.extract_features(test_data)
        y_test = np.array(test_labels)

        # Make predictions
        y_pred = model.predict(X_test)

        # Print classification report
        print("Classification Report:")
        print(classification_report(y_test, y_pred))

        # Print classification report
        print(classification_report(y_test, y_pred))


# Example usage
detector = ClarificationNeedDetector()

# # Simulated training data
# training_queries = [
#     "Tell me about that thing",
#     "What's the best way to solve this problem?",
#     "I need help with my project",
#     "Explain the process of photosynthesis in plants"
# ]
# training_labels = [1, 1, 1, 0]  # 1 means needs clarification

# loading real dataset
train_df = pd.read_csv("/Users/User/CSProjects/CSC392_AI_agent/emphatic-AI-Winter2025/ambiguity_model/data/synthetic_data/ambiguous_prompts_dataset_distinct.csv")  
test_df = pd.read_csv("/Users/User/CSProjects/CSC392_AI_agent/emphatic-AI-Winter2025/ambiguity_model/data/clean_data/changed_test.csv")    

# Extract training data and labels
training_queries = train_df["text"].tolist() 
training_labels = train_df["label"].tolist()  

# Extract test data and labels
test_queries = test_df["text"].tolist()       
test_labels = test_df["label"].tolist()  

# Train model
clarification_model = detector.train_clarification_model(
    training_queries, 
    training_labels
)

# This evaluates model on test set
detector.evaluate_model(test_queries, test_labels, clarification_model)

# Test prediction
test_query = "Tell me about something"
needs_clarification = detector.predict_clarification_need(
    test_query, 
    clarification_model
)
print(f"Query needs clarification: {needs_clarification}")

Cross-validation F1 scores: [1. 1. 1. 1. 1.]
Mean F1 score: 1.0
Validation Set Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        10

    accuracy                           1.00        33
   macro avg       1.00      1.00      1.00        33
weighted avg       1.00      1.00      1.00        33

Classification Report:
              precision    recall  f1-score   support

           0       0.44      0.10      0.16        41
           1       0.42      0.84      0.56        32

    accuracy                           0.42        73
   macro avg       0.43      0.47      0.36        73
weighted avg       0.43      0.42      0.34        73

Confusion Matrix:


NameError: name 'confusion_matrix' is not defined

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from transformers import AutoTokenizer, AutoModel
import torch

class ClarificationNeedDetector:
    def __init__(self, model_name="microsoft/MiniLM-L12-H384-uncased"):
        """Initialize the transformer model and tokenizer."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
    
    def preprocess_text(self, text):
        """Tokenizes input text and converts it to tensor format."""
        encoding = self.tokenizer(
            text, add_special_tokens=True, max_length=64, 
            padding="max_length", truncation=True, return_tensors="pt"
        )
        return encoding
    
    def extract_features(self, texts):
        """Extracts sentence embeddings using the transformer model."""
        features = []
        for text in texts:
            encoding = self.preprocess_text(text)
            input_ids = encoding["input_ids"].to(self.device)
            attention_mask = encoding["attention_mask"].to(self.device)

            with torch.no_grad():
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()  

            features.append(embeddings.flatten())

        return np.array(features)
    
    def train_clarification_model(self, training_data, labels):
        """Trains a classification model using extracted features."""
        X = self.extract_features(training_data)
        y = np.array(labels)

        # Split data into training and validation sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        ) 

        # Define classifier pipeline
        pipeline = Pipeline([
            ('classifier', RandomForestClassifier(
                n_estimators=25, max_depth=5, min_samples_split=10, 
                class_weight="balanced", random_state=42
            ))
        ])

        # Train the model
        pipeline.fit(X_train, y_train)

        # Cross-validation performance
        cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring="f1")
        print(f"Cross-validation F1 scores: {cv_scores}")
        print(f"Mean F1 score: {cv_scores.mean()}")

        # Evaluate validation set
        y_pred = pipeline.predict(X_test)
        print("Validation Set Performance:")
        print(classification_report(y_test, y_pred))
    
        return pipeline
    
    def predict_clarification_need(self, query, model):
        """Predicts whether a query needs clarification."""
        features = self.extract_features([query])
        prediction = model.predict(features)
        return bool(prediction[0])
    
    def evaluate_model(self, test_data, test_labels, model):
        """Evaluates the trained model on test data."""
        X_test = self.extract_features(test_data)
        y_test = np.array(test_labels)

        y_pred = model.predict(X_test)

        print("Classification Report:")
        print(classification_report(y_test, y_pred))


# Initialize detector
detector = ClarificationNeedDetector()

# Load datasets
train_df = pd.read_csv("/Users/User/CSProjects/CSC392_AI_agent/emphatic-AI-Winter2025/ambiguity_model/data/synthetic_data/ambiguous_prompts_dataset_distinct.csv")  
test_df = pd.read_csv("/Users/User/CSProjects/CSC392_AI_agent/emphatic-AI-Winter2025/ambiguity_model/data/clean_data/changed_test.csv")    

# Extract training and test data
training_queries, training_labels = train_df["text"].tolist(), train_df["label"].tolist()
test_queries, test_labels = test_df["text"].tolist(), test_df["label"].tolist()



# Train model
clarification_model = detector.train_clarification_model(training_queries, training_labels)

# Evaluate model
detector.evaluate_model(test_queries, test_labels, clarification_model)

# Test prediction
test_query = "Tell me about something"
needs_clarification = detector.predict_clarification_need(test_query, clarification_model)
print(f"Query needs clarification: {needs_clarification}")





Cross-validation F1 scores: [1. 1. 1. 1. 1.]
Mean F1 score: 1.0
Validation Set Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        10

    accuracy                           1.00        33
   macro avg       1.00      1.00      1.00        33
weighted avg       1.00      1.00      1.00        33

Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.22      0.33        41
           1       0.46      0.84      0.59        32

    accuracy                           0.49        73
   macro avg       0.55      0.53      0.46        73
weighted avg       0.56      0.49      0.44        73

Query needs clarification: False
Counter({0: 112, 1: 51})
Counter({0: 41, 1: 32})


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from transformers import AutoTokenizer, AutoModel
import torch

class ClarificationNeedDetector:
    def __init__(self, model_name="distilbert-base-uncased"):
        """Initialize the transformer model and tokenizer."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)
    
    def preprocess_text(self, text):
        """Tokenizes input text and converts it to tensor format."""
        encoding = self.tokenizer(
            text, add_special_tokens=True, max_length=64, 
            padding="max_length", truncation=True, return_tensors="pt"
        )
        return encoding
    
    def extract_features(self, texts):
        """Extracts sentence embeddings using the transformer model."""
        features = []
        for text in texts:
            encoding = self.preprocess_text(text)
            input_ids = encoding["input_ids"].to(self.device)
            attention_mask = encoding["attention_mask"].to(self.device)

            with torch.no_grad():
                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
                embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()  

            features.append(embeddings.flatten())

        return np.array(features)
    
    def train_clarification_model(self, training_data, labels):
        """Trains a classification model using extracted features."""
        X = self.extract_features(training_data)
        y = np.array(labels)

        # Split data into training and validation sets
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        ) 

        # Define classifier pipeline
        pipeline = Pipeline([
            ('classifier', RandomForestClassifier(
                n_estimators=25, max_depth=5, min_samples_split=10, 
                class_weight="balanced", random_state=42
            ))
        ])

        # Train the model
        pipeline.fit(X_train, y_train)

        # Cross-validation performance
        cv_scores = cross_val_score(pipeline, X_train, y_train, cv=5, scoring="f1")
        print(f"Cross-validation F1 scores: {cv_scores}")
        print(f"Mean F1 score: {cv_scores.mean()}")

        # Evaluate validation set
        y_pred = pipeline.predict(X_test)
        print("Validation Set Performance:")
        print(classification_report(y_test, y_pred))
    
        return pipeline
    
    def predict_clarification_need(self, query, model):
        """Predicts whether a query needs clarification."""
        features = self.extract_features([query])
        prediction = model.predict(features)
        return bool(prediction[0])
    
    def evaluate_model(self, test_data, test_labels, model):
        """Evaluates the trained model on test data."""
        X_test = self.extract_features(test_data)
        y_test = np.array(test_labels)

        y_pred = model.predict(X_test)

        print("Classification Report:")
        print(classification_report(y_test, y_pred))


# Initialize detector
detector = ClarificationNeedDetector()

# Load datasets
train_df = pd.read_csv("/Users/User/CSProjects/CSC392_AI_agent/emphatic-AI-Winter2025/ambiguity_model/data/synthetic_data/ambiguous_prompts_dataset_distinct.csv")  
test_df = pd.read_csv("/Users/User/CSProjects/CSC392_AI_agent/emphatic-AI-Winter2025/ambiguity_model/data/clean_data/changed_test.csv")    

# Extract training and test data
training_queries, training_labels = train_df["text"].tolist(), train_df["label"].tolist()
test_queries, test_labels = test_df["text"].tolist(), test_df["label"].tolist()



# Train model
clarification_model = detector.train_clarification_model(training_queries, training_labels)

# Evaluate model
detector.evaluate_model(test_queries, test_labels, clarification_model)

# Test prediction
test_query = "Tell me about something"
needs_clarification = detector.predict_clarification_need(test_query, clarification_model)
print(f"Query needs clarification: {needs_clarification}")





Cross-validation F1 scores: [1. 1. 1. 1. 1.]
Mean F1 score: 1.0
Validation Set Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        10

    accuracy                           1.00        33
   macro avg       1.00      1.00      1.00        33
weighted avg       1.00      1.00      1.00        33

Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.24      0.33        41
           1       0.42      0.69      0.52        32

    accuracy                           0.44        73
   macro avg       0.46      0.47      0.42        73
weighted avg       0.46      0.44      0.41        73

Query needs clarification: False
