# This is the simpliest pipeline of Fian.

## There are 3 phases

- Intent Detection

- Feature Selection

- Respond

# Phase 1 - Intent Detection

1. Extract user's intent (using TF-IDF + Logistic Regression)

First, for good measure, I will train the model first (train it once and give out a joblib file, but I put it here for visualization)

Second, I will use the model to predict what's the user's intent


In [9]:
# Model Training: Intent Detection

# Please run once to get the joblib file
import pandas as pd
import joblib
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

# STEP 1: Train from CSV and save model
def intent_LogReg(csv_path='intent_dataset.csv'):
    # Load CSV
    df = pd.read_csv(csv_path)

    # Make sure the expected columns exist
    if 'text' not in df.columns or 'intent' not in df.columns:
        raise ValueError("CSV must contain 'text' and 'intent' columns")

    texts = df['text'].astype(str).tolist()
    labels = df['intent'].astype(str).tolist()

    # Pipeline: TF-IDF + Logistic Regression
    model = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', LogisticRegression())
    ])

    # Train and save
    model.fit(texts, labels)
    joblib.dump(model, 'intent_with_LogisticRegression.joblib')
    print("Model trained and saved as 'intent_with_LogisticRegression.joblib'.")

## == == == -- -- -- Main -- -- -- == == == ## 
intent_LogReg()

Model trained and saved as 'intent_with_LogisticRegression.joblib'.


In [13]:
def extract_intent(text, threshold=0.6):
    try:
        model = joblib.load('intent_with_LogisticRegression.joblib')
        probs = model.predict_proba([text])[0]
        best_index = probs.argmax()
        confidence = probs[best_index]
        intent = model.classes_[best_index]

        if confidence >= threshold:
            return intent, confidence
        else:
            return "uncertain", confidence

    except FileNotFoundError:
        return "error: model not found — please run intent_LogReg() first", 0.0


In [16]:
from ipywidgets import widgets, Layout
from IPython.display import display

text_input = widgets.Text(
    placeholder='Type your message here...',
    description='You:',
    layout=Layout(width='80%')
)

output_area = widgets.Output(layout=Layout(border='1px solid black', height='300px', overflow_y='auto'))

def on_submit(sender):
    user_text = sender.value
    if user_text.lower() in ['exit', 'quit']:
        with output_area:
            print("Bot: Goodbye!")
        text_input.value = ''
        text_input.disabled = True
        return

    intent, confidence = extract_intent(user_text)
    with output_area:
        if intent == "uncertain":
            print(f"Bot: Sorry, I didn't quite get that. Could you please rephrase or be more specific? (Confidence: {confidence:.2f})")
        elif intent.startswith("error:"):
            print(f"Bot: {intent}")
            text_input.disabled = True
        else:
            print(f"Bot: Intent detected -> {intent} (Confidence: {confidence:.2f})")

    text_input.value = ''

text_input.on_submit(on_submit)
display(text_input, output_area)


  text_input.on_submit(on_submit)


Text(value='', description='You:', layout=Layout(width='80%'), placeholder='Type your message here...')

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…