# This is the simplest pipeline of Fian.

## There are 3 phases

- Intent Detection

- Feature Selection

- Respond

# Phase 1 - Intent Detection

1. Extract user's intent (using TF-IDF + Logistic Regression)

First, for good measure, I will train the model (it only needs to be trained once to produce a joblib file; the training code is kept here for visualization).

Second, I will use the model to predict the user's intent.


In [13]:
# Model Training: Intent Detection
import pandas as pd
import joblib
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer

# STEP 1: Train from CSV and save model
def intent_LogReg(csv_path='intent_dataset_2000.csv',
                  model_path='intent_with_LogisticRegression.joblib'):
    """Train the TF-IDF + Logistic Regression intent classifier and save it.

    Args:
        csv_path: CSV file with a 'text' column (utterances) and an
            'intent' column (labels).
        model_path: Destination of the fitted pipeline (joblib format).
            extract_intent_LogReg() loads the default path, so only
            override it when the model is consumed somewhere else.

    Raises:
        ValueError: If a required column is missing, or if no row has both
            a text and an intent value.
    """
    # Load CSV
    df = pd.read_csv(csv_path)

    # Make sure the expected columns exist
    missing = [col for col in ('text', 'intent') if col not in df.columns]
    if missing:
        raise ValueError("CSV must contain 'text' and 'intent' columns")

    # astype(str) would turn empty cells into the literal string "nan" and
    # train on it as if it were a real utterance/label, so drop them first.
    df = df.dropna(subset=['text', 'intent'])
    if df.empty:
        raise ValueError(
            "CSV has no rows with both 'text' and 'intent' values to train on"
        )

    texts = df['text'].astype(str).tolist()
    labels = df['intent'].astype(str).tolist()

    # Pipeline: TF-IDF + Logistic Regression. Keeping both steps in one
    # Pipeline means a single joblib file carries vectorizer and classifier.
    model = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', LogisticRegression())
    ])

    # Train and save
    model.fit(texts, labels)
    joblib.dump(model, model_path)
    print(f"Model trained and saved as '{model_path}'.")

def extract_intent_LogReg(text, threshold=0.5):
    """Classify *text* with the saved Logistic Regression pipeline.

    Args:
        text: The user message to classify.
        threshold: Minimum probability the top class must reach for its
            label to be reported.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is the most probable
        class, or "uncertain" when its probability is below *threshold*.
        If the joblib file is missing, ``label`` is an "error: ..." message
        and ``confidence`` is 0.0.
    """
    try:
        # The pipeline is re-read from disk on every call.
        pipeline = joblib.load('intent_with_LogisticRegression.joblib')
        class_probs = pipeline.predict_proba([text])[0]

        top = class_probs.argmax()
        top_prob = class_probs[top]
        predicted = pipeline.classes_[top]

        # Report the label only when the model is confident enough.
        label = predicted if top_prob >= threshold else "uncertain"
        return label, top_prob
    except FileNotFoundError:
        return "error: model not found — please run intent_LogReg() first", 0.0


# Train with the default CSV so the joblib file that
# extract_intent_LogReg() loads exists on disk.
intent_LogReg()


Model trained and saved as 'intent_with_LogisticRegression.joblib'.


In [14]:
## Random Forest
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

# Random Forest intent classifier: load the CSV, split it, vectorize the text
# with TF-IDF, train, print a classification report and save both artifacts.

# Load the dataset (replace path if needed)
df = pd.read_csv("intent_dataset_2000.csv")


# Split into features and labels
X = df["text"]
y = df["intent"]

# Split into training and testing sets
# stratify=y keeps the class proportions equal in both splits;
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# TF-IDF vectorization (unigrams and bigrams, vocabulary capped at 3000 terms)
# The vectorizer is fitted on the training split only and merely applied to
# the test split, so no test vocabulary leaks into training.
vectorizer = TfidfVectorizer(max_features=3000, ngram_range=(1, 2))
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train Random Forest classifier
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,   # no depth limit on the individual trees
    random_state=42,  # reproducible forest
    n_jobs=-1         # use all available processors
)
rf_model.fit(X_train_vec, y_train)

# Evaluate model on the held-out test split
# NOTE(review): the recorded report shows a total support of 1600 rows and
# perfect scores; confirm the dataset size and that the data is not trivially
# separable or duplicated across the split.
y_pred = rf_model.predict(X_test_vec)
print(classification_report(y_test, y_pred))

# Save model and vectorizer
# Both files are needed at prediction time: extract_intent_RnFr() loads the
# vectorizer to transform the text before calling the model.
joblib.dump(rf_model, "intent_with_RnFr.joblib")
joblib.dump(vectorizer, "vectorizer_for_RnFr.joblib")
import joblib
import numpy as np

def extract_intent_RnFr(text, threshold=0.5):
    """Classify *text* with the saved Random Forest model.

    Args:
        text: The user message to classify.
        threshold: Minimum probability the top class must reach for its
            label to be reported.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is the most probable
        class, or "uncertain" when its probability is below *threshold*.
        If either joblib file is missing, ``label`` is an "error: ..."
        message and ``confidence`` is 0.0.
    """
    try:
        # Both artifacts are re-read from disk on every call.
        forest = joblib.load('intent_with_RnFr.joblib')
        tfidf = joblib.load('vectorizer_for_RnFr.joblib')

        # The forest was trained on TF-IDF features, so the raw text must
        # go through the same fitted vectorizer first.
        features = tfidf.transform([text])
        class_probs = forest.predict_proba(features)[0]

        top = np.argmax(class_probs)
        top_prob = class_probs[top]
        predicted = forest.classes_[top]

        # Report the label only when the model is confident enough.
        label = predicted if top_prob >= threshold else "uncertain"
        return label, top_prob
    except FileNotFoundError:
        return "error: model or vectorizer not found — please train and save them first", 0.0


              precision    recall  f1-score   support

   calculate       1.00      1.00      1.00       400
     compare       1.00      1.00      1.00       400
     predict       1.00      1.00      1.00       400
  shows_info       1.00      1.00      1.00       400

    accuracy                           1.00      1600
   macro avg       1.00      1.00      1.00      1600
weighted avg       1.00      1.00      1.00      1600



In [2]:
import dateparser
from datetime import datetime

def _days_to_period(days):
    """Map an elapsed day count onto a '<n><unit>' period string."""
    # Every branch is clamped to at least 1 unit: a date that is today, in
    # the future, or 6 days back would otherwise give "0d", "-3d" or "0wk".
    if days <= 5:
        return f"{max(days, 1)}d"
    if days <= 30:
        # NOTE(review): yfinance's documented period values do not list a
        # week unit; confirm "<n>wk" is accepted by the consumer.
        return f"{max(days // 7, 1)}wk"
    if days <= 365:
        return f"{days // 30}mo"
    if days <= 1825:
        return f"{days // 365}y"
    return "max"


def extract_period_nlp(text):
    """Turn a natural-language time reference into a period string.

    The text (e.g. "3 months ago") is parsed with dateparser relative to
    the current time, and the elapsed time is expressed in days, weeks,
    months or years depending on its size.

    Args:
        text: Free-form text containing a date or relative time expression.

    Returns:
        A string such as "3d", "2wk", "6mo" or "4y"; "max" when the text
        cannot be parsed or the date is more than 1825 days back. Dates
        that are today or in the future yield the smallest period, "1d".
    """
    now = datetime.now()
    past_time = dateparser.parse(text, settings={'RELATIVE_BASE': now})

    if past_time is None:
        return "max"

    # dateparser can return a timezone-aware datetime (e.g. when the text
    # names a timezone); subtracting it from a naive `now` raises TypeError,
    # so take "now" in the same timezone in that case.
    if past_time.tzinfo is not None:
        now = datetime.now(past_time.tzinfo)

    # Convert days into yfinance format
    return _days_to_period((now - past_time).days)


In [16]:
from ipywidgets import widgets, Layout
from IPython.display import display

# Single-line input box where the user types a message.
text_input = widgets.Text(
    placeholder='Type your message here...',
    description='You:',
    layout=Layout(width='80%')
)

# Fixed-height, scrollable area that collects the bot's printed replies.
output_area = widgets.Output(layout=Layout(border='1px solid black', height='300px', overflow_y='auto'))

def on_submit(sender):
    """Handle a submitted chat message.

    Reads the message from *sender*, classifies it with
    extract_intent_RnFr() and prints the bot's reply into ``output_area``.
    "exit" or "quit" ends the chat and disables the input box; a missing
    model also disables it, since no further message could be answered.

    Args:
        sender: The widget that fired the event; its ``value`` holds the
            submitted text.
    """
    # Surrounding whitespace is not part of the message: without stripping,
    # "exit " would be sent to the classifier instead of ending the chat.
    user_text = sender.value.strip()

    # Pressing Enter on an empty box gives the classifier nothing to work
    # with, so just reset the field and wait for real input.
    if not user_text:
        text_input.value = ''
        return

    if user_text.lower() in ('exit', 'quit'):
        with output_area:
            print("Bot: Goodbye!")
        text_input.value = ''
        text_input.disabled = True
        return

    intent, confidence = extract_intent_RnFr(user_text)
    with output_area:
        if intent == "uncertain":
            print(f"Bot: Sorry, I didn't quite get that. Could you please rephrase or be more specific? (Confidence: {confidence:.2f})")
        elif intent.startswith("error:"):
            # The model files are missing; every further message would fail
            # the same way, so stop accepting input.
            print(f"Bot: {intent}")
            text_input.disabled = True
        else:
            print(f"Bot: Intent detected -> {intent} (Confidence: {confidence:.2f})")

    # Clear the box for the next message.
    text_input.value = ''

# Call on_submit() whenever the user presses Enter in the input box.
# NOTE(review): Text.on_submit is deprecated in ipywidgets 8 in favour of
# continuous_update=False plus observe(..., 'value'); the handler signature
# differs, so migrate both together.
text_input.on_submit(on_submit)
# Render the input box above the reply area.
display(text_input, output_area)


  text_input.on_submit(on_submit)


Text(value='', description='You:', layout=Layout(width='80%'), placeholder='Type your message here...')

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…