<a href="https://colab.research.google.com/github/Snjkmr162/BLS_Chatbot_with_RL/blob/main/Chatbot_with_RL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Start

# Installs & Imports

In [33]:
!pip install requests pandas matplotlib scikit-learn transformers torch



In [34]:
import requests
import pandas as pd
import numpy as np
import pickle
import os
import random

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# BLS Data Fetching & Loading

In [35]:
def fetch_bls_series(series_id, start_year, end_year):
    url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"
    payload = {
        "seriesid": [series_id],
        "startyear": start_year,
        "endyear": end_year
    }

    response = requests.post(url, json=payload).json()
    data = response["Results"]["series"][0]["data"]

    df = pd.DataFrame(data)
    df["year"] = df["year"].astype(int)
    df["value"] = df["value"].astype(float)
    return df.sort_values(["year", "period"])

In [36]:
# Unemployment rate
unemployment_df = fetch_bls_series("LNS14000000", "2019", "2024")

# CPI for inflation
cpi_df = fetch_bls_series("CUUR0000SA0", "2019", "2024")
cpi_df["inflation_yoy"] = cpi_df["value"].pct_change(12) * 100

# Rule Based Explanations

In [37]:
def explain_unemployment():
    peak = unemployment_df["value"].max()
    latest = unemployment_df.iloc[-1]["value"]
    return (
        f"U.S. unemployment peaked at {peak:.1f}% during COVID and has since "
        f"recovered to about {latest:.1f}%, reflecting gradual labor market recovery."
    )

def explain_inflation():
    peak = cpi_df["inflation_yoy"].max()
    recent = cpi_df["inflation_yoy"].iloc[-1]
    return (
        f"Inflation peaked at around {peak:.1f}% due to supply shocks and strong demand. "
        f"Recently it has eased to roughly {recent:.1f}% year-over-year."
    )

def auto_reward(intent, confidence, action):
    """
    action: 0 = REJECT, 1 = ANSWER
    """
    # Rejecting when unsure is good
    if action == 0 and confidence < 0.4:
        return 0.5

    # Answering confidently is very good
    if action == 1 and confidence >= 0.6:
        return 1.0

    # Answering with medium confidence
    if action == 1 and 0.4 <= confidence < 0.6:
        return 0.5

    # Rejecting despite high confidence is bad
    if action == 0 and confidence >= 0.6:
        return -1.0

    # Answering UNKNOWN intent is bad
    if intent == "UNKNOWN" and action == 1:
        return -1.0

    return 0

# ML Intent Classifier

In [38]:
training_sentences = [
    # Unemployment
    "why did unemployment rise",
    "why did unemployment spike during covid",
    "job losses in 2020",
    "mass layoffs",
    "are jobs recovering",
    "is unemployment falling",
    "labor market recovery",
    "unemployment rate",

    # Inflation
    "what is inflation",
    "inflation rate",
    "are prices rising",
    "why are prices high",
    "why did prices increase",
    "cost of living increase",
    "inflation after covid",
    "is inflation cooling",

    # Comparison
    "relationship between inflation and unemployment",
    "jobs and prices connection",
    "how are jobs and prices connected",
    "inflation vs unemployment",
    "can inflation and unemployment both be high"
]

training_labels = (
    ["UNEMPLOYMENT"] * 8 +
    ["INFLATION"] * 8 +
    ["COMPARISON"] * 5
)


vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(training_sentences)

intent_model = LogisticRegression()
intent_model.fit(X, training_labels)

In [39]:
def detect_intent_with_confidence(text):
    vec = vectorizer.transform([text])
    probs = intent_model.predict_proba(vec)[0]
    intent = intent_model.classes_[np.argmax(probs)]
    confidence = np.max(probs)

    if confidence < 0.4:
        return "UNKNOWN", confidence
    return intent, confidence

# LLM

Load LLM

In [40]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

Fact Builder

In [41]:
def build_facts(intent, question):
    q = question.lower()

    if intent == "UNEMPLOYMENT":
        if any(word in q for word in ["spike", "loss", "layoff", "covid"]):
            return (
                "Unemployment spiked in 2020 due to COVID-19 lockdowns, "
                "business closures, and a sudden collapse in economic activity."
            )

        if any(word in q for word in ["recover", "finding jobs", "down", "improving"]):
            latest = unemployment_df.iloc[-1]["value"]
            return (
                f"As the economy reopened, hiring resumed across many sectors. "
                f"The unemployment rate has since fallen to around {latest:.1f}%, "
                "reflecting labor market recovery."
            )

        return explain_unemployment()

LLM Response

In [42]:
def generate_llm_response(question, facts):
    prompt = f"""
You are a U.S. labor economics assistant.
Facts:
{facts}

Question: {question}
Answer clearly and concisely:
"""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    outputs = llm_model.generate(**inputs, max_length=200)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# RL

Reinforcement Learning Setup

In [43]:
confidence_bins = np.linspace(0, 1, 11)
Q_table = np.zeros((10, 2))

learning_rate = 0.1
discount_factor = 0.9

epsilon = 1.0
epsilon_min = 0.05
epsilon_decay = 0.995

Load saved Q-table

In [44]:
if os.path.exists("q_table.pkl"):
    with open("q_table.pkl", "rb") as f:
        Q_table = pickle.load(f)

RL Helpers

In [45]:
def get_confidence_bin(confidence):
    return min(int(confidence * 10), 9)

def choose_action(confidence):
    state = get_confidence_bin(confidence)
    if random.random() < epsilon:
        return random.choice([0, 1])
    return np.argmax(Q_table[state])

def update_q_table(state, action, reward):
    Q_table[state, action] += learning_rate * (
        reward + discount_factor * np.max(Q_table[state]) - Q_table[state, action]
    )

In [46]:
print(get_confidence_bin(0.73))

7


# Chatbot Loop

In [47]:
print("ðŸ“Š BLS Labor Market Chatbot")
print("Ask me about unemployment, inflation, or trends.")
print("Type 'exit' to quit.\n")

while True:
    user_input = input("You: ").strip()

    if user_input.lower() == "exit":
        print("Chatbot: Goodbye!")
        break

    intent, confidence = detect_intent_with_confidence(user_input)
    state = get_confidence_bin(confidence)
    action = choose_action(confidence)

if action == 0 or intent == "UNKNOWN":
    print("Chatbot: Iâ€™m not fully confident about that question yet.")
else:
    facts = build_facts(intent, user_input)
    response = generate_llm_response(user_input, facts)
    print("Chatbot:", response)

reward = auto_reward(intent, confidence, action)
update_q_table(state, action, reward)

epsilon = max(epsilon_min, epsilon * epsilon_decay)

ðŸ“Š BLS Labor Market Chatbot
Ask me about unemployment, inflation, or trends.
Type 'exit' to quit.

You: Why are prices high in the US?
You: Why did unemployment spike during COVID?
You: exit
Chatbot: Goodbye!
Chatbot: COVID


Saving the learning

In [49]:
with open("q_table.pkl", "wb") as f:
    pickle.dump(Q_table, f)