## <span style='color:#ff5f27'> 📝 Imports </span>

In [1]:
from xgboost import XGBRegressor
import hopsworks
from functions.llm_chain import load_model, get_llm_chain, generate_response
import pandas as pd
import warnings
# Install a global "ignore" filter so that no warning category is shown
# in the output of the cells that follow.
warnings.filterwarnings("ignore")

## <span style="color:#ff5f27;"> 🔮 Connect to Hopsworks Feature Store </span>

In [2]:
# Log in to Hopsworks; `project` is reused below for the model registry.
project = hopsworks.login()
# Feature store handle, used below to look up the feature view.
fs = project.get_feature_store() 

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://snurran.hops.works/p/5240
Connected. Call `.close()` to terminate connection gracefully.


In [3]:
# Look up version 1 of the existing 'air_quality_fv' feature view
# (this call only retrieves it; nothing is created here).
feature_view = fs.get_feature_view(name='air_quality_fv', version=1)

# Prepare the feature view for batch scoring.
# NOTE(review): the positional 1 is presumably the training dataset version - confirm.
feature_view.init_batch_scoring(1)


## <span style="color:#ff5f27;">🪝 Retrieve AirQuality Model from Model Registry</span>

In [4]:
# Handle to the project's model registry.
mr = project.get_model_registry()

# Fetch version 1 of 'air_quality_xgboost_model' from the registry ...
retrieved_model = mr.get_model(name="air_quality_xgboost_model", version=1)

# ... and download its saved artifacts; the returned local directory is used
# below to load the model file.
saved_model_dir = retrieved_model.download()

Connected. Call `.close()` to terminate connection gracefully.
Downloading model artifact (1 dirs, 6 files)... DONE

In [5]:
# Restore the XGBoost regressor from the `model.json` file inside the
# downloaded model directory.
model_air_quality = XGBRegressor()
model_air_quality.load_model(f"{saved_model_dir}/model.json")

# Leave the regressor as the last expression so the notebook displays it.
model_air_quality

In [6]:
# Import only the helper this cell uses. A wildcard import would silently add
# (and possibly overwrite) arbitrary names in the notebook namespace.
from functions.air_quality_data_retrieval import get_data_in_date_range

# Date range to query, given as ISO-formatted date strings.
date_start = "2024-02-02"
date_end = "2024-02-04"

# NOTE(review): the saved output of this cell is
#   KeyError: "['pm25'] not in index"
# so the call below did not complete on the last run. The failure presumably
# originates inside get_data_in_date_range (column selection on the retrieved
# data) - confirm against the feature view schema before relying on this cell.
res = get_data_in_date_range(date_start, date_end, feature_view, model_air_quality)
print(res)

Finished: Reading data from Hopsworks, using ArrowFlight (0.83s) 


KeyError: "['pm25'] not in index"

## <span style='color:#ff5f27'>⬇️ LLM Loading </span>

In [None]:
# Load the LLM and its corresponding tokenizer using the project helper
# `load_model` (imported from functions.llm_chain in the first cell).
# Both objects are passed to every `generate_response` call below.
model_llm, tokenizer = load_model()

## <span style='color:#ff5f27'>⛓️ LangChain </span>

In [None]:
# Build the LLM chain from the loaded model and tokenizer; the chain is
# handed to every `generate_response` call below.
llm_chain = get_llm_chain(model_llm, tokenizer)

## <span style='color:#ff5f27'>🧬 Model Inference </span>


In [None]:
# Greeting-only prompt: the question contains no dates or air-quality terms.
QUESTION7 = "Hi!"

response7 = generate_response(
    QUESTION7, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response7)

In [None]:
# Identity prompt: asks the assistant about itself rather than about data.
QUESTION = "Who are you?"

response = generate_response(
    QUESTION, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response)

In [None]:
# Average over an explicit date range given in the question.
QUESTION1 = "What was the average air quality from 2024-01-10 till 2024-01-14?"

response1 = generate_response(
    QUESTION1, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response1)

In [None]:
# Maximum value (and its date) over an explicit date range.
QUESTION11 = "When and what was the maximum air quality from 2024-01-10 till 2024-01-14?"

response11 = generate_response(
    QUESTION11, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response11)

In [None]:
# Minimum value (and its date) over an explicit date range.
QUESTION12 = "When and what was the minimum air quality from 2024-01-10 till 2024-01-14?"

response12 = generate_response(
    QUESTION12, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response12)

In [None]:
# Relative date in the past ("yesterday") instead of an explicit date.
QUESTION2 = "What was the air quality yesterday?"

response2 = generate_response(
    QUESTION2, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response2)

In [None]:
# Single explicit date, phrased as a forecast ("will ... be").
QUESTION3 = "What will the air quality be like in 2024-03-18?"

response3 = generate_response(
    QUESTION3, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response3)

In [None]:
# Relative future date ("the day after tomorrow").
QUESTION4 = "What will the air quality be like the day after tomorrow?"

response4 = generate_response(
    QUESTION4, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response4)

In [None]:
# Future date given as a weekday name.
QUESTION5 = "What will the air quality be like on Sunday?"

response5 = generate_response(
    QUESTION5, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response5)

In [None]:
# Date given as month name + day, without a year.
# Note: this rebinds QUESTION7 / response7 from the earlier greeting cell.
QUESTION7 = "What will the air quality be like on March 16?"

response7 = generate_response(
    QUESTION7, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response7)

In [None]:
# Follow-up question that refers to "this level" without naming a date.
QUESTION = "Is this level safe or not?"

# Pass QUESTION, the question defined in this cell. The previous version
# passed QUESTION7 by mistake, which re-asked the March 16 question from the
# cell above and never sent this follow-up to the model.
response = generate_response(
    QUESTION,
    feature_view,
    model_llm,
    tokenizer,
    model_air_quality,
    llm_chain,
    verbose=True,
)

print(response)

In [None]:
# Follow-up about danger that refers to "this air quality level" without a date.
QUESTION = "Is this air quality level dangerous?"

response = generate_response(
    QUESTION, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response)

In [None]:
# General-knowledge question: asks for an explanation of air quality levels.
QUESTION = "Can you please explain different air quality levels?"

response = generate_response(
    QUESTION, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
    verbose=True,
)
print(response)

In [None]:
import gradio as gr
from transformers import pipeline
import numpy as np
import hopsworks
from xgboost import XGBRegressor
from functions.llm_chain import load_model, get_llm_chain, generate_response


In [None]:
# Speech-to-text pipeline used by `transcribe` below
# (Hugging Face pipeline with the "openai/whisper-base.en" checkpoint).
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)

def transcribe(audio):
    """Transcribe a recorded audio clip with the module-level `transcriber`.

    Args:
        audio: ``(sampling_rate, samples)`` pair. ``samples`` is a NumPy array,
            either 1-D or 2-D; a 2-D array is assumed to be laid out as
            (samples, channels) and is averaged across axis 1.

    Returns:
        The ``"text"`` entry of the transcriber result, or ``""`` when the
        clip contains no samples at all.
    """
    sr, y = audio
    # astype returns a new array, so the caller's buffer is never modified
    # by the in-place normalisation below.
    y = y.astype(np.float32)

    if y.size == 0:
        # Nothing was recorded; np.max would raise on an empty array.
        return ""

    if y.ndim > 1:
        # Down-mix to mono. This also flattens a single-channel (n, 1) array,
        # which the previous `shape[1] > 1` check left two-dimensional.
        y = np.mean(y, axis=1)

    # Peak-normalise to [-1, 1]. Skip it for an all-zero (silent) clip:
    # dividing by a zero peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]

def generate_query_response(user_query):
    """Answer one user query through `generate_response`, with verbose output off.

    The feature view, LLM, tokenizer, air-quality model and LLM chain are
    taken from the notebook's global namespace.

    Args:
        user_query: The question text to forward.

    Returns:
        Whatever `generate_response` returns for this query.
    """
    return generate_response(
        user_query, feature_view, model_llm, tokenizer, model_air_quality, llm_chain,
        verbose=False,
    )

def handle_input(text_input=None, audio_input=None):
    """Route a text and/or voice submission to the assistant.

    Args:
        text_input: Typed question, or None.
        audio_input: Recorded audio as accepted by `transcribe`, or None.

    Returns:
        The assistant's answer, or a prompt asking for input when the
        resulting query is empty.
    """
    # A recording, when present, takes precedence over the typed text.
    user_query = text_input if audio_input is None else transcribe(audio_input)

    if not user_query:
        return "Please provide input either via text or voice."
    return generate_query_response(user_query)

# Gradio UI: one text box and one audio input, both routed to `handle_input`,
# with the answer rendered as plain text.
iface = gr.Interface(
    title="🌤️ AirQuality AI Assistant 💬",
    description="Ask your questions about air quality or use your voice to interact.",
    fn=handle_input,
    inputs=[
        gr.Textbox(placeholder="Type here or use voice input..."),
        gr.Audio(),
    ],
    outputs="text",
)

# NOTE(review): share=True asks Gradio for a public share link in addition to
# the local server - confirm that exposing the assistant publicly is intended.
iface.launch(share=True)


---