In [1]:
#%pip install pandas numpy torch transformers langchain fastapi uvicorn
#%pip install --upgrade langchain


In [2]:
import pandas as pd
# import numpy as np
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

# Load the raw booking history and user profile tables.
booking_data = pd.read_csv("data/booking_history.csv")
user_data = pd.read_csv("data/user_data.csv")

# Left join on "User ID": every booking row is kept, and bookings without a
# matching user profile get NaN in the user columns.
data = booking_data.merge(user_data, on="User ID", how="left")

# Replace each categorical label with its integer category code.
# pandas assigns the code -1 to missing values.
for categorical_column in ("Preferred Class", "Seat Preference"):
    data[categorical_column] = data[categorical_column].astype("category").cat.codes

# Min-max normalize ticket prices into [0, 1].
# When every price is identical the range is 0; dividing by it would turn the
# whole column into NaN, so in that case only the shift is applied (all
# present prices become 0.0, missing prices stay NaN).
price_min = data["Ticket Price"].min()
price_range = data["Ticket Price"].max() - price_min
shifted_price = data["Ticket Price"] - price_min
data["Ticket Price"] = shifted_price / price_range if price_range > 0 else shifted_price

print("Preprocessing Done! Sample Data:\n", data.head())


Preprocessing Done! Sample Data:
                                 User ID   Train Name Departure Arrival  \
0  b3d5de0a-f350-4fc2-a5e8-1005a9864d7f    Superfast     CityC   CityH   
1  b3d5de0a-f350-4fc2-a5e8-1005a9864d7f      Express     CityC   CityB   
2  4d4d8ff3-7b7a-4191-814c-c3c079b01211    Intercity     CityC   CityD   
3  b3d5de0a-f350-4fc2-a5e8-1005a9864d7f     Regional     CityC   CityH   
4  b2549fa9-b299-48e0-b0f5-7a4f3811048b  Night Train     CityE   CityF   

         Date  Ticket Price  Seat Preference  Age Gender  \
0  2024-12-27      0.021574                1   59   Male   
1  2024-11-12      0.875484                1   59   Male   
2  2024-11-19      0.418606                0   51   Male   
3  2024-09-07      0.179963                1   59   Male   
4  2024-10-29      0.052849                0   37  Other   

               Location  Preferred Class Loyalty Status  
0          Mercadoshire                0         Bronze  
1          Mercadoshire                0    

In [3]:
import pandas as pd
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
# Load a pre-trained text classification model.
# NOTE: this checkpoint ships a 2-label head. Requesting a different number of
# labels with ignore_mismatched_sizes=True discards that head and creates a
# new, randomly initialised one, so predictions are not meaningful until the
# model is fine-tuned on a recommendation task.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"  # Replace with a fine-tuned recommender model if available
NUM_TICKET_CATEGORIES = 5  # must equal the number of category names used by recommend_ticket
SEED = 42

# Seed torch before loading so the randomly initialised classifier head (and
# therefore every recommendation) is the same after each kernel restart.
torch.manual_seed(SEED)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=NUM_TICKET_CATEGORIES,
    ignore_mismatched_sizes=True,
)

# Define function to get ticket recommendations
def recommend_ticket(user_id):
    """Recommend a ticket category for a user based on their first booking row.

    Looks up the first row of the module-level ``data`` frame whose "User ID"
    equals ``user_id``, renders it as a short English sentence, and classifies
    that sentence with the module-level ``tokenizer`` and ``model``.

    Args:
        user_id: Value compared against the "User ID" column of ``data``.

    Returns:
        str: ``"Recommended Ticket Category: <name>"`` where ``<name>`` is one
        of Budget, Standard, Business, Luxury or Sleeper.

    Raises:
        IndexError: If no row of ``data`` matches ``user_id``.
    """
    # Extract user details; fail with a readable message instead of the opaque
    # "single positional indexer is out-of-bounds" that .iloc[0] would raise.
    matching_rows = data.loc[data["User ID"] == user_id]
    if matching_rows.empty:
        raise IndexError(f"No booking history found for user ID {user_id!r}")
    user_info = matching_rows.iloc[0]

    # Generate input text for recommendation. Adjacent f-strings are used so
    # that no source-code indentation leaks into the prompt.
    # NOTE(review): "Preferred Class" and "Seat Preference" hold integer
    # category codes after preprocessing, so the sentence contains numbers
    # rather than the original labels -- confirm this is intended.
    input_text = (
        f"User prefers {user_info['Preferred Class']} class "
        f"and {user_info['Seat Preference']} seat. "
        f"Previous booking was {user_info['Train Name']} "
        f"with price {user_info['Ticket Price']}."
    )

    # Tokenize input
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

    # Predict ticket category; gradients are not needed for inference.
    with torch.no_grad():
        outputs = model(**inputs)
    # argmax over the label axis of the single sequence in the batch, rather
    # than over the flattened logits tensor.
    prediction = outputs.logits.argmax(dim=-1)[0].item()

    # Return recommended train ticket category
    categories = ["Budget", "Standard", "Business", "Luxury", "Sleeper"]
    return f"Recommended Ticket Category: {categories[prediction]}"


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([5]) in the model instantiated
- classifier.weight: found shape torch.Size([2, 768]) in the checkpoint and torch.Size([5, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
from langchain.schema import LLMResult
from langchain.llms.base import LLM

class TrainTicketLLM(LLM):
    """Custom LLM for recommending train tickets using Hugging Face model.

    This is not a generative model: the prompt is interpreted as a user ID and
    the "completion" is the string returned by ``recommend_ticket``.
    """

    def _call(self, prompt: str, stop=None, run_manager=None, **kwargs) -> str:
        """Return the ticket recommendation for the user ID given as ``prompt``.

        Args:
            prompt: User ID; surrounding whitespace is stripped before lookup.
            stop: Accepted for interface compatibility; not used.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The string produced by ``recommend_ticket`` for that user.
        """
        user_id = prompt.strip()
        return recommend_ticket(user_id)

    @property
    def _llm_type(self) -> str:
        """Identifier string for this LLM implementation.

        Declared as a property so that ``self._llm_type`` evaluates to the
        string itself rather than to a bound method.
        """
        return "custom_train_ticket_recommender"

# Shared recommender instance used by the helper below
llm = TrainTicketLLM()


# LangChain-powered Recommendation System
def get_ticket_recommendation(user_id):
    """Pass ``user_id`` to the shared ``llm`` instance and return its result."""
    recommendation = llm(user_id)
    return recommendation


In [10]:
import nest_asyncio
import uvicorn
from fastapi import FastAPI, HTTPException

# Apply nest_asyncio to allow running FastAPI inside Jupyter
nest_asyncio.apply()

# Server bind settings.
# NOTE(review): "0.0.0.0" listens on every network interface, which exposes
# this unauthenticated endpoint to the local network; switch to "127.0.0.1"
# if only local access is needed.
HOST = "0.0.0.0"
PORT = 8000

# Initialize FastAPI app
app = FastAPI()


@app.get("/recommend/{user_id}")
def get_recommendation(user_id: str):
    """Return the ticket recommendation for ``user_id`` as JSON.

    Declared with plain ``def`` rather than ``async def``: the model inference
    inside ``recommend_ticket`` is blocking, and FastAPI runs synchronous path
    operations in a worker thread instead of on the event loop.

    Raises:
        HTTPException: 404 when ``recommend_ticket`` raises ``IndexError``,
            which is what it does when no row matches ``user_id``.
    """
    try:
        ticket_recommendation = recommend_ticket(user_id)
    except IndexError as exc:
        raise HTTPException(
            status_code=404, detail=f"No booking history found for user ID {user_id}"
        ) from exc
    return {"user_id": user_id, "recommendation": ticket_recommendation}


# Run FastAPI inside Jupyter Notebook (blocks the cell until the server stops)
uvicorn.run(app, host=HOST, port=PORT)


INFO:     Started server process [64167]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:64540 - "GET / HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:64540 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:64540 - "GET / HTTP/1.1" 404 Not Found


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [64167]
