#Anime recommendation Chatbot App with ChatGPT, LangChain and Streamlit

#Install Dependencies

In [None]:
# Install necessary packages using pip
# streamlit, pyngrok and the three LangChain packages are pinned to exact
# versions; scikit-surprise and scikit-learn are installed unpinned (the
# latter is explicitly upgraded to the newest available release).
# NOTE(review): the app later loads joblib artifacts; presumably they must
# have been saved with compatible library versions -- confirm.
!pip install scikit-surprise
!pip install --upgrade scikit-learn
!pip install streamlit==1.32.2
!pip install pyngrok==7.1.5
!pip install langchain==0.1.12
!pip install langchain-openai==0.0.8
!pip install langchain-community==0.0.29

#Load OpenAI API Credentials

In [None]:
from getpass import getpass

# Prompt for the OpenAI API key interactively (getpass does not echo the
# input), so the key is never written into the notebook source.
OPENAI_KEY = getpass('Enter Open AI API Key: ')

#Set Environment Variable

In [None]:
import os
# Export the key entered above as OPENAI_API_KEY in this process's environment.
# NOTE(review): presumably this is how the Streamlit app launched later (and
# its ChatOpenAI client) obtains the key -- confirm the child process inherits it.
os.environ['OPENAI_API_KEY'] = OPENAI_KEY

#Write App Code Header

In [None]:
%%writefile app.py
import streamlit as st
import joblib
import pandas as pd
from surprise import Dataset, Reader, SVD
import warnings
warnings.filterwarnings('ignore')
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_message_histories import StreamlitChatMessageHistory

# Load the pre-trained models and components
DATA_DIR = '/content/drive/MyDrive'


@st.cache_resource(show_spinner="Loading models and datasets...")
def _load_artifacts(data_dir):
    """Load and clean every model/dataset the app needs, once per server process.

    Streamlit re-executes this script on every user interaction; without
    caching, the joblib artifacts and both CSV files would be re-read and
    re-cleaned for each chat message.

    Args:
        data_dir: Directory containing the joblib artifacts and CSV files.

    Returns:
        Tuple ``(svd, tfidf, cosine_sim, indices, anime_df, rating_df)`` with
        the data frames already cleaned.

    Raises:
        FileNotFoundError: If any artifact or CSV file is missing. Exceptions
            are not cached, so the next rerun retries the load.
    """
    # Load the pre-trained models and components
    svd = joblib.load(f'{data_dir}/svd_model.joblib')
    tfidf = joblib.load(f'{data_dir}/tfidf_vectorizer.joblib')
    cosine_sim = joblib.load(f'{data_dir}/cosine_sim_matrix.joblib')
    indices = joblib.load(f'{data_dir}/indices.joblib')

    # Load datasets
    anime_df = pd.read_csv(f'{data_dir}/anime.csv')
    rating_df = pd.read_csv(f'{data_dir}/rating.csv')

    # Data cleaning (ensure this matches your notebook)
    anime_df.drop_duplicates(inplace=True)
    rating_df.drop_duplicates(inplace=True)
    # Rows with rating == -1 are dropped before any recommendation is computed.
    rating_df = rating_df[rating_df['rating'] != -1]
    anime_df['genre'] = anime_df['genre'].fillna('')
    return svd, tfidf, cosine_sim, indices, anime_df, rating_df


try:
    # The cached objects are shared between reruns; code below must rebind
    # (not mutate in place) if it needs a modified copy.
    svd, tfidf, cosine_sim, indices, anime_df, rating_df = _load_artifacts(DATA_DIR)
except FileNotFoundError:
    st.error("Please make sure you have uploaded the model files (svd_model.joblib, tfidf_vectorizer.joblib, cosine_sim_matrix.joblib, indices.joblib, anime.csv, rating.csv) to the Colab environment.")
    st.stop()

# Define recommendation functions (same as before)
def get_svd_recommendations(user_id, n=10):
    """Return the top-``n`` unrated anime for ``user_id`` ranked by SVD prediction.

    Candidates are every ``anime_id`` present in ``rating_df`` that the user
    has not rated and that also exists in ``anime_df``. The catalogue filter
    is applied *before* the top-``n`` cut, so ids without a name can no
    longer shrink the result below ``n`` rows.

    Args:
        user_id: User id passed to ``svd.predict``.
        n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``anime_id``, ``name`` and
        ``predicted_rating``, sorted by descending prediction. When nothing
        qualifies the frame is empty but still carries those columns.
    """
    columns = ['anime_id', 'name', 'predicted_rating']

    # A set gives O(1) "already rated" checks instead of scanning a list
    # once per candidate.
    rated_anime = set(rating_df.loc[rating_df['user_id'] == user_id, 'anime_id'])

    # First name seen for each anime_id (same choice as a ``.values[0]``
    # lookup), built once instead of rescanning anime_df per recommendation.
    name_by_id = (
        anime_df.drop_duplicates(subset='anime_id')
        .set_index('anime_id')['name']
        .to_dict()
    )

    predictions = [
        (aid, svd.predict(user_id, aid).est)
        for aid in rating_df['anime_id'].unique()
        if aid not in rated_anime and aid in name_by_id
    ]
    # list.sort is stable, so ties keep their order of appearance in rating_df.
    predictions.sort(key=lambda pair: pair[1], reverse=True)

    return pd.DataFrame(
        [
            {'anime_id': aid, 'name': name_by_id[aid], 'predicted_rating': est}
            for aid, est in predictions[:max(n, 0)]
        ],
        columns=columns,
    )

def get_content_recommendations(title, n=10):
    """Return the ``n`` anime most similar to ``title`` by cosine similarity.

    The row of ``cosine_sim`` selected by ``indices[title]`` is ranked in
    descending order. The seed title is excluded by its own position rather
    than by assuming it sorts first, so a tie with another title can no
    longer cause the seed itself to be returned.

    Args:
        title: Anime name; must be a key of ``indices``.
        n: Maximum number of similar titles to return.

    Returns:
        DataFrame with columns ``anime_id``, ``name``, ``genre`` and
        ``rating`` (rows of ``anime_df`` selected by position), or the string
        ``"Anime title not found in the dataset."`` when the title is unknown.
    """
    if title not in indices:
        return "Anime title not found in the dataset."

    idx = indices[title]
    # A pandas Series lookup with a duplicated title yields a Series of
    # positions; use the first occurrence.
    if hasattr(idx, 'iloc'):
        idx = idx.iloc[0]
    idx = int(idx)

    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    anime_positions = [pos for pos, _ in ranked[:max(n, 0)]]
    return anime_df.iloc[anime_positions][['anime_id', 'name', 'genre', 'rating']]

def hybrid_recommendations(user_id, anime_title, alpha=0.5, n=10):
    """Blend SVD predictions with content similarity to ``anime_title``.

    Up to 500 SVD candidates for ``user_id`` are re-scored as
    ``alpha * collab + (1 - alpha) * content``, where ``collab`` is
    ``(predicted_rating - 1) / 9`` and ``content`` is the cosine similarity
    between the seed title and the candidate.

    ``cosine_sim`` is addressed by row *position* in ``anime_df`` -- the same
    convention ``get_content_recommendations`` uses via ``iloc`` -- rather
    than by index label, which differs from position once rows have been
    dropped without resetting the index.

    Args:
        user_id: User id forwarded to ``get_svd_recommendations``.
        anime_title: Seed title; must be a key of ``indices``.
        alpha: Weight of the collaborative score; ``1 - alpha`` weights the
            content score.
        n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``anime_id``, ``name`` and ``hybrid_score``
        sorted by descending score, or the string
        ``"Anime title not found for content-based filtering."`` when the
        title is unknown.
    """
    # Validate the title first so an unknown title does not cost a full
    # pass of SVD predictions.
    if anime_title not in indices:
        return "Anime title not found for content-based filtering."
    title_idx = indices[anime_title]
    # Duplicate titles make a pandas lookup return a Series; use the first.
    if hasattr(title_idx, 'iloc'):
        title_idx = title_idx.iloc[0]

    columns = ['anime_id', 'name', 'hybrid_score']
    svd_recs = get_svd_recommendations(user_id, n=500)
    if svd_recs.empty:
        # An empty result may carry no columns at all, so bail out early.
        return pd.DataFrame(columns=columns)

    # Row position of the first occurrence of each anime_id in anime_df.
    position_by_id = {}
    for position, aid in enumerate(anime_df['anime_id']):
        position_by_id.setdefault(aid, position)

    title_similarities = cosine_sim[title_idx]
    scores = []
    for aid, name, predicted in zip(
        svd_recs['anime_id'], svd_recs['name'], svd_recs['predicted_rating']
    ):
        position = position_by_id.get(aid)
        if position is None:
            # Candidate missing from the catalogue: no content score exists.
            continue
        content_score = title_similarities[position]
        # NOTE(review): assumes predictions lie on a 1-10 scale -- confirm.
        collab_score = (predicted - 1) / 9
        scores.append((aid, name, alpha * collab_score + (1 - alpha) * content_score))

    scores.sort(key=lambda item: item[2], reverse=True)
    return pd.DataFrame(scores[:n], columns=columns)

def create_test_user(anime_ratings, user_id=None):
    """Append a user's ratings to the global ``rating_df``.

    Args:
        anime_ratings: Mapping of anime title -> rating. Titles are matched
            exactly against ``anime_df['name']``.
        user_id: Id to record the ratings under. When ``None``, 1 is used
            for an empty ``rating_df``, otherwise the current maximum
            ``user_id`` plus one.

    Returns:
        ``(new_ratings_df, user_id, None)`` on success, or
        ``(None, None, message)`` when any title is unknown or when no
        ratings were supplied. ``rating_df`` is rebound only on success.
    """
    global rating_df

    if user_id is None:
        user_id = 1 if rating_df.empty else rating_df['user_id'].max() + 1

    records = []
    missing = []
    for anime_name, score in anime_ratings.items():
        matches = anime_df.loc[anime_df['name'] == anime_name]
        if matches.empty:
            missing.append(anime_name)
        else:
            # First catalogue row with this exact name supplies the id.
            records.append({
                'user_id': user_id,
                'anime_id': matches['anime_id'].values[0],
                'rating': score,
            })

    # A single unknown title rejects the whole request.
    if missing:
        return None, None, f"Anime titles not found: {', '.join(missing)}"
    if not records:
        return None, None, "No valid anime found for new user creation."

    new_ratings_df = pd.DataFrame(records)
    rating_df = pd.concat([rating_df, new_ratings_df], ignore_index=True)
    return new_ratings_df, user_id, None


# Initialize chat history
streamlit_msg_history = StreamlitChatMessageHistory(key="anime_chat_messages")
chatgpt = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.5)

# --- Intent Recognition Chain ---
intent_prompt_template = """
    You are an anime recommendation assistant.  A user has provided a message, determine their intent.
    The possible intents are:

    get_user_recommendations: The user wants anime recommendations based on a user ID. The message should contain a user ID.
    get_anime_recommendations: The user wants anime recommendations based on an anime title. The message should contain an anime title.
    get_hybrid_recommendations: The user wants hybrid recommendations based on a user ID and an anime title. The message should contain both.
    create_new_user: The user wants to create a new user profile and provide anime titles and ratings.
    unknown: The user's intent is not clear or doesn't fit into the above categories.

    Analyze the following user message and determine the intent. Respond with just the intent name (e.g., get_user_recommendations, unknown).

    User message: {message}
    """
intent_prompt = ChatPromptTemplate.from_template(intent_prompt_template)
intent_chain = intent_prompt | chatgpt

# --- Parameter Extraction Chains (Example for User ID and Anime Title) ---
user_id_extraction_prompt_template = "Extract the user ID from the following text. If no user ID is found, respond with 'None'. Text: {text}"
user_id_extraction_prompt = ChatPromptTemplate.from_template(user_id_extraction_prompt_template)
user_id_extraction_chain = user_id_extraction_prompt | chatgpt

anime_title_extraction_prompt_template = "Extract the anime title from the following text. If no anime title is found, respond with 'None'. Text: {text}"
anime_title_extraction_prompt = ChatPromptTemplate.from_template(anime_title_extraction_prompt_template)
anime_title_extraction_chain = anime_title_extraction_prompt | chatgpt

# Word that leaves the "awaiting ratings" state without creating a user.
_CANCEL_COMMAND = 'cancel'


def _as_code_block(df):
    """Render a DataFrame inside a fenced block so chat markdown keeps the column layout."""
    return "```\n{}\n```".format(df.to_string())


def _extract_value(chain, text):
    """Run an extraction chain on ``text``; return None when the model answers 'None'."""
    answer = chain.invoke({"text": text}).content.strip().strip('"\'')
    return None if answer.lower() == 'none' else answer


def _reply_user_recommendations(text):
    """Build the reply for the get_user_recommendations intent."""
    raw_id = _extract_value(user_id_extraction_chain, text)
    if raw_id is None:
        return "Please provide a user ID to get recommendations."
    try:
        user_id = int(raw_id)
    except ValueError:
        return "Invalid user ID format. Please provide a number."
    recommendations_df = get_svd_recommendations(user_id, n=10)  # Fixed n=10 for now, can be made dynamic
    if recommendations_df is not None and not recommendations_df.empty:
        return "Here are anime recommendations for user ID {}:\n{}".format(user_id, _as_code_block(recommendations_df))
    return "No recommendations found for user ID {} or user ID not found.".format(user_id)


def _reply_anime_recommendations(text):
    """Build the reply for the get_anime_recommendations intent."""
    anime_title = _extract_value(anime_title_extraction_chain, text)
    if anime_title is None:
        return "Please provide an anime title to get recommendations."
    recommendations = get_content_recommendations(anime_title, n=10)  # Fixed n=10 for now
    if isinstance(recommendations, pd.DataFrame):
        return "Here are anime recommendations based on the title '{}':\n{}".format(anime_title, _as_code_block(recommendations))
    return recommendations  # Error message from function


def _reply_hybrid_recommendations(text):
    """Build the reply for the get_hybrid_recommendations intent."""
    raw_id = _extract_value(user_id_extraction_chain, text)
    anime_title = _extract_value(anime_title_extraction_chain, text)
    # Either piece missing gets the same prompt; previously a missing user ID
    # combined with a valid title produced no reply at all.
    if raw_id is None or anime_title is None:
        return "Please provide both a user ID and an anime title for hybrid recommendations."
    try:
        user_id = int(raw_id)
    except ValueError:
        return "Invalid user ID format. Please provide a number for user ID."
    recommendations = hybrid_recommendations(user_id, anime_title, n=10)  # Fixed n=10
    if isinstance(recommendations, pd.DataFrame):
        return "Here are hybrid recommendations for user ID {} based on anime '{}':\n{}".format(user_id, anime_title, _as_code_block(recommendations))
    return recommendations  # Error message from function


def _parse_ratings(text):
    """Parse 'Title,Rating' lines; return ``(ratings_dict, error_message_or_None)``."""
    ratings = {}
    for line in text.strip().split('\n'):
        if not line.strip():
            continue  # tolerate blank lines between entries
        # Split on the LAST comma so titles that contain commas still parse.
        title, separator, rating_str = line.rpartition(',')
        title = title.strip()
        rating_str = rating_str.strip()
        if not separator or not title:
            return None, "Invalid input format. Please use 'Title,Rating' format for each line."
        try:
            ratings[title] = float(rating_str)
        except ValueError:
            return None, "Invalid rating format: '{}'. Please use numbers for ratings.".format(rating_str)
    return ratings, None


def _reply_new_user_ratings(text):
    """Handle the message that follows a create_new_user request."""
    if text.strip().lower() == _CANCEL_COMMAND:
        st.session_state.awaiting_new_user_ratings = False
        return "New user creation cancelled."

    ratings, error = _parse_ratings(text)
    if error:
        # Keep the flag set so the user can resend corrected ratings.
        return "{} Type '{}' to stop creating a new user.".format(error, _CANCEL_COMMAND)

    # From here on the attempt is finished, successful or not.
    st.session_state.awaiting_new_user_ratings = False
    if not ratings:
        return "No anime titles and ratings provided. New user not created."

    new_user_ratings_df, new_user_id, error_msg = create_test_user(ratings)
    if new_user_id is None:
        return error_msg or "Failed to create new user."

    # NOTE(review): the SVD model was loaded pre-trained, so it presumably has
    # no factors for this new user id -- confirm how svd.predict handles it.
    recommendations = get_svd_recommendations(new_user_id, n=10)
    return (
        "New user created with User ID: {}. Ratings added:\n{}\n"
        "Here are anime recommendations for your new user (User ID: {}):\n{}"
    ).format(
        new_user_id, _as_code_block(new_user_ratings_df),
        new_user_id, _as_code_block(recommendations),
    )


_INTENT_HANDLERS = {
    "get_user_recommendations": _reply_user_recommendations,
    "get_anime_recommendations": _reply_anime_recommendations,
    "get_hybrid_recommendations": _reply_hybrid_recommendations,
}


def _reply_to_intent(intent, text):
    """Dispatch a recognised intent name to its handler and return the reply text."""
    if intent == "create_new_user":
        st.session_state.awaiting_new_user_ratings = True  # Set a session state flag
        return "Okay, let's create a new user. Please provide anime titles and your ratings for them.  You can list them like this:  `Naruto,9\nOne Piece,8\nAttack on Titan,10` (one anime and rating per line)."
    if intent == "unknown":
        return "I'm sorry, I didn't understand what you want to do.  Please clarify if you want recommendations by user ID, by anime title, hybrid recommendations, or to create a new user."
    handler = _INTENT_HANDLERS.get(intent)
    if handler is None:
        return "Something went wrong processing your request. Please try again."
    return handler(text)


if not streamlit_msg_history.messages:
    streamlit_msg_history.add_ai_message("Hello! How can I help you with anime recommendations today?")

# Replay the stored conversation; the script reruns on every interaction.
for msg in streamlit_msg_history.messages:
    st.chat_message(msg.type).write(msg.content)

if user_prompt := st.chat_input():
    # Store the user's turn as well, otherwise it disappears on the next rerun.
    streamlit_msg_history.add_user_message(user_prompt)
    st.chat_message("user").write(user_prompt)

    # --- Handle User Input and Intents ---
    if st.session_state.get('awaiting_new_user_ratings', False):
        # The previous turn asked for ratings: treat this message as the
        # answer directly instead of sending it through intent detection.
        ai_response_content = _reply_new_user_ratings(user_prompt)
    else:
        intent_response = intent_chain.invoke({"message": user_prompt})
        # Tolerate quoting, trailing punctuation and case in the model's answer.
        intent = intent_response.content.strip().strip("'\"`.").lower()
        ai_response_content = _reply_to_intent(intent, user_prompt)

    if ai_response_content:
        streamlit_msg_history.add_ai_message(ai_response_content)
        st.chat_message("ai").write(ai_response_content)

#Starting the Streamlit App

In [None]:
!streamlit run app.py --server.port=8989 &>/./logs.txt &

#Setting Up ngrok Tunnel

In [None]:
from getpass import getpass

# Prompt for the ngrok credential interactively (getpass does not echo the
# input); the value is used below to authenticate the ngrok agent.
ngrok_auth_token = getpass('Enter ngrok API Key: ')

In [None]:
from pyngrok import ngrok
import yaml

# Terminate open tunnels if exist
ngrok.kill()

# Authenticate ngrok with the token entered at the prompt above. Using the
# pyngrok API keeps the secret out of an interpolated shell command line.
ngrok.set_auth_token(ngrok_auth_token)

# Open a tunnel to port 8989, the port passed to `streamlit run --server.port`
ngrok_tunnel = ngrok.connect(8989)
print("Streamlit App:", ngrok_tunnel.public_url)