# Anime Recommendation Chatbot App with ChatGPT, LangChain and Streamlit

#Install Dependencies

In [None]:
# Install necessary packages using pip.
# scikit-surprise is installed unpinned and scikit-learn is upgraded to the
# latest release; every other package below is pinned to an exact version.
!pip install scikit-surprise
!pip install --upgrade scikit-learn
# Streamlit runs app.py; pyngrok opens the tunnel used in the last cell.
!pip install streamlit==1.32.2
!pip install pyngrok==7.1.5
# LangChain packages (app.py imports ChatOpenAI from langchain_openai).
!pip install langchain==0.1.12
!pip install langchain-openai==0.0.8
!pip install langchain-community==0.0.29

#Load OpenAI API Credentials

In [None]:
from getpass import getpass

# Ask for the key through getpass rather than input() so it is not echoed
# back into the notebook output.
OPENAI_KEY = getpass(prompt='Enter Open AI API Key: ')

#Set Environment Variable

In [None]:
import os

# Publish the key entered above to the process environment under the name
# OPENAI_API_KEY.
os.environ.update({'OPENAI_API_KEY': OPENAI_KEY})

#Write App Code Header

In [None]:
%%writefile app.py
import streamlit as st
import joblib
import pandas as pd
import numpy as np
from surprise import Dataset, Reader, SVD
import warnings
warnings.filterwarnings('ignore')
from langchain_openai import ChatOpenAI

# Directory that holds the exported model artifacts and the raw CSV files.
DATA_DIR = '/content/drive/MyDrive'


@st.cache_resource(show_spinner="Loading models and datasets...")
def _load_artifacts():
    """Load the pre-trained components and the cleaned datasets.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the result means the joblib artifacts and CSV files
    are read and cleaned once instead of on every rerun.

    The returned objects are shared between reruns, so callers must not
    mutate them in place (rebinding a name to a new object is fine).

    Returns:
        Tuple ``(svd, tfidf, cosine_sim, indices, anime_df, rating_df)``.

    Raises:
        FileNotFoundError: If an artifact or CSV is missing from ``DATA_DIR``.
    """
    svd_model = joblib.load(f'{DATA_DIR}/svd_model.joblib')
    tfidf_vectorizer = joblib.load(f'{DATA_DIR}/tfidf_vectorizer.joblib')
    similarity = joblib.load(f'{DATA_DIR}/cosine_sim_matrix.joblib')
    title_indices = joblib.load(f'{DATA_DIR}/indices.joblib')

    anime = pd.read_csv(f'{DATA_DIR}/anime.csv')
    ratings = pd.read_csv(f'{DATA_DIR}/rating.csv')

    # Data cleaning (ensure this matches your notebook).
    # The index is deliberately NOT reset, so row labels stay as read from CSV.
    anime = anime.drop_duplicates()
    anime = anime.assign(genre=anime['genre'].fillna(''))
    ratings = ratings.drop_duplicates()
    # Rows with rating == -1 are excluded from the ratings table.
    ratings = ratings[ratings['rating'] != -1]

    return svd_model, tfidf_vectorizer, similarity, title_indices, anime, ratings


try:
    svd, tfidf, cosine_sim, indices, anime_df, rating_df = _load_artifacts()
except FileNotFoundError:
    st.error("Please make sure you have uploaded the model files (svd_model.joblib, tfidf_vectorizer.joblib, cosine_sim_matrix.joblib, indices.joblib, anime.csv, rating.csv) to the Colab environment.")
    st.stop()

# Define function to get SVD recommendations
def get_svd_recommendations(user_id, n=10):
    """Return the top-``n`` anime the user has not rated, ranked by SVD estimate.

    Args:
        user_id: User identifier passed to ``svd.predict``.
        n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``anime_id``, ``name`` and ``predicted_rating``,
        ordered by predicted rating (highest first). Candidates whose id has
        no row in ``anime_df`` are left out, so fewer than ``n`` rows may be
        returned. The columns are present even when the result is empty.
    """
    # A set gives O(1) "already rated?" checks instead of scanning a list
    # once per candidate.
    rated = set(rating_df.loc[rating_df['user_id'] == user_id, 'anime_id'])
    candidates = [aid for aid in rating_df['anime_id'].unique() if aid not in rated]

    predictions = [(aid, svd.predict(user_id, aid).est) for aid in candidates]
    predictions.sort(key=lambda pair: pair[1], reverse=True)

    # Build the id -> name lookup once rather than filtering anime_df twice
    # per result row. When an id repeats, the first name is kept.
    name_by_id = (
        anime_df.drop_duplicates(subset='anime_id')
        .set_index('anime_id')['name']
        .to_dict()
    )

    rows = [
        {'anime_id': aid, 'name': name_by_id[aid], 'predicted_rating': pred}
        for aid, pred in predictions[:n]
        if aid in name_by_id
    ]
    return pd.DataFrame(rows, columns=['anime_id', 'name', 'predicted_rating'])

# Define function to get content-based recommendations
def get_content_recommendations(title, n=10):
    """Return the ``n`` anime most similar to ``title`` by cosine similarity.

    Args:
        title: Anime title to look up in ``indices``.
        n: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns ``anime_id``, ``name``, ``genre`` and ``rating``
        for the most similar rows of ``anime_df``, or an error-message string
        when ``title`` is not present in ``indices``.
    """
    if title not in indices:
        return "Anime title not found in the dataset."

    idx = indices[title]
    # NOTE(review): indices is presumably a pandas Series keyed by title. If
    # several rows share the title the lookup returns all their positions;
    # use the first so cosine_sim[idx] is a single similarity row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)
    # Remove the query row by position instead of assuming it sorts first:
    # another row with an equal similarity score could otherwise take its
    # place and leave the query title in the results.
    anime_indices = [pos for pos, _ in ranked if pos != idx][:n]
    return anime_df.iloc[anime_indices][['anime_id', 'name', 'genre', 'rating']]

# Hybrid recommendations function
def hybrid_recommendations(user_id, anime_title, alpha=0.5, n=10):
    """Blend SVD collaborative-filtering scores with content similarity.

    Each of the top 500 SVD candidates for the user is scored as
    ``alpha * collab + (1 - alpha) * content``, where ``collab`` is the
    predicted rating rescaled as ``(rating - 1) / 9`` and ``content`` is the
    cosine similarity between the candidate and ``anime_title``.

    Args:
        user_id: User identifier for the collaborative part.
        anime_title: Title used as the content-similarity anchor.
        alpha: Weight of the collaborative score; ``1 - alpha`` goes to the
            content score.
        n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``anime_id``, ``name`` and ``hybrid_score``
        sorted by score (highest first), or an error-message string when
        ``anime_title`` is not present in ``indices``.
    """
    # Validate the title first: this is a cheap lookup, whereas the SVD pass
    # below predicts a rating for every anime the user has not rated.
    if anime_title not in indices:
        return "Anime title not found for content-based filtering."
    title_idx = indices[anime_title]
    # NOTE(review): indices is presumably a pandas Series keyed by title; a
    # duplicated title would yield several positions, so take the first.
    if isinstance(title_idx, pd.Series):
        title_idx = title_idx.iloc[0]

    # Get collaborative filtering recommendations (many candidates).
    svd_recs = get_svd_recommendations(user_id, n=500)

    # Map every anime_id to its first row label in anime_df once, instead of
    # running a boolean scan of the whole frame for each candidate.
    row_label_by_id = {}
    for label, aid in zip(anime_df.index, anime_df['anime_id']):
        row_label_by_id.setdefault(aid, label)

    similarity_row = cosine_sim[title_idx]

    scores = []
    for _, row in svd_recs.iterrows():
        candidate_idx = row_label_by_id[row['anime_id']]
        content_score = similarity_row[candidate_idx]
        # Normalize the collaborative score (assuming ratings range from 1 to 10).
        collab_score = (row['predicted_rating'] - 1) / 9
        hybrid_score = alpha * collab_score + (1 - alpha) * content_score
        scores.append((row['anime_id'], row['name'], hybrid_score))

    # Sort the candidates by the hybrid score (highest first).
    scores.sort(key=lambda item: item[2], reverse=True)
    return pd.DataFrame(scores[:n], columns=['anime_id', 'name', 'hybrid_score'])


def create_test_user(anime_ratings, user_id=None):
    """Register a synthetic user by appending their ratings to the global ``rating_df``.

    Args:
        anime_ratings: Mapping of anime title -> rating. Titles are matched
            exactly against ``anime_df['name']``.
        user_id: Id to assign. When omitted, one more than the largest id in
            ``rating_df`` is used (or 1 if ``rating_df`` is empty).

    Returns:
        ``(new_ratings_df, user_id)`` on success, or ``(None, None)`` when
        none of the titles could be matched. Unmatched titles are reported
        with a Streamlit warning.
    """
    global rating_df

    if user_id is None:
        user_id = 1 if rating_df.empty else rating_df['user_id'].max() + 1

    matched_rows = []
    missing = []
    for title, score in anime_ratings.items():
        hits = anime_df[anime_df['name'] == title]
        if not hits.empty:
            # Use the first matching row's id.
            matched_rows.append({
                'user_id': user_id,
                'anime_id': hits['anime_id'].values[0],
                'rating': score
            })
        else:
            missing.append(title)

    if missing:
        st.warning(f"Anime titles not found: {', '.join(missing)}")

    if len(matched_rows) == 0:
        st.warning("No valid anime found for new user creation.")
        return None, None

    new_ratings_df = pd.DataFrame(matched_rows)
    # Rebind the module-level frame to a new one that includes the new rows.
    rating_df = pd.concat([rating_df, new_ratings_df], ignore_index=True)
    return new_ratings_df, user_id


st.title("Anime Recommendation System")

# Sidebar controls: which recommender to run and how many results to show.
st.sidebar.header("Anime Recommendations")
st.sidebar.write("Explore different types of anime recommendations.")

recommendation_type = st.sidebar.selectbox(
    "Choose Recommendation Type",
    ["By User ID", "By Anime Title", "Hybrid (User & Anime)", "Create New User"],
)
num_recommendations = st.sidebar.slider(
    "Number of Recommendations", min_value=1, max_value=30, value=10
)

def _parse_ratings_text(raw_text):
    """Parse ``Title,Rating`` lines into a ``{title: rating}`` dict.

    The rating is whatever follows the LAST comma on a line, so titles that
    themselves contain commas stay intact. Blank lines are skipped.

    Returns:
        ``(ratings, error)`` where ``error`` is ``None`` on success, or the
        message to show the user. Parsing stops at the first bad line.
    """
    ratings = {}
    for line in raw_text.splitlines():
        if not line.strip():
            continue
        title, sep, rating_str = line.rpartition(',')
        title, rating_str = title.strip(), rating_str.strip()
        if not sep or not title:
            return ratings, "Invalid input format. Please use 'Title,Rating' format for each line."
        try:
            ratings[title] = float(rating_str)
        except ValueError:
            return ratings, f"Invalid rating format: '{rating_str}'. Please use numbers for ratings."
    return ratings, None


# Render the input widgets and results for the mode chosen in the sidebar.
if recommendation_type == "By User ID":
    user_id_input = st.number_input("Enter User ID", min_value=1, step=1)
    if st.button("Get User Recommendations"):
        if user_id_input:
            recommendations_df = get_svd_recommendations(user_id_input, n=num_recommendations)
            if recommendations_df is not None and not recommendations_df.empty:
                st.dataframe(recommendations_df)
            else:
                st.write("No recommendations found for this user ID or user ID not found.")
        else:
            st.warning("Please enter a User ID.")

elif recommendation_type == "By Anime Title":
    anime_title_input = st.text_input("Enter Anime Title")
    if st.button("Get Anime-Based Recommendations"):
        if anime_title_input:
            recommendations_df = get_content_recommendations(anime_title_input, n=num_recommendations)
            if isinstance(recommendations_df, pd.DataFrame):
                st.dataframe(recommendations_df)
            else:
                # Not a DataFrame: the function returned an error-message string.
                st.write(recommendations_df)
        else:
            st.warning("Please enter an Anime Title.")

elif recommendation_type == "Hybrid (User & Anime)":
    user_id_hybrid_input = st.number_input("Enter User ID for Hybrid", min_value=1, step=1)
    anime_title_hybrid_input = st.text_input("Enter Anime Title for Hybrid")
    if st.button("Get Hybrid Recommendations"):
        if user_id_hybrid_input and anime_title_hybrid_input:
            recommendations_df = hybrid_recommendations(user_id_hybrid_input, anime_title_hybrid_input, n=num_recommendations)
            if isinstance(recommendations_df, pd.DataFrame):
                st.dataframe(recommendations_df)
            else:
                # Not a DataFrame: the function returned an error-message string.
                st.write(recommendations_df)
        else:
            st.warning("Please enter both User ID and Anime Title for hybrid recommendations.")

elif recommendation_type == "Create New User":
    anime_titles_new_user_input = st.text_area("Enter Anime Titles and Ratings (one per line, format: Title,Rating - e.g., Naruto,9)")
    if st.button("Create New User and Get Recommendations"):
        if not anime_titles_new_user_input:
            st.warning("Please enter anime titles and ratings for the new user.")
        else:
            ratings_dict, parse_error = _parse_ratings_text(anime_titles_new_user_input)
            if parse_error is not None:
                st.error(parse_error)
            elif not ratings_dict:
                st.warning("Please provide anime titles and ratings to create a new user.")
            else:
                new_user_ratings_df, new_user_id = create_test_user(ratings_dict)
                if new_user_id is None:
                    st.error("Failed to create new user.")
                else:
                    st.success(f"New user created with User ID: {new_user_id}")
                    st.write("Ratings added for the new user:")
                    st.dataframe(new_user_ratings_df)
                    st.write(f"Getting recommendations for the new user (User ID: {new_user_id}):")
                    recommendations_df = get_svd_recommendations(new_user_id, n=num_recommendations)
                    if recommendations_df is not None and not recommendations_df.empty:
                        st.dataframe(recommendations_df)
                    else:
                        st.write("Could not generate recommendations for the new user.")

#Starting the Streamlit App

In [None]:
# Launch the Streamlit server for app.py on port 8989 as a background job
# (trailing `&`) so the notebook cell returns immediately.
# NOTE(review): output is redirected to `/./logs.txt`, which resolves to
# `/logs.txt` at the filesystem root rather than the working directory.
!streamlit run app.py --server.port=8989 &>/./logs.txt &

#Setting Up ngrok Tunnel

In [None]:
from getpass import getpass

# Ask for the token through getpass rather than input() so it is not echoed
# back into the notebook output.
ngrok_auth_token = getpass(prompt='Enter ngrok API Key: ')

In [None]:
from pyngrok import ngrok
import yaml

# Terminate any ngrok process left over from a previous run.
ngrok.kill()

# Register the token entered in the previous cell through pyngrok's API
# rather than interpolating the secret into a shell command line.
ngrok.set_auth_token(ngrok_auth_token)

# Open a tunnel to port 8989, the port given to `streamlit run` via
# --server.port, and print the public URL of the app.
ngrok_tunnel = ngrok.connect(8989)
print("Streamlit App:", ngrok_tunnel.public_url)