In [5]:
!pip install streamlit

Collecting streamlit
  Using cached streamlit-1.38.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Using cached altair-5.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Using cached blinker-1.8.2-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Using cached cachetools-5.5.0-py3-none-any.whl.metadata (5.3 kB)
Collecting pyarrow>=7.0 (from streamlit)
  Using cached pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (3.3 kB)
Collecting tenacity<9,>=8.1.0 (from streamlit)
  Using cached tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Using cached toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Using cached GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Using cached pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 

In [6]:
# Reinstall lightfm with gcc-14 compiled to use all threads
!CC=gcc-14 pip install --no-binary lightfm lightfm --force-reinstall

Collecting lightfm
  Using cached lightfm-1.17-cp311-cp311-macosx_14_0_arm64.whl
Collecting numpy (from lightfm)
  Using cached numpy-2.1.1-cp311-cp311-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting scipy>=0.17.0 (from lightfm)
  Using cached scipy-1.14.1-cp311-cp311-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting requests (from lightfm)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting scikit-learn (from lightfm)
  Using cached scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl.metadata (13 kB)
Collecting charset-normalizer<4,>=2 (from requests->lightfm)
  Using cached charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl.metadata (33 kB)
Collecting idna<4,>=2.5 (from requests->lightfm)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests->lightfm)
  Using cached urllib3-2.2.3-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests->lightfm)
  Using cached certifi-2024.8.3

In [7]:
# Reinstall lightfm, building it from source with gcc-14 so that it can use all threads
# !CC=gcc-14 pip install --no-binary lightfm lightfm --force-reinstall

import streamlit as st
import joblib
import numpy as np
import pandas as pd
from lightfm import LightFM
from lightfm.data import Dataset
from loading_datasets import load_data

# Load the dataset
# NOTE(review): assumes the second element returned by load_data() is the merged
# books/users/ratings DataFrame -- confirm against loading_datasets.
books_users_ratings = load_data()[1]

# Normalise the ids to strings so they compare equal to the ids used in the mappings.
# user_id is cast to int first so that a value such as 42.0 becomes "42", not "42.0".
books_users_ratings['user_id'] = books_users_ratings['user_id'].astype(int).astype(str)
books_users_ratings['isbn'] = books_users_ratings['isbn'].astype(str)
books_users_ratings['year_of_publication'] = books_users_ratings['year_of_publication'].astype(str)

# Load the model
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model = joblib.load('lightfm_model.pkl')

# Create a LightFM dataset object
dataset = Dataset()

# Fit the dataset to include all unique users and items
dataset.fit(
    users=books_users_ratings['user_id'].unique(),
    items=books_users_ratings['isbn'].unique()
)

# One (user_id, isbn) pair per rating row; built once and reused below.
interaction_tuples = list(zip(books_users_ratings['user_id'], books_users_ratings['isbn']))

# Build the interaction matrix with user-item pairs.
# build_interactions returns a pair of matrices (interactions, weights); only the
# interactions matrix is kept here, so that `interactions.shape` is usable later.
interactions, _ = dataset.build_interactions(interaction_tuples)

# Build weights array from individual ratings, aligned with interaction_tuples.
# When a (user_id, isbn) pair occurs more than once, every occurrence receives the
# rating of its first occurrence. A single pass with a dict replaces a full
# DataFrame scan per row.
first_rating_by_pair = {}
for pair, rating in zip(interaction_tuples, books_users_ratings['individual_rating'].to_numpy()):
    first_rating_by_pair.setdefault(pair, rating)
weights = np.array([first_rating_by_pair[pair] for pair in interaction_tuples])

# Get the user and item mappings (external id -> internal index)
user_mapping, _, item_mapping, _ = dataset.mapping()


####################
def recommend_books(model, interactions, user_id, user_mapping, item_mapping, num_recommendations=10):
    """Return the ISBNs of the highest-scoring items for one user.

    Parameters
    ----------
    model
        Object exposing ``predict(user_idx, item_indices)`` that returns one
        score per item index (for example a fitted LightFM model).
    interactions
        Matrix-like object whose ``shape[1]`` is the number of items. The
        ``(interactions, weights)`` tuple returned by
        ``Dataset.build_interactions`` is also accepted; its first element is used.
    user_id
        External user id; must be a key of ``user_mapping``.
    user_mapping
        Dict mapping external user ids to internal user indices.
    item_mapping
        Dict mapping ISBNs to internal item indices.
    num_recommendations
        Maximum number of ISBNs to return.

    Returns
    -------
    list
        ISBNs ordered from highest to lowest predicted score.

    Raises
    ------
    ValueError
        If ``user_id`` is not in ``user_mapping``, or if a top-scoring item
        index has no corresponding ISBN in ``item_mapping``.
    """
    # Check if the user_id exists in the user mapping
    if user_id not in user_mapping:
        raise ValueError(f"User ID {user_id} is not found in the dataset.")

    # Get the internal index for the user_id
    user_idx = user_mapping[user_id]

    # Accept either the matrix itself or the (interactions, weights) tuple.
    matrix = interactions[0] if isinstance(interactions, tuple) else interactions
    n_items = matrix.shape[1]

    # Predict scores for all items for the given user
    scores = model.predict(user_idx, np.arange(n_items))

    # Get the indices of the top scores (negated so argsort yields descending order)
    top_items = np.argsort(-scores)[:num_recommendations]

    # Invert the ISBN -> index mapping once instead of scanning it per item.
    # setdefault keeps the first ISBN if two ISBNs ever share an index.
    index_to_isbn = {}
    for isbn, idx in item_mapping.items():
        index_to_isbn.setdefault(idx, isbn)

    # Map the indices back to ISBNs
    try:
        return [index_to_isbn[int(idx)] for idx in top_items]
    except KeyError as exc:
        # Keep the ValueError contract that callers already handle.
        raise ValueError(f"Item index {exc.args[0]} has no ISBN in the item mapping.") from exc
####################


# Streamlit app
st.title("Book Recommender System")

# Strip surrounding whitespace so an id typed with a stray space is still
# looked up under its bare form in user_mapping.
user_id = st.text_input("Enter User ID:").strip()

if user_id:
    try:
        isbn_list = recommend_books(model, interactions, user_id, user_mapping, item_mapping)
        cols = ['book_title', 'book_author', 'year_of_publication', 'publisher']
        # The ratings frame holds one row per rating, so a book can appear many
        # times: keep one row per ISBN, then order the rows by recommendation rank
        # (isbn_list is sorted best-first).
        recommendations = (
            books_users_ratings[books_users_ratings['isbn'].isin(isbn_list)]
            .drop_duplicates(subset='isbn')
            .set_index('isbn')
            .loc[isbn_list, cols]
            .reset_index(drop=True)
        )
        st.write("Top Recommendations Using The LightFM Model:")
        st.write(recommendations)
    except ValueError as e:
        # recommend_books raises ValueError for ids that are not in the dataset.
        st.error(str(e))

2024-09-19 15:33:53.424 
  command:

    streamlit run /Users/laylanyrabia/neuefische/capstone/matchread/MatchRead/.venv/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-09-19 15:33:53.426 Session state does not function when running a script without `streamlit run`
