In [None]:
pip install streamlit pandas numpy seaborn scikit-learn matplotlib

Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.39.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [3

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from math import sqrt

In [None]:
@st.cache_data
def load_data():
  """Read ``combined_dataset.csv`` from the working directory.

  The function is wrapped in ``st.cache_data``.

  Returns:
      pandas.DataFrame: the parsed contents of the CSV file.
  """
  # The previous version had an `st.dataframe(df)` call placed after the
  # `return`; it could never execute, so it has been removed.
  df = pd.read_csv('combined_dataset.csv')
  return df

2024-10-06 04:51:22.920 No runtime found, using MemoryCacheStorageManager


In [None]:
def eda(df):
  """Write a short exploratory summary of ``df`` to the Streamlit page.

  Shows the first rows of the frame followed by its ``DataFrame.info()``
  summary.

  Args:
      df: pandas DataFrame to summarise. It is not modified.

  Returns:
      None. All output goes to the Streamlit page.
  """
  import io  # function-scope import: the notebook's import cell does not provide io

  st.title("Exploratory Data analysis")
  st.subheader("data preview")
  st.write(df.head())
  st.subheader("data info")
  # DataFrame.info() prints its report and returns None, so passing its
  # return value to st.write shows nothing. Capture the report in a buffer
  # and render the captured text instead.
  info_buffer = io.StringIO()
  df.info(buf=info_buffer)
  st.text(info_buffer.getvalue())

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from math import sqrt

# Load Data
@st.cache_data
def load_data():
    """Return the contents of ``combined_dataset.csv`` as a DataFrame.

    The file is read relative to the current working directory, and the
    function is wrapped in ``st.cache_data``.
    """
    return pd.read_csv('combined_dataset.csv')

# EDA and Data Preprocessing
def eda(df):
    """Render the exploratory-data-analysis section of the Streamlit page.

    Shows a preview of ``df``, its ``DataFrame.info()`` summary, and three
    charts: a histogram of ``Book-Rating``, a bar chart of the ten most
    frequent ``Book-Author`` values, and a histogram of ``Age``.

    Args:
        df: pandas DataFrame containing at least the ``Book-Rating``,
            ``Book-Author`` and ``Age`` columns. It is not modified; missing
            values are filled on a private copy used only for plotting.

    Returns:
        None. All output goes to the Streamlit page.
    """
    import io  # function-scope import: the file's import block does not include io

    st.title("Exploratory Data Analysis")

    # Show Data
    st.subheader("Data Preview")
    st.write(df.head())

    # Data Info
    st.subheader("Data Info")
    # DataFrame.info() prints its report and returns None, so its return value
    # cannot be rendered. Capture the report in a buffer and show the text.
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    st.text(info_buffer.getvalue())

    # Data Cleaning: fill missing values on a copy. Filling in place would
    # change the caller's frame, making later steps depend on whether this
    # function happened to run first.
    plot_df = df.fillna(0)

    # Visualization 1: Distribution of Ratings
    st.subheader("Distribution of Book Ratings")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(plot_df['Book-Rating'], bins=20, kde=True, ax=ax)
    ax.set_title('Distribution of Book Ratings')
    st.pyplot(fig)
    plt.close(fig)  # release the figure; each Streamlit rerun would otherwise add one

    # Visualization 2: Top Authors by Number of Ratings
    st.subheader("Top Authors by Number of Ratings")
    top_authors = plot_df['Book-Author'].value_counts().head(10)
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(x=top_authors.index, y=top_authors.values, palette='viridis', ax=ax)
    ax.set_title('Top 10 Authors by Number of Books Rated')
    ax.tick_params(axis='x', labelrotation=45)
    st.pyplot(fig)
    plt.close(fig)

    # Age Distribution of Users
    st.subheader("Age Distribution of Users")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(plot_df['Age'], bins=20, kde=True, ax=ax)
    ax.set_title('Age Distribution of Users')
    st.pyplot(fig)
    plt.close(fig)

# Model Building
def collaborative_filtering(df, n_components=50, random_state=42):
    """Predict ratings by low-rank reconstruction of the user-item matrix.

    Pivots ``df`` into a ``User-ID`` x ``ISBN`` matrix of ``Book-Rating``
    values (missing pairs filled with 0), factorises it with ``TruncatedSVD``
    and multiplies the factors back together.

    Args:
        df: pandas DataFrame with ``User-ID``, ``ISBN`` and ``Book-Rating``
            columns.
        n_components: requested number of latent factors. It is reduced to
            the number of distinct books when the matrix has fewer columns
            than requested, so small datasets do not fail.
        random_state: seed passed to ``TruncatedSVD`` so repeated runs give
            the same factors.

    Returns:
        tuple: ``(predicted_ratings_df, user_item_matrix)``, two DataFrames
        sharing the same index (users) and columns (ISBNs).
    """
    st.subheader("Model 1: Collaborative Filtering (SVD)")

    # User-Item Matrix
    user_item_matrix = df.pivot_table(index='User-ID', columns='ISBN', values='Book-Rating').fillna(0)

    # The decomposition cannot have more components than there are columns.
    n_books = user_item_matrix.shape[1]
    effective_components = max(1, min(n_components, n_books))

    # SVD model
    svd = TruncatedSVD(n_components=effective_components, random_state=random_state)
    user_factors = svd.fit_transform(user_item_matrix)
    book_factors = svd.components_

    # Reconstruct rating matrix
    predicted_ratings = np.dot(user_factors, book_factors)
    predicted_ratings_df = pd.DataFrame(predicted_ratings, index=user_item_matrix.index, columns=user_item_matrix.columns)

    return predicted_ratings_df, user_item_matrix

def content_based_filtering(df):
    """Build a book-to-book similarity matrix from book metadata.

    Each distinct ``ISBN`` is described by integer codes for its
    ``Book-Author``, ``Publisher`` and ``Year-Of-Publication`` values, and
    cosine similarity is computed between those code vectors.

    Args:
        df: pandas DataFrame with ``ISBN``, ``Book-Author``, ``Publisher``
            and ``Year-Of-Publication`` columns. An ISBN may appear on many
            rows; only its first row is used.

    Returns:
        pandas.DataFrame: square similarity matrix whose index and columns
        are the distinct ISBNs.
    """
    st.subheader("Model 2: Content-Based Filtering (Cosine Similarity)")

    # Keep one row per book. Without this the matrix is rows x rows, with the
    # same ISBN repeated in the labels, which makes label lookups ambiguous
    # and the matrix far larger than necessary.
    books = df.drop_duplicates(subset='ISBN')

    # Feature matrix for content-based filtering (based on Author and Publisher)
    # NOTE(review): pd.factorize yields arbitrary integer codes, so the cosine
    # of these vectors is only a rough proxy for similarity; one-hot encoding
    # would be more meaningful. Kept as-is to preserve the model definition.
    content_matrix = books[['Book-Author', 'Publisher', 'Year-Of-Publication']].apply(lambda x: pd.factorize(x)[0])

    # Apply cosine similarity
    content_similarity = cosine_similarity(content_matrix)

    # Convert to DataFrame
    content_similarity_df = pd.DataFrame(content_similarity, index=books['ISBN'], columns=books['ISBN'])

    return content_similarity_df

def hybrid_model(collaborative_ratings, content_ratings, collaborative_weight=0.7, content_weight=0.3):
    """Blend collaborative predictions with content-similarity scores.

    The two inputs have different shapes: ``collaborative_ratings`` is
    users x books while ``content_ratings`` is books x books. Adding them
    directly makes pandas align on labels and yields a mostly-NaN union of
    user IDs and ISBNs. Instead, the similarity matrix is first turned into a
    per-user content score (each user's collaborative scores spread over
    similar books, normalised by the total similarity of each book), and that
    users x books score is blended with the collaborative one.

    Args:
        collaborative_ratings: DataFrame of predicted ratings, indexed by
            user with one column per book.
        content_ratings: square book-to-book similarity DataFrame. Repeated
            labels are tolerated; the first occurrence of each is used.
        collaborative_weight: weight of the collaborative score (default 0.7).
        content_weight: weight of the content score (default 0.3).

    Returns:
        pandas.DataFrame: same index and columns as ``collaborative_ratings``.
        Books absent from ``content_ratings`` keep their collaborative score.
    """
    st.subheader("Model 3: Hybrid Model (Collaborative + Content-Based)")

    hybrid_ratings = collaborative_ratings.astype(float)

    # Drop repeated labels so label-based selection below is unambiguous.
    similarity = content_ratings.loc[
        ~content_ratings.index.duplicated(),
        ~content_ratings.columns.duplicated(),
    ]
    shared_books = (
        collaborative_ratings.columns
        .intersection(similarity.index)
        .intersection(similarity.columns)
    )
    if len(shared_books) == 0:
        # Nothing to blend with: fall back to the collaborative scores.
        return hybrid_ratings

    collab_values = hybrid_ratings.loc[:, shared_books].to_numpy(dtype=float)
    sim_values = similarity.loc[shared_books, shared_books].to_numpy(dtype=float)

    # Normalise by each book's total similarity so the content score stays on
    # the same scale as the collaborative score; guard against division by 0.
    totals = np.abs(sim_values).sum(axis=0)
    totals[totals == 0] = 1.0
    content_scores = collab_values @ sim_values / totals

    # Combine collaborative and content-based filtering
    hybrid_ratings.loc[:, shared_books] = (
        collaborative_weight * collab_values + content_weight * content_scores
    )
    return hybrid_ratings

# Model Evaluation
def evaluate_model(true_ratings, predicted_ratings, rated_only=True):
    """Compute and display the RMSE between true and predicted ratings.

    Args:
        true_ratings: DataFrame of actual ratings in which 0 marks a
            user/book pair without a rating (the user-item matrix built in
            this file is zero-filled).
        predicted_ratings: DataFrame of predictions with the same shape and
            the same row/column order as ``true_ratings``.
        rated_only: when True (default), only cells with a non-zero true
            rating are scored, so the error is not dominated by the
            zero-filled "no rating" cells. Pass False to score every cell.
            If no cell has a non-zero rating, every cell is scored.

    Returns:
        float: the root-mean-squared error, which is also written to the
        Streamlit page.

    Raises:
        ValueError: if the two inputs do not have the same shape.
    """
    st.subheader("Model Evaluation")

    actual = true_ratings.values.flatten()
    predicted = predicted_ratings.values.flatten()
    if actual.shape != predicted.shape:
        raise ValueError(
            "true_ratings and predicted_ratings must have the same shape"
        )

    if rated_only:
        rated = actual != 0
        if rated.any():
            actual, predicted = actual[rated], predicted[rated]

    # RMSE for collaborative filtering
    rmse_value = sqrt(mean_squared_error(actual, predicted))
    st.write(f"RMSE: {rmse_value:.4f}")

    return rmse_value

# Recommendation function
def recommend_books(user_id, ratings_matrix, num_recommendations=5):
    """Return the highest-scoring books for one user.

    Args:
        user_id: label of the user's row in ``ratings_matrix``.
        ratings_matrix: DataFrame of numeric scores, one row per user and
            one column per book.
        num_recommendations: maximum number of books to return.

    Returns:
        pandas.Series: up to ``num_recommendations`` scores in descending
        order, indexed by book. Missing (NaN) scores are skipped. The Series
        is empty when ``user_id`` has no row in ``ratings_matrix``, so a
        caller driven by free-form user input does not crash on a KeyError.
    """
    if user_id not in ratings_matrix.index:
        return pd.Series(dtype=float, name=user_id)

    user_ratings = ratings_matrix.loc[user_id].nlargest(num_recommendations)
    return user_ratings

# Streamlit App
def _build_model(option, df):
    """Build the model(s) needed for ``option`` and return them in a dict."""
    built = {"option": option}
    if option == 'Collaborative Filtering':
        built["predicted_ratings"], built["user_item_matrix"] = collaborative_filtering(df)
    elif option == 'Content-Based Filtering':
        built["content_ratings"] = content_based_filtering(df)
    else:
        predicted_ratings, built["user_item_matrix"] = collaborative_filtering(df)
        content_ratings = content_based_filtering(df)
        built["hybrid_ratings"] = hybrid_model(predicted_ratings, content_ratings)
    return built


def _show_recommendations(ratings_matrix):
    """Render the user/count inputs and the matching recommendations."""
    user_id = st.number_input("Enter User ID:", min_value=1, value=5)
    num_recommendations = st.slider("Number of Recommendations:", 1, 10, 5)

    # Check first: the ID is free-form input and may not exist in the data.
    if user_id not in ratings_matrix.index:
        st.warning(f"User ID {user_id} was not found in the ratings data.")
        return

    recommendations = recommend_books(user_id, ratings_matrix, num_recommendations)
    st.write("Top Recommendations:")
    st.write(recommendations)


def main():
    """Run the Streamlit page: model choice, optional EDA, build and recommend.

    ``st.sidebar.button`` returns True only on the single rerun triggered by
    the click. The widgets shown after building (user ID, number of
    recommendations) cause their own reruns, on which the button is False
    again. The built model is therefore kept in ``st.session_state`` and the
    results section is rendered whenever a model for the currently selected
    option is stored, not only on the click itself.
    """
    st.sidebar.title("Book Recommendation System")
    st.sidebar.subheader("Choose a Model:")
    option = st.sidebar.selectbox(
        'Which model do you want to use?',
        ('Collaborative Filtering', 'Content-Based Filtering', 'Hybrid Model'))

    df = load_data()

    # Step 1: EDA
    if st.sidebar.checkbox("Show EDA"):
        eda(df)

    # Step 2: Model Building (only on click; the result survives later reruns)
    if st.sidebar.button("Build Model"):
        st.session_state["built_model"] = _build_model(option, df)

    built = st.session_state.get("built_model")
    if built is None or built["option"] != option:
        # Nothing built yet for the selected option.
        return

    if option == 'Collaborative Filtering':
        st.write("Collaborative Filtering Model Built!")

        # Step 3: Model Evaluation
        evaluate_model(built["user_item_matrix"], built["predicted_ratings"])

        # Step 4: Recommendations
        _show_recommendations(built["predicted_ratings"])

    elif option == 'Content-Based Filtering':
        st.write("Content-Based Filtering Model Built!")
        st.write("Top Similar Books for First Book:")
        st.write(built["content_ratings"].iloc[0].sort_values(ascending=False).head(5))

    else:
        st.write("Hybrid Model Built!")

        # Step 4: Recommendations
        _show_recommendations(built["hybrid_ratings"])

# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


2024-10-06 04:55:23.779 No runtime found, using MemoryCacheStorageManager
