In [None]:
import numpy as np 
import pandas as pd
import warnings

# 📊 Visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# 🤖 Machine Learning
from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.cluster import KMeans

In [None]:
# Notebook-wide settings: silence every warning and let pandas render all columns.
warnings.filterwarnings(action='ignore')
pd.options.display.max_columns = None

## Data

In [None]:
# Read the book table from the CSV file in the working directory and display it.
books_final = pd.read_csv(filepath_or_buffer="books_final.csv")
books_final

In [None]:
# Keep only the two categorical columns used as clustering features.
data_features = books_final.loc[:, ["Author", "Genre"]]
data_features

In [None]:
from sklearn.preprocessing import OneHotEncoder

categorical_cols = ['Author', 'Genre']

# Dense output so the encoded array can be wrapped directly in a DataFrame
# (the encoder would otherwise return a sparse matrix).
ohe = OneHotEncoder(sparse_output=False)

# fit_transform learns the unique values of each column and encodes them in one pass.
encoded = ohe.fit_transform(data_features[categorical_cols])

# Label every indicator column with its source feature/category and keep the
# original row index, so encoded rows stay traceable to the books they came from.
data_features_ohe = pd.DataFrame(
    encoded,
    columns=ohe.get_feature_names_out(),
    index=data_features.index,
)
data_features_ohe

## Elbow method

In [None]:
# Candidate cluster counts (1 through 10) and the inertia recorded for each.
range_of_clusters = range(1, 11)
inertias = []

for k in range_of_clusters:
    # .fit() returns the fitted estimator, so construction and fitting can be chained.
    model = KMeans(n_clusters=k, random_state=42, n_init=10).fit(data_features_ohe)
    inertias.append(model.inertia_)

inertias

In [None]:
import plotly.graph_objects as go

In [None]:
# Line-and-marker plot of inertia against k, with one x tick per candidate k.
k_values = list(range_of_clusters)
elbow_trace = go.Scatter(x=k_values, y=inertias, mode='lines+markers', name='Inertia')

fig = go.Figure(data=[elbow_trace])
fig.update_layout(
    title='Elbow Method For Optimal k',
    xaxis_title='Number of clusters, k',
    yaxis_title='Inertia',
    xaxis=dict(tickmode='array', tickvals=k_values),
)
fig.show()

In [None]:
import sys
from kneed import KneeLocator

In [None]:
# Locate the knee of the decreasing, convex inertia curve computed above.
kn = KneeLocator(
    x=range_of_clusters,
    y=inertias,
    curve='convex',
    direction='decreasing',
)
optimal_clusters = kn.knee

print(f"Knee method optimal clusters: {optimal_clusters}")

## K-Means

In [None]:
# Number of clusters used for the final model.
# NOTE(review): hard-coded rather than taken from `optimal_clusters` — confirm this is intentional.
num_clusters = 7

# n_init is pinned to 10 to match the elbow-method runs above; leaving it unset
# makes the number of restarts depend on the installed scikit-learn version's default.
kmeans = KMeans(n_clusters=num_clusters, random_state=42, n_init=10)

# Fit on the one-hot features and store each book's cluster label next to its row.
books_final["cluster"] = kmeans.fit_predict(data_features_ohe)
books_final

In [None]:
from sklearn.metrics import silhouette_score

In [None]:
# Mean silhouette coefficient of the fitted clustering on the one-hot features.
silhouette_avg = silhouette_score(X=data_features_ohe, labels=kmeans.labels_)
print(f'Silhouette Score: {silhouette_avg:.3f}')

## Recommender

In [None]:
def recommend_similar_books(book_index, df, n=5, random_state=None):
    """Return up to ``n`` random books from the same cluster as the given book.

    Parameters
    ----------
    book_index : label or list-like of labels
        Index label(s) of the reference book in ``df``. May be a single label
        or a ``pd.Index`` / ``pd.Series`` / ``np.ndarray`` / ``list`` of labels
        (e.g. the ``.index`` of a title filter). When several labels are given,
        the cluster of the first one is used.
    df : pandas.DataFrame
        Frame holding one row per book and a ``"cluster"`` column.
    n : int, default 5
        Maximum number of recommendations. Fewer rows are returned when the
        cluster does not contain enough other books.
    random_state : optional
        Forwarded to ``DataFrame.sample`` to make the draw reproducible.

    Returns
    -------
    pandas.DataFrame
        Randomly sampled rows of ``df`` from the reference book's cluster,
        excluding the reference row(s) themselves.

    Raises
    ------
    IndexError
        If ``book_index`` is empty (no reference book was found).
    KeyError
        If a label in ``book_index`` is not present in ``df``.
    """
    # Accept both a single label and a collection of labels.
    if isinstance(book_index, (pd.Index, pd.Series, np.ndarray, list)):
        labels = list(book_index)
    else:
        labels = [book_index]

    if not labels:
        raise IndexError("book_index is empty: no reference book to recommend from")

    # Selecting with a list always yields a Series, so .iloc[0] is safe here.
    cluster_value = df.loc[labels, "cluster"].iloc[0]

    # Same-cluster books, minus the reference book(s); labels that fall in a
    # different cluster are simply absent from the filtered frame, hence "ignore".
    candidates = df[df["cluster"] == cluster_value].drop(index=labels, errors="ignore")

    # Cap the sample size so small clusters do not make .sample() raise.
    return candidates.sample(n=min(n, len(candidates)), random_state=random_state)

In [None]:
choice = "Dust"

# Index label(s) of every row whose title matches `choice` exactly.
book_index = books_final[books_final['Title'] == choice].index
if book_index.empty:
    raise ValueError(f"No book titled {choice!r} found in books_final")

# Reuse the helper instead of repeating its cluster lookup and sampling inline.
rec = recommend_similar_books(book_index, books_final)
rec

In [None]:
# Show five random books that belong to cluster 3.
in_cluster_3 = books_final["cluster"].eq(3)
books_final.loc[in_cluster_3].sample(n=5)

In [None]:
# Shell escape: run the Streamlit CLI on 4-ML-App.py.
# NOTE(review): `streamlit run` presumably keeps this cell busy until the server is stopped — confirm before using Run All.
!streamlit run 4-ML-App.py

In [None]:
# Show up to five books from a randomly chosen cluster.
# The label is drawn from the clusters actually present in the frame: KMeans labels
# start at 0, so drawing an integer from 1..7 would never pick cluster 0 and could
# pick a non-existent cluster 7 (empty selection -> .sample(5) raises).
random_cluster = np.random.choice(books_final["cluster"].unique())
cluster_books = books_final[books_final["cluster"] == random_cluster]
cluster_books.sample(n=min(5, len(cluster_books)))

In [None]:
# Five books drawn at random from the whole table, regardless of cluster.
books_final.sample(n=5)

In [None]:
# Shell escape: run the Streamlit CLI on Recommender.py.
# NOTE(review): `streamlit run` presumably keeps this cell busy until the server is stopped — confirm before using Run All.
!streamlit run Recommender.py