**Importing Libraries**

In [None]:
# Notebook dependencies for the profiling and training cells.
# %pip installs into the environment of the running kernel, whereas a shell
# `!pip` may resolve to a different interpreter.
%pip install -q datasets ydata-profiling


Collecting ydata-profiling
  Downloading ydata_profiling-4.18.0-py2.py3-none-any.whl.metadata (22 kB)
Collecting visions<0.8.2,>=0.7.5 (from visions[type_image_path]<0.8.2,>=0.7.5->ydata-profiling)
  Downloading visions-0.8.1-py3-none-any.whl.metadata (11 kB)
Collecting minify-html>=0.15.0 (from ydata-profiling)
  Downloading minify_html-0.18.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting filetype>=1.0.0 (from ydata-profiling)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting phik<0.13,>=0.12.5 (from ydata-profiling)
  Downloading phik-0.12.5-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata-profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting imagehash==4.3.2 (from ydata-profiling)
  Downloading ImageHash-4.3.2-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting dacite<2,>=1.9 (from ydata-profiling)
  Downloading

**Data Profiling**

In [None]:
import pandas as pd
from ydata_profiling import ProfileReport

# Profile the CSV as-is, write the report to a standalone HTML file, then
# embed the same report in the notebook output.
df = pd.read_csv("goodreads_data.csv")
profile = ProfileReport(
    df,
    title="Data Profiling Report",
    explorative=True,
)
profile.to_file("data_profile.html")
profile.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]


  0%|          | 0/8 [00:00<?, ?it/s][A
 25%|██▌       | 2/8 [00:00<00:00,  8.65it/s][A
100%|██████████| 8/8 [00:03<00:00,  2.37it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

**Model Training**

In [None]:
import os
import re
import numpy as np
import pandas as pd
import ast
import torch

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, f1_score

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    EarlyStoppingCallback,
)

from datasets import Dataset

# Disable Weights and Biases
os.environ["WANDB_DISABLED"] = "true"

# 1. Load and preprocess the dataset
# The python engine with on_bad_lines="skip" drops malformed rows instead of
# aborting the whole read.
df = pd.read_csv("goodreads_data.csv", engine="python", on_bad_lines="skip")

# Clean Num_Ratings if present: remove commas, then coerce to numbers
# (values that still cannot be parsed become NaN).
if "Num_Ratings" in df.columns:
    ratings_text = df["Num_Ratings"].astype(str).str.replace(",", "", regex=False)
    df["Num_Ratings"] = pd.to_numeric(ratings_text, errors="coerce")


# Clean Description: fill missing values, collapse every whitespace run to a
# single space, then keep only rows with more than 20 words.
descriptions = df["Description"].fillna("").astype(str)
df["Description"] = descriptions.apply(lambda text: re.sub(r"\s+", " ", text.strip()))
word_counts = df["Description"].str.split().apply(len)
df = df[word_counts > 20]
# Clean Genre
def clean_genre(x):
    """Return the primary (first) genre encoded in a raw ``Genres`` cell.

    Parameters
    ----------
    x : str
        Raw cell value. Expected to be the text of a Python list literal such
        as ``"['Fantasy', 'Fiction']"``; any other text is treated as a plain
        genre name.

    Returns
    -------
    str
        The first list element, stripped. ``"Unknown"`` for an empty list.
        For input that is not a list literal, the stripped input itself.
    """
    try:
        genres = ast.literal_eval(x)
    except (ValueError, SyntaxError, TypeError):
        # Not a Python literal (e.g. a bare genre name): use the text as-is.
        return str(x).strip()

    if isinstance(genres, list):
        # An empty list has no primary genre; use the same "Unknown" sentinel
        # that blank/missing cells receive instead of the literal text "[]".
        return str(genres[0]).strip() if genres else "Unknown"
    return str(x).strip()

# First genre of each book; blank or missing genre cells become "Unknown".
df["Primary_Genre"] = df["Genres"].replace(r'^\s*$', 'Unknown', regex=True).fillna("Unknown").apply(clean_genre)

# Remove rare genres
genre_counts = df["Primary_Genre"].value_counts()
valid_genres = genre_counts[genre_counts >= 50].index
df = df[df["Primary_Genre"].isin(valid_genres)]

# Sample up to 2000 per genre (balanced sampling)
# Shuffle once with a fixed seed, then keep the first n rows of every genre.
# This caps each genre at n randomly chosen rows without groupby.apply, which
# triggers a pandas warning when the applied function sees the grouping column.
n_samples_per_genre = 2000
df = (
    df.sample(frac=1, random_state=42)
    .groupby("Primary_Genre")
    .head(n_samples_per_genre)
    .reset_index(drop=True)
)

# Encode labels
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["Primary_Genre"])
label_map = {index: label for index, label in enumerate(label_encoder.classes_)}

# Combine Title + Description
df["Book"] = df["Book"].fillna("")
df["text"] = df["Book"] + " " + df["Description"]

# TF-IDF based outlier removal
# norm=None keeps the raw TF-IDF weights. With the default norm="l2" every
# non-empty row is rescaled to unit length, so all row norms are ~1.0 and the
# percentile cut below would only separate floating-point noise.
tfidf = TfidfVectorizer(max_features=5000, norm=None)
X_tfidf = tfidf.fit_transform(df["text"])
# Row-wise L2 norm computed on the sparse matrix (no dense copy needed).
tfidf_norms = np.sqrt(np.asarray(X_tfidf.multiply(X_tfidf).sum(axis=1)).ravel())
df["tfidf_norm"] = tfidf_norms
df = df[df["tfidf_norm"] < np.percentile(tfidf_norms, 95)]

# Final dataset
df = df[["text", "label"]]

# Stratified Train/Val/Test split: 90% train, then the held-out 10% is halved
# into validation and test, each stratified by label.
train_text, temp_text, train_labels, temp_labels = train_test_split(
    df['text'], df['label'], test_size=0.1, random_state=42, stratify=df['label']
)

val_text, test_text, val_labels, test_labels = train_test_split(
    temp_text, temp_labels, test_size=0.5, random_state=42, stratify=temp_labels
)

train_df = pd.DataFrame({'text': train_text, 'label': train_labels})
val_df = pd.DataFrame({'text': val_text, 'label': val_labels})
test_df = pd.DataFrame({'text': test_text, 'label': test_labels})

# Tokenization and Dataset Preparation
model_name = "roberta-large"  # more powerful model
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize(batch):
    """Tokenize a batch's "text" entries, padding/truncating each to 256 tokens."""
    return tokenizer(
        batch["text"],
        padding='max_length',
        truncation=True,
        max_length=256,
    )


# Manually convert numpy arrays to torch tensors
def numpy_to_torch(batch):
    """Return a dict with every value of ``batch`` wrapped in ``torch.tensor``."""
    return {name: torch.tensor(values) for name, values in batch.items()}


# One tokenized Dataset per split.
train_dataset = Dataset.from_pandas(train_df).map(tokenize, batched=True)
val_dataset = Dataset.from_pandas(val_df).map(tokenize, batched=True)
test_dataset = Dataset.from_pandas(test_df).map(tokenize, batched=True)

# Change the format to numpy first, then install the torch-converting transform.
for _split in (train_dataset, val_dataset, test_dataset):
    _split.set_format("numpy", columns=["input_ids", "attention_mask", "label"])
    _split.set_transform(numpy_to_torch)


# Load model and define training arguments
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(label_map))

training_args = TrainingArguments(
    output_dir="./best_model",
    # Evaluate and checkpoint once per epoch so the best epoch can be restored.
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=8,
    weight_decay=0.01,
    warmup_steps=500,
    logging_dir="./logs",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
    greater_is_better=True,
    seed=42,
    label_smoothing_factor=0.1,
    fp16=torch.cuda.is_available(),
    # Turn off every logging integration explicitly; the WANDB_DISABLED
    # environment variable is deprecated in favour of this argument.
    report_to="none",
)

# Evaluation metrics
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    ``pred`` must expose ``label_ids`` and ``predictions``; the predicted
    class is the argmax over the last axis of ``predictions``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "f1": f1_score(y_true, y_pred, average="weighted"),
    }

# Trainer
# Training stops early once the tracked metric fails to improve for 2
# consecutive evaluations.
stop_early = EarlyStoppingCallback(early_stopping_patience=2)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[stop_early],
)

trainer.train()

# Save the model
# Store the genre names in the config so the saved model carries its own
# index <-> label mapping.
model.config.label2id = {genre: idx for idx, genre in label_map.items()}
model.config.id2label = label_map
model.save_pretrained("./best_model")
tokenizer.save_pretrained("./best_model")

# Final evaluation
test_results = trainer.evaluate(test_dataset)
print("\nTest set results:")
print(f"Accuracy: {test_results['eval_accuracy']:.4f}")
print(f"F1 Score: {test_results['eval_f1']:.4f}")

print("✅ Model training complete and saved.")


  df = df.groupby('Primary_Genre').apply(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/7085 [00:00<?, ? examples/s]

Map:   0%|          | 0/394 [00:00<?, ? examples/s]

Map:   0%|          | 0/394 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,1.756115,0.550761,0.530063
2,2.278700,1.539405,0.670051,0.653248
3,1.438700,1.490007,0.685279,0.673344
4,1.078200,1.63119,0.677665,0.674954
5,0.865800,1.708076,0.708122,0.699026
6,0.758800,1.733765,0.682741,0.675317
7,0.708800,1.727684,0.69797,0.695487



Test set results:
Accuracy: 0.6904
F1 Score: 0.6792
✅ Model training complete and saved.


**Dashboard Using Streamlit**

In [None]:
# Dependencies for the Streamlit dashboard.
# %pip installs into the environment of the running kernel, whereas a shell
# `!pip` may resolve to a different interpreter.
%pip install -q streamlit pyngrok transformers datasets torch torchvision torchaudio pytesseract SpeechRecognition pydub


Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.5.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting SpeechRecognition
  Downloading speechrecognition-3.14.4-py3-none-any.whl.metadata (30 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m113.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.5.0-py3-none-any.whl (24 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Downloading speechrecognition-3.14.4-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6

In [None]:
# Python and system dependencies for audio handling.
# The explicit %pip magic is required: a bare `pip install ...` line in this
# multi-line cell is parsed as Python and raises SyntaxError.
%pip install -q python-magic
!apt-get update
!apt-get install ffmpeg -y


SyntaxError: invalid syntax (ipython-input-1709721643.py, line 1)

In [None]:
# Kill any running process whose command line matches "ngrok".
!pkill -f ngrok

In [None]:
# Register the ngrok auth token. YOUR_AUTH_TOKEN is a placeholder: replace it
# with a real token before running this cell.
!ngrok config add-authtoken YOUR_AUTH_TOKEN

In [None]:
# Launch the Streamlit app in the background; stdout and stderr go to /content/log.txt.
# NOTE(review): app.py is created by the %%writefile cell further down, so that
# cell has to be executed before this one on a fresh runtime.
!streamlit run app.py &>/content/log.txt &

In [None]:
# Show the last 20 lines of the Streamlit log file.
!tail -n 20 /content/log.txt

In [None]:
%%writefile app.py
# app.py
import os
import csv
import time
import tempfile
from datetime import datetime
import io
import streamlit as st

# Light imports now; heavy imports (transformers, torch, pytesseract) are lazy-loaded where needed
import pandas as pd
import requests

# ----------------- CONFIG -----------------
MODEL_PATH = "./best_model"
EMOTION_MODEL = "j-hartmann/emotion-english-distilroberta-base"
BOOKS_DATA_PATH = "goodreads_data.csv"
# Read the key from the environment so a real secret never has to be written
# into this file; the placeholder remains the fallback for compatibility.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "YOUR_API_KEY")
# Load books dataset safely (fallback to empty DataFrame if missing)
try:
    books_df = pd.read_csv(BOOKS_DATA_PATH)
except Exception:
    # Keep the app usable without the CSV: an empty frame with the columns the
    # recommendation and display code reads.
    books_df = pd.DataFrame(columns=["Book", "Author", "Genres", "Avg_Rating", "URL", "Description"])

# Maps an emotion label to a comma-separated string of genre names.
# NOTE(review): presumably keyed by the labels the emotion classifier emits and
# passed to get_recommendations(), which splits its argument on commas —
# confirm against the caller.
emotion_to_genre = {
    "joy": "Comedy, Romance",
    "sadness": "Romance, Drama",
    "anger": "Thriller, Mystery",
    "fear": "Fantasy, Paranormal",
    "disgust": "Dark Fiction, Horror",
    "surprise": "Adventure, Sci-Fi",
    "neutral": "Contemporary Fiction",
    "love": "Romance, Fantasy Romance",
    "curiosity": "Historical Fiction, Sci-Fi",
    "guilt": "Psychological Fiction, Thriller",
    "shame": "Drama, Tragedy",
    "pride": "Self-help, Motivational",
    "hope": "Inspiration, Self-help",
    "relief": "Feel-good, Contemporary Fiction",
    "confusion": "Psychological Thriller, Mystery",
    "excitement": "Adventure, Fantasy"
}

# ----------------- PAGE SETTINGS -----------------
st.set_page_config(page_title="📚 Feel2Read", layout="centered")

# Winter theme: global CSS overrides, injected right after st.set_page_config(...)
st.markdown(r"""
<style>
:root{
  --win-1: #E6F8FA;   /* pale icy background */
  --win-2: #CFEFF1;   /* soft aqua */
  --win-3: #9CC6C9;   /* muted teal */
  --win-4: #F8FAFB;   /* very light / near-white for cards */
  --accent: #0B2E33;  /* deep teal for text accents */
  --muted: #49686a;
  --card-border: rgba(11,46,51,0.06);
}

/* Page background: subtle stacked-books wallpaper + winter wash */
.stApp {
  background:
    linear-gradient(180deg, rgba(230,248,250,0.92), rgba(207,239,241,0.86)),
    url('https://images.unsplash.com/photo-1524995997946-a1c2e315a42f?auto=format&fit=crop&w=1600&q=60');
  background-size: cover;
  background-position: center;
  background-attachment: fixed;
  color: var(--accent);
}

/* Constrain main container for nicer layout */
.main .block-container {
  max-width: 980px;
  margin-left: auto;
  margin-right: auto;
  padding-top: 24px;
  padding-bottom: 32px;
  background: transparent;
}

/* Top card / login card style */
.login-card, .stContainer, .stApp > .main, .preview-block {
  background: linear-gradient(180deg, rgba(248,250,251,0.98), rgba(248,250,251,0.95));
  border-radius: 14px;
  border: 1px solid var(--card-border);
  box-shadow: 0 12px 28px rgba(11,46,51,0.06);
  padding: 12px;
  backdrop-filter: blur(4px);
}

/* Sidebar container */
section[data-testid="stSidebar"] > div[role="complementary"] {
  background: linear-gradient(180deg, rgba(207,239,241,0.6), rgba(230,248,250,0.45));
  border-radius: 12px;
  border: 1px solid var(--card-border);
  padding: 10px;
}

/* Inputs: pale winter tint, rounded */
.stTextInput>div>div>input,
.stNumberInput>div>div>input,
.stTextArea>div>div>textarea,
.stSelectbox>div>div>div[role="button"],
.stFileUploader>div>label {
  background: var(--win-4) !important;
  border-radius: 12px !important;
  padding: 12px 14px !important;
  border: 1px solid rgba(11,46,51,0.06) !important;
  color: var(--accent) !important;
  box-shadow: 0 6px 18px rgba(11,46,51,0.03) inset !important;
}

/* Placeholder text */
.stTextInput>div>div>input::placeholder,
.stTextArea>div>div>textarea::placeholder {
  color: #9aa8a9 !important;
}

/* Input focus */
.stTextInput>div>div>input:focus,
.stTextArea>div>div>textarea:focus,
.stNumberInput>div>div>input:focus {
  outline: none !important;
  border: 1.6px solid var(--win-3) !important;
  box-shadow: 0 10px 26px rgba(156,198,201,0.14) !important;
}

/* HEADINGS */
h1, h2, h3, h4 {
  color: var(--accent) !important;
}

/* DATAFRAME styling (gentle) */
.stDataFrame thead th {
  background: linear-gradient(90deg, rgba(230,248,250,0.9), rgba(207,239,241,0.9));
  color: var(--accent);
}

/* --- Buttons: FORCE WHITE primary button background --- */
/* All Streamlit buttons become white with dark accent text. */
.stButton>button,
.stDownloadButton button,
button[kind="primary"],
.primary-btn {
  background: #ffffff !important;         /* white bg */
  color: var(--accent) !important;        /* dark teal text */
  border-radius: 12px !important;
  padding: 10px 18px !important;
  border: 1px solid rgba(11,46,51,0.06) !important;
  box-shadow: 0 10px 22px rgba(11,46,51,0.06) !important;
  font-weight: 700 !important;
}

/* Hover for white buttons: subtle lift + teal outline */
.stButton>button:hover,
.stDownloadButton button:hover,
button[kind="primary"]:hover,
.primary-btn:hover {
  transform: translateY(-3px);
  box-shadow: 0 14px 30px rgba(11,46,51,0.09) !important;
  border-color: rgba(11,46,51,0.12) !important;
}

/* For any place code tried to set gradient earlier, force white */
.stButton>button[style*="linear-gradient"] { background: #ffffff !important; }

/* Make the bright-action text slightly darker for contrast */
.stButton>button, .primary-btn { color: var(--accent) !important; }

/* Small notes & muted text */
.secondary-note, .streamlit-expanderHeader, label, p {
  color: var(--muted) !important;
}

/* floating book drop shadow (if used) */
.float-book { filter: drop-shadow(0 8px 18px rgba(11,46,51,0.08)); }

/* Responsive tweaks */
@media (max-width:900px) {
  .login-card, .stContainer { padding: 14px; margin: 8px; }
  .stTextInput>div>div>input, .stNumberInput>div>div>input { padding: 10px !important; }
}
</style>
""", unsafe_allow_html=True)

# ----------------- HELPER: lazy loaders -----------------
_genre_pipeline = None
_emotion_pipeline = None


def _device_index():
    try:
        import torch
        return 0 if torch.cuda.is_available() else -1
    except Exception:
        return -1


@st.cache_resource
def load_genre_pipeline():
    """Return the text-classification pipeline for the fine-tuned genre model.

    The model and tokenizer are loaded from ``MODEL_PATH``. transformers is
    imported lazily so that starting the app does not pay for it.

    Streamlit re-executes this script on every interaction, which resets the
    module-level ``_genre_pipeline`` slot to ``None``; ``st.cache_resource``
    keeps the loaded pipeline alive across those reruns so the model is not
    reloaded each time.
    """
    global _genre_pipeline
    if _genre_pipeline is None:
        from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        _genre_pipeline = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            device=_device_index(),
        )
    return _genre_pipeline


@st.cache_resource
def load_emotion_pipeline():
    """Return the text-classification pipeline for ``EMOTION_MODEL``.

    The pipeline is built with ``return_all_scores=True``, so each call yields
    a score for every label rather than only the top one. transformers is
    imported lazily so that starting the app does not pay for it.

    Streamlit re-executes this script on every interaction, which resets the
    module-level ``_emotion_pipeline`` slot to ``None``; ``st.cache_resource``
    keeps the loaded pipeline alive across those reruns so the model is not
    reloaded each time.
    """
    global _emotion_pipeline
    if _emotion_pipeline is None:
        from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
        model = AutoModelForSequenceClassification.from_pretrained(EMOTION_MODEL)
        tokenizer = AutoTokenizer.from_pretrained(EMOTION_MODEL)
        _emotion_pipeline = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            return_all_scores=True,
            device=_device_index(),
        )
    return _emotion_pipeline


# ----------------- SMALL UTILITIES -----------------
import requests
import os
import streamlit as st
import requests

import random

def generate_inspirational_quote():
    """Return this session's inspirational quote.

    The first call picks one quote at random and stores it in
    ``st.session_state.inspirational_quote``; later calls return that same
    stored quote.
    """
    state = st.session_state
    if "inspirational_quote" not in state:
        state.inspirational_quote = random.choice([
            "Reading gives you wings to explore new worlds.",
            "A book a day keeps boredom away.",
            "Every page turned is a new adventure.",
            "Books are passports to infinite journeys.",
            "Dive into a story, emerge wiser."
        ])
    return state.inspirational_quote


import magic
import speech_recognition as sr
from pydub import AudioSegment
import tempfile, shutil, os
import streamlit as st


# Ordered (MIME substring, decoder format) pairs; the first match wins.
_AUDIO_MIME_FORMATS = (
    ("wav", "wav"),
    ("webm", "webm"),
    ("ogg", "ogg"),
    ("mpeg", "mp3"),
    ("mp3", "mp3"),
    ("m4a", "m4a"),
    ("mp4", "mp4"),
)


def transcribe_audio(uploaded_file):
    """Transcribe an uploaded audio file to text.

    The audio type is sniffed from the file's bytes (not its name), the audio
    is converted to 16 kHz mono WAV, and the result is sent to
    ``Recognizer.recognize_google``.

    Parameters
    ----------
    uploaded_file
        Object with a ``read()`` method returning the raw audio bytes.

    Returns
    -------
    str or None
        The recognized text. ``None`` when the type is unsupported, the file
        cannot be decoded, the speech is unintelligible, or any other error
        occurs (errors are reported with ``st.error``).
    """
    temp_paths = []  # every temp file created here; removed in ``finally``
    try:
        # Load file bytes
        file_bytes = uploaded_file.read()

        # Detect actual audio format
        mime = magic.Magic(mime=True).from_buffer(file_bytes)
        audio_format = next(
            (fmt for marker, fmt in _AUDIO_MIME_FORMATS if marker in mime),
            None,
        )
        if audio_format is None:
            st.error(f"Unsupported or unknown audio type: {mime}")
            return None

        # Create temp input file
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{audio_format}") as temp_in:
            temp_in_path = temp_in.name
            temp_paths.append(temp_in_path)
            temp_in.write(file_bytes)

        # Convert any format → WAV 16kHz mono
        try:
            audio = AudioSegment.from_file(temp_in_path, format=audio_format)
        except Exception as e:
            st.error(f"FFmpeg could not decode this file.\nDetected MIME: {mime}\nError: {e}")
            return None

        audio = audio.set_channels(1).set_frame_rate(16000)

        # Reserve a path for the WAV file, then export after the handle is
        # closed so the file is never open twice.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
            wav_path = temp_wav.name
            temp_paths.append(wav_path)
        audio.export(wav_path, format="wav")

        # Speech Recognition
        r = sr.Recognizer()
        r.energy_threshold = 200
        r.dynamic_energy_threshold = True

        with sr.AudioFile(wav_path) as source:
            audio_data = r.record(source)

        # Google Speech API
        try:
            return r.recognize_google(audio_data)
        except sr.UnknownValueError:
            # Speech could not be understood: not an error, just no text.
            return None

    except Exception as e:
        st.error(f"Audio processing error: {e}")
        return None
    finally:
        # Clean up on every exit path, including the early returns above.
        for path in temp_paths:
            try:
                os.remove(path)
            except OSError:
                pass





def display_recommendations(recommended_books: pd.DataFrame):
    """Render up to five recommended books, one Streamlit tab per book.

    Parameters
    ----------
    recommended_books : pd.DataFrame
        Rows are read with ``row.get`` for the keys ``Book``, ``Author``,
        ``Genres``, ``Avg_Rating``, ``URL`` and ``Description``; missing keys
        fall back to placeholder text. ``None`` or an empty frame shows an
        informational message instead.
    """
    if recommended_books is None or recommended_books.empty:
        st.info("No recommendations found.")
        return

    recommended_books = recommended_books.head(5)
    tabs = st.tabs([f"Book {i+1}" for i in range(len(recommended_books))])
    FIXED_IMAGE = "https://youngscholarz.com/wp-content/uploads/2024/11/open-book.jpg"

    def rating_to_stars(r):
        """One star per whole rating point, plus a half-star glyph for a fraction >= 0.5."""
        try:
            r = float(r)
            full = int(r)
        except (TypeError, ValueError, OverflowError):
            # Missing or non-numeric rating (including NaN/inf): show no stars.
            return ""
        half = (r - full) >= 0.5
        return "⭐" * full + ("✰" if half else "")

    for tab, (_, row) in zip(tabs, recommended_books.iterrows()):
        with tab:
            cols = st.columns([1, 2], gap="large")
            # A missing description arrives as NaN/None; show nothing rather
            # than the text "nan".
            raw_description = row.get("Description", "")
            description = "" if pd.isna(raw_description) else str(raw_description)

            with cols[0]:
                st.image(FIXED_IMAGE, use_container_width=True, clamp=False)

            with cols[1]:
                st.header(f"{row.get('Book','Unknown')} — {row.get('Author','Unknown')}")
                st.write("*Genres:*", row.get("Genres", "Unknown"))
                st.write("*Avg Rating:*", rating_to_stars(row.get("Avg_Rating", 0)), f"({row.get('Avg_Rating','N/A')})")

                url = row.get("URL", "")
                if pd.notna(url) and str(url).strip():
                    st.markdown(f"[Goodreads link]({url})")

                # Show description (truncated to 500 chars for neatness)
                st.write(description[:500] + "..." if len(description) > 500 else description)


# ----------------- UI & Flow -----------------
if "logged_in" not in st.session_state:
    # First run of this session: seed every login-related key with its
    # "logged out" value.
    for _state_key, _initial_value in (
        ("logged_in", False),
        ("user_name", ""),
        ("user_age", None),
        ("favorite_book", ""),
    ):
        st.session_state[_state_key] = _initial_value


# ----------------- RECOMMENDATION HELPER -----------------
def get_recommendations(genre_string):
    """Return up to five recommended books for a comma-separated genre string.

    Search order:
      1. each comma-separated genre phrase, as a case-insensitive substring of
         the ``Genres`` column;
      2. each individual word of those phrases;
      3. a random sample of the whole catalogue.

    The first search term that still has matches after the user's favorite
    book (``st.session_state["favorite_book"]``) is removed wins.

    Parameters
    ----------
    genre_string : str
        e.g. ``"Adventure, Sci-Fi"``. A non-string or blank value goes straight
        to the random sample.

    Returns
    -------
    pd.DataFrame
        At most five rows of ``books_df``; empty when the catalogue is empty.
    """

    def _without_favorite(frame):
        # Drop the user's favorite book so it is never recommended back.
        fav = st.session_state.get("favorite_book", "")
        if fav and "Book" in frame.columns:
            return frame[frame["Book"] != fav]
        return frame

    def _random_pick():
        # Random fallback; sized to the pool so a small catalogue cannot make
        # sample() raise.
        pool = _without_favorite(books_df)
        if pool.empty:
            return pd.DataFrame()
        return pool.sample(min(5, len(pool)))

    def _matches(term):
        # regex=False: the term is plain text, so characters such as "+" or
        # "(" are matched literally instead of being parsed as a pattern.
        try:
            hits = books_df[
                books_df["Genres"].str.contains(term, case=False, na=False, regex=False)
            ]
        except (AttributeError, KeyError, TypeError):
            # No usable Genres column: treat as "no match".
            return pd.DataFrame()
        return _without_favorite(hits)

    if not isinstance(genre_string, str) or not genre_string.strip():
        return _random_pick()

    parts = [g.strip() for g in genre_string.split(",") if g.strip()]

    # Whole phrases first, then their individual words.
    search_terms = parts + [token for g in parts for token in g.split()]
    for term in search_terms:
        recs = _matches(term)
        if not recs.empty:
            return recs.sample(min(5, len(recs)))

    # Final fallback: random
    return _random_pick()



def login_page():
    """Render the login form and handle its submission.

    The form collects a name, an age and an optional favorite book (chosen
    from the catalogue or typed in). On a valid submit the entry is appended
    to ``user_logins.csv``, the values are stored in ``st.session_state``
    (``logged_in``, ``user_name``, ``user_age``, ``favorite_book``) and the
    script is rerun. An empty name or a failed CSV write shows an error and
    stops the current run.
    """
    from html import escape  # user-typed text is interpolated into raw HTML below

    st.markdown(
        """
        <style>
        .login-card {
            max-width: 920px;
            margin: 24px auto;
            padding: 28px;
            border-radius: 20px;
            background: rgba(184, 227, 233, 0.45);
            border: 1px solid rgba(15, 41, 44, 0.18);
            box-shadow: 0 18px 40px rgba(15, 41, 44, 0.18), 0 6px 16px rgba(0,0,0,0.08);
            backdrop-filter: blur(6px);
            -webkit-backdrop-filter: blur(6px);
            font-family: Inter, "Segoe UI", Roboto, Arial, sans-serif;
        }

        .login-heading { font-size: 30px; font-weight: 700; color: #0B2E33; margin: 0; }
        .login-sub { color:#335054; margin-top:6px; margin-bottom:10px; }

        .avatar {
            width:110px; height:110px; border-radius:50%;
            display:flex; align-items:center; justify-content:center; font-size:44px;
            color:white; font-weight:700; margin:auto;
            background: linear-gradient(135deg,#4F7C82,#0B2E33);
            box-shadow: 0 10px 30px rgba(11,46,51,0.22);
        }

        .preview-block {
            background: rgba(184, 227, 233, 0.55);
            border: 1px solid rgba(15, 41, 44, 0.16);
            border-radius: 12px;
            padding: 12px;
        }

        @media (max-width:900px) {
            .login-card { padding:18px; margin:12px; }
            .avatar { width:88px; height:88px; font-size:36px; }
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # outer card
    st.markdown("<div class='login-card'>", unsafe_allow_html=True)

    # header row
    col_l, col_r = st.columns([3, 1])
    with col_l:
        st.markdown("<div class='login-heading'>📚 Feel2Read — Hello there!</div>", unsafe_allow_html=True)
        st.markdown(
            "<div class='login-sub'>A cozy place that recommends books based on your mood. Let's make it personal — quick & cute.</div>",
            unsafe_allow_html=True,
        )
    with col_r:
        st.empty()

    st.markdown("<hr>", unsafe_allow_html=True)

    # the form (single submit)
    with st.form("login_form", clear_on_submit=False):
        left_col, right_col = st.columns([2, 1])

        with left_col:
            name = st.text_input(
                "Your name",
                value=st.session_state.get("user_name", ""),
                placeholder="How should we call you?",
                key="name_input",
            )
            age = st.number_input(
                "Age",
                min_value=5,
                max_value=120,
                step=1,
                # "user_age" is pre-seeded with None at start-up, so a plain
                # .get("user_age", 18) would return None and never reach 18.
                value=st.session_state.get("user_age") or 18,
                key="age_input",
            )

            # Book options come from the catalogue already loaded at module
            # level, so the CSV is not read again on every render.
            try:
                book_options = ["(None)"] + sorted(
                    books_df["Book"].dropna().unique().tolist()
                )
            except (KeyError, TypeError):
                # No "Book" column, or titles that cannot be sorted together.
                book_options = ["(None)"]

            favorite = st.selectbox(
                "Your favorite book (optional)", options=book_options, index=0
            )
            custom_fav = st.text_input(
                "Or custom favorite book", placeholder="Type a title (optional)"
            )

            st.markdown(
                "<div class='secondary-note'>Tip: if your favorite isn't listed, type it above — we won't recommend it again.</div>",
                unsafe_allow_html=True,
            )

        with right_col:
            st.markdown(
                "<div style='text-align:center;margin-top:10px;'><small class='secondary-note'>Preview</small></div>",
                unsafe_allow_html=True,
            )

            # Avatar initial and display name both come from the value in the
            # name field, escaped because they are rendered as raw HTML.
            typed_name = name.strip() if name else ""
            initial = escape(typed_name[0].upper()) if typed_name else "U"
            shown_name = escape(typed_name) if typed_name else "Guest"

            preview_container = st.empty()
            preview_html = f"""
                <div class='preview-block' style='text-align:center; padding-top:12px;'>
                    <div style='display:inline-block;'>
                        <div class='avatar'>{initial}</div>
                    </div>
                    <div style='margin-top:10px; font-weight:600; color:#0B2E33;'>
                        {shown_name}
                    </div>
                    <div style='font-size:12px;color:#335054;margin-top:4px;'>
                        Age: {age}
                    </div>
                </div>
            """
            preview_container.markdown(preview_html, unsafe_allow_html=True)

        submitted = st.form_submit_button(
            "✨ Login & Continue", help="We'll save this locally (user_logins.csv)"
        )

    # handle submit
    if submitted:
        if not name or not name.strip():
            st.error("Please enter your name — we want to address you properly 💕")
            st.stop()

        # A typed title takes precedence over the dropdown choice.
        favorite_book_final = (
            custom_fav.strip()
            if custom_fav.strip()
            else ("" if favorite == "(None)" else favorite)
        )

        csv_path = "user_logins.csv"
        try:
            newfile = not os.path.exists(csv_path)
            ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            with open(csv_path, "a", newline="", encoding="utf-8") as f:
                wr = csv.writer(f)
                if newfile:
                    wr.writerow(["Name", "Age", "Favorite Book", "Timestamp"])
                # Commas in the title are stored as ";" (the history reader
                # converts them back).
                wr.writerow(
                    [name.strip(), age, str(favorite_book_final).replace(",", ";"), ts]
                )
        except Exception as e:
            st.error(f"Could not save login info: {e}")
            st.stop()

        st.session_state.logged_in = True
        st.session_state.user_name = name.strip()
        st.session_state.user_age = age
        st.session_state.favorite_book = favorite_book_final

        with st.spinner("Preparing your cozy recommendations..."):
            p = st.progress(0)
            for i in range(0, 101, 20):
                p.progress(i)
                time.sleep(0.07)
            p.empty()

        st.success(f"Welcome, {name.strip()} — dashboard ready ✨")
        try:
            # Purely decorative; a failure here must never block the login.
            st.balloons()
        except Exception:
            pass

        # Restart the script so the logged-in view is rendered.
        st.rerun()

    st.markdown(
        "<div style='margin-top:10px; font-size:12px; color:#335054;'>"
        "We keep everything local — this app stores only what you enter on this machine."
        "</div>",
        unsafe_allow_html=True,
    )
    st.markdown("</div>", unsafe_allow_html=True)


def user_menu():
    """Draw the sidebar avatar and navigation menu; return the selected entry.

    The avatar shows the upper-cased first character of the stored user name,
    or "U" when no name is stored.
    """
    user_name = st.session_state.get("user_name","")
    if user_name:
        first_letter = user_name[0].upper()
    else:
        first_letter = "U"

    avatar_html = f"<div style='width:80px;height:80px;background:#2563eb;border-radius:50%;display:flex;align-items:center;justify-content:center;color:white;font-size:36px;margin:auto;margin-top:10px'>{first_letter}</div>"
    menu_entries = ["Dashboard","My Profile","My History","Logout"]

    with st.sidebar:
        st.markdown(avatar_html, unsafe_allow_html=True)
        choice = st.selectbox("Menu", menu_entries, index=0, label_visibility="collapsed")
    return choice


def get_favorite_book_history():
    """Return the current user's favorite-book entries from ``user_logins.csv``.

    Each data row is read as (Name, Age, Favorite Book, Timestamp); short rows
    are padded with empty strings. ";" in the book field is turned back into
    "," and rows whose book is blank, "None" or "nan" are ignored.

    Returns:
        A DataFrame with columns "Book" and "Added On" holding the rows whose
        Name equals ``st.session_state.user_name``. It is empty when the file
        is missing or the user has no usable rows.
    """
    log_path = "user_logins.csv"
    if not os.path.exists(log_path):
        return pd.DataFrame(columns=["Book", "Added On"])

    placeholders = ("", "None", "nan")
    records = []
    with open(log_path, "r", encoding="utf-8") as handle:
        reader = csv.reader(handle)
        next(reader, None)  # discard the header row
        for raw in reader:
            # Pad short rows so all four positions can be indexed safely.
            fields = raw + [""] * (4 - len(raw))
            title = fields[2].replace(";", ",").strip()
            if title not in placeholders:
                records.append([fields[0], fields[1], title, fields[3]])

    logins = pd.DataFrame(
        records, columns=["Name", "Age", "Favorite Book", "Timestamp"]
    )
    is_current_user = logins["Name"] == st.session_state.user_name
    mine = logins.loc[is_current_user, ["Favorite Book", "Timestamp"]]
    if mine.empty:
        return pd.DataFrame(columns=["Book", "Added On"])
    return mine.rename(columns={"Favorite Book": "Book", "Timestamp": "Added On"})


def user_profile():
    """Render the "My Profile" page.

    Shows the name, age and favorite book held in ``st.session_state``
    (a falsy favorite book is displayed as "None"), followed by the table
    returned by ``get_favorite_book_history()`` or an info note when that
    table is empty.
    """
    st.subheader("Your Profile")
    for label, attr in (("*Name:*", "user_name"), ("*Age:*", "user_age")):
        st.write(label, getattr(st.session_state, attr))
    st.write("*Favorite Book:*", st.session_state.favorite_book or "None")

    st.markdown("---")
    st.subheader("Favorite Book History")
    history = get_favorite_book_history()
    if not history.empty:
        st.dataframe(history, width="stretch")
        return
    st.info("No favorite books yet.")


def user_history():
    """Render the "My History" page: record finished books and list the user's own.

    New entries are appended to ``user_read_books.csv`` as
    (Name, Book, Timestamp) rows. The table under the form shows only the rows
    whose Name equals ``st.session_state.user_name``.
    """
    st.subheader("Your History")
    read_csv = "user_read_books.csv"
    new_read = st.text_input("Add a book you've finished reading")

    if st.button("Add to read list"):
        title = new_read.strip()
        if title:
            # Decide about the header before opening: append mode creates the
            # file, so checking for existence afterwards is always true.
            needs_header = (
                not os.path.exists(read_csv) or os.path.getsize(read_csv) == 0
            )
            try:
                with open(read_csv, "a", newline="", encoding="utf-8") as f:
                    wr = csv.writer(f)
                    if needs_header:
                        wr.writerow(["Name", "Book", "Timestamp"])
                    wr.writerow([
                        st.session_state.user_name,
                        title,
                        datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    ])
            except OSError as e:
                st.error(f"Could not save book: {e}")
            else:
                st.success("Added.")
        else:
            st.warning("Please enter a book title first.")

    user_df = None
    # A zero-byte file would make read_csv raise, so treat it as "no history".
    if os.path.exists(read_csv) and os.path.getsize(read_csv) > 0:
        # Read every column as plain text: with type inference a name such as
        # "123" becomes a number and "NA"/"None" become NaN, and the equality
        # filter below would then never match the session's string user name.
        df = pd.read_csv(read_csv, dtype=str, keep_default_na=False)
        user_df = df[df["Name"] == st.session_state.user_name]

    if user_df is not None and not user_df.empty:
        st.dataframe(
            user_df[["Book", "Timestamp"]].rename(columns={"Timestamp": "Added At"}),
            width="stretch",
        )
    else:
        st.info("No read books yet.")

from PIL import Image
import pytesseract

def extract_text_from_image(img_file):
    """Run OCR on an image and return the recognised text.

    Args:
        img_file: Anything ``PIL.Image.open`` accepts (the app passes the
            uploaded file object).

    Returns:
        The text reported by ``pytesseract.image_to_string`` with surrounding
        whitespace removed, or ``None`` if opening the image or running OCR
        raises any exception.
    """
    try:
        raw_text = pytesseract.image_to_string(Image.open(img_file))
        result = raw_text.strip()
    except Exception:
        # Best effort: callers treat None the same as "no text found".
        result = None
    return result



# Substrings that mark free text as a description of the writer's own mood.
_MOOD_KEYWORDS = (
    "i feel", "i'm", "i am", "feeling", "bored", "sad", "happy",
    "angry", "depressed", "anxious", "lonely", "tired", "stressed",
    "guilty", "shame", "proud", "hopeful", "relieved", "excited", "confused",
)

# Genre used whenever no better prediction is available.
_FALLBACK_GENRE = "General Fiction"

# Texts with fewer words than this are classified by emotion instead of genre.
_SHORT_TEXT_WORDS = 8


def _predict_genre_from_text(text, check_mood_keywords=False):
    """Map free text to a genre label, falling back to "General Fiction".

    Short texts (fewer than 8 words) go through the emotion pipeline and
    ``emotion_to_genre``; longer texts go through the genre pipeline. With
    ``check_mood_keywords=True`` a longer text that contains one of
    ``_MOOD_KEYWORDS`` is also treated as a mood description.

    Any exception raised while loading or running a pipeline is reported with
    ``st.warning`` and answered with the fallback genre, so all three input
    modes degrade the same way instead of only the image one.
    """
    is_mood_text = len(text.split()) < _SHORT_TEXT_WORDS or (
        check_mood_keywords and any(k in text.lower() for k in _MOOD_KEYWORDS)
    )
    try:
        if is_mood_text:
            emotion_pipe = load_emotion_pipeline()
            # NOTE(review): assumes the pipeline yields, per input, a list of
            # {"label", "score"} dicts covering every emotion - confirm against
            # load_emotion_pipeline.
            emotions = emotion_pipe(text, truncation=True, max_length=128)[0]
            top_emotion = max(emotions, key=lambda x: x["score"])["label"].lower()
            return emotion_to_genre.get(top_emotion, _FALLBACK_GENRE)
        genre_pipe = load_genre_pipeline()
        pred = genre_pipe(text, truncation=True, max_length=512)
        return pred[0]["label"] if pred else _FALLBACK_GENRE
    except Exception as exc:
        st.warning(
            f"Could not analyse the text ({exc}); "
            f"showing {_FALLBACK_GENRE} picks instead."
        )
        return _FALLBACK_GENRE


def _recommend_for_genre(genre):
    """Fetch the recommendations for ``genre`` and render them."""
    display_recommendations(get_recommendations(genre))


def _recommend_from_extracted_text(text, found_prefix, failure_message):
    """Show ``text`` and its recommendations, or ``failure_message`` if it is empty."""
    if text:
        st.info(found_prefix + text)
        _recommend_for_genre(_predict_genre_from_text(text))
    else:
        st.error(failure_message)


def main_app():
    """Render the logged-in area of the app.

    The sidebar choice from ``user_menu()`` selects the profile page, the
    history page, logout, or the default dashboard. The dashboard accepts a
    mood/description as text, an image (OCR) or an audio file (transcription),
    predicts a genre from it and displays matching recommendations.
    """
    choice = user_menu()
    if choice == "My Profile":
        user_profile()
        return
    if choice == "My History":
        user_history()
        return
    if choice == "Logout":
        st.session_state.logged_in = False
        st.session_state.user_name = ""
        st.session_state.user_age = None
        st.session_state.favorite_book = ""
        st.rerun()
        # Defensive: nothing below should render once the session is cleared.
        return

    st.markdown(f"### {generate_inspirational_quote()}")
    st.header(f"Welcome, {st.session_state.get('user_name','Reader')}")

    option = st.radio("Input Type", ["Text", "Image", "Voice"], index=0)

    # ---------------- TEXT INPUT ----------------
    if option == "Text":
        st.markdown("#### ✍️ You can either enter a mood or book description.")

        # Insertion order of this dict is the order shown in the dropdown.
        mood_emojis = {
            "joy": "😊", "sadness": "😢", "anger": "😠", "fear": "😨",
            "disgust": "🤢", "surprise": "😲", "neutral": "😐", "love": "❤️",
            "curiosity": "🤔", "guilt": "😓", "shame": "😳", "pride": "😌",
            "hope": "🌟", "relief": "😌", "confusion": "😕", "excitement": "🤩"
        }
        mood_options = [f"{emoji} {mood}" for mood, emoji in mood_emojis.items()]
        selected_mood = st.selectbox("😄 Or pick a mood directly:", [""] + mood_options)
        user_input = st.text_area("🧠 Or write something in your own words:")

        if st.button("Predict Genre"):
            if user_input.strip():
                # Written text takes precedence over a selected mood.
                _recommend_for_genre(
                    _predict_genre_from_text(user_input, check_mood_keywords=True)
                )
            elif selected_mood:
                # Options look like "<emoji> <mood>"; keep only the mood word.
                mood_only = selected_mood.split(" ", 1)[1].strip().lower()
                _recommend_for_genre(emotion_to_genre.get(mood_only, _FALLBACK_GENRE))
            else:
                st.warning("Please pick a mood or write something!")

    # ---------------- IMAGE INPUT ----------------
    elif option == "Image":
        img = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
        if st.button("Predict from Image"):
            if img:
                _recommend_from_extracted_text(
                    extract_text_from_image(img),
                    "Extracted text: ",
                    "Could not extract text from image.",
                )
            else:
                st.warning("Please upload an image first.")

    # ---------------- VOICE ----------------
    elif option == "Voice":
        audio = st.file_uploader("Upload audio", type=["wav","mp3","m4a","mp4"])
        if st.button("Predict from Voice"):
            if audio:
                _recommend_from_extracted_text(
                    transcribe_audio(audio),
                    "Transcription: ",
                    "Could not transcribe audio.",
                )
            else:
                st.warning("Please upload an audio file first.")



if __name__ == "__main__":
    # Entry point: logged-in sessions get the app, everyone else the login form.
    if st.session_state.logged_in:
        main_app()
    else:
        login_page()


**Accessing the website**

In [None]:
from pyngrok import ngrok

# Local port to expose; this assumes the Streamlit app is running on 8501.
STREAMLIT_PORT = 8501

# Open an ngrok tunnel to that port and show what ngrok reports back.
public_url = ngrok.connect(STREAMLIT_PORT)
print("Streamlit URL:", public_url)
