In [1]:
# Source file (read line by line) and destination file (bracketed, comma-separated).
# Presumably the input is JSON Lines, one object per line — confirm against the data.
input_file = "books_with_emotions.json"
output_file = "books_with_emotions_fixed.json"

# Keep every non-blank line, trimmed of surrounding whitespace.
with open(input_file, "r", encoding="utf-8") as src:
    trimmed = (raw.strip() for raw in src)
    lines = [text for text in trimmed if text]

# Wrap the lines in "[" ... "]" with a comma after every line except the last.
with open(output_file, "w", encoding="utf-8") as dst:
    dst.write("[\n")
    if lines:
        # Guarded so an empty input still yields exactly "[\n]\n".
        dst.write(",\n".join(lines) + "\n")
    dst.write("]\n")

In [4]:
import firebase_admin
from firebase_admin import credentials, firestore
import tensorflow as tf
import numpy as np
from transformers import AutoTokenizer
from tqdm import tqdm
from tensorflow.keras.losses import MeanSquaredError
from google.api_core.exceptions import ResourceExhausted
import time

# ───────────────────────────────────────────────
# ✅ Custom SelfAttention Layer (used in training)
# ───────────────────────────────────────────────
from tensorflow.keras.layers import Layer

class SelfAttention(Layer):
    """Attention pooling that reduces axis 1 of the input to a weighted sum.

    A single trainable weight of shape ``(input_shape[-1], 1)`` scores each
    slice along axis 1. Scores pass through ``tanh``, are normalised with a
    softmax over axis 1, and then weight the input before it is summed over
    that same axis.
    """

    def __init__(self, **kwargs):
        # No layer-specific options; all keyword arguments go to the base Layer.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring weight, sized from the last input dimension."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``x`` weighted by its attention scores and summed over axis 1.

        Assumes ``x`` is (batch, steps, features) — TODO confirm against the
        model definition used in training.
        """
        # Contract the last axis of x with W, leaving a trailing axis of size 1.
        scores = tf.nn.tanh(tf.tensordot(x, self.W, axes=1))
        # Normalise the scores across axis 1 so they sum to one.
        weights = tf.nn.softmax(scores, axis=1)
        weighted = x * weights
        return tf.reduce_sum(weighted, axis=1)

# ───────────────────────────────────────────────
# ✅ Load Model & Tokenizer
# ───────────────────────────────────────────────
# Load the saved Keras model. The custom attention layer and the "mse" entry
# are supplied explicitly so the HDF5 file can be deserialised.
model = tf.keras.models.load_model(
    "emotion_model_1.h5",
    custom_objects={"SelfAttention": SelfAttention, "mse": MeanSquaredError()},
)

# Hugging Face tokenizer used to turn descriptions into token ids.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Record the model's input names once, dropping any ":<suffix>" part of the tensor name.
model_input_names = [inp.name.partition(":")[0] for inp in model.inputs]
print("🧠 Model input names:", model_input_names)

# ───────────────────────────────────────────────
# ✅ Initialize Firebase Admin
# ───────────────────────────────────────────────
# Service-account key file used to authenticate the Firebase Admin SDK.
cred = credentials.Certificate("books-recommend-61b40-firebase-adminsdk-fbsvc-a1abc2d3ab.json")

# Initialise the default app only once so the cell can be re-run in the same
# kernel. The public get_app() lookup is used instead of the private `_apps`
# registry: `_apps` is also non-empty when only a *named* app exists, in which
# case initialisation would be skipped and firestore.client() would fail.
try:
    firebase_admin.get_app()
except ValueError:
    # Raised by get_app() when the default app does not exist yet.
    firebase_admin.initialize_app(cred)

# Firestore client and the collection whose documents get emotion vectors.
db = firestore.client()
books_ref = db.collection("books")

# ───────────────────────────────────────────────
# ✅ Batch Update with Pagination
# ───────────────────────────────────────────────
# Paginated back-fill of `emotion_vector` on every document in `books`.
#
# Each iteration fetches one page of FETCH_LIMIT documents, predicts a vector
# for every document that has a string `description` and no `emotion_vector`
# yet, and commits all updates for that page as a single write batch. The
# cursor (`last_doc`) only advances after a successful commit, so a failed
# page is fetched and processed again on the next iteration.
BATCH_SIZE = 250               # not referenced below: each fetched page is committed as one batch
FETCH_LIMIT = 100              # documents read per page (and therefore max writes per batch)
MAX_TOKENS = 128               # fixed token length fed to the model
MAX_READ_RETRIES = 3           # attempts to read one page before giving up
MAX_CONSECUTIVE_FAILURES = 5   # stop instead of retrying the same page forever
total_updated = 0              # counts only writes that were actually committed
last_doc = None                # pagination cursor: last snapshot of the previous page
consecutive_failures = 0       # reset to 0 after every successfully processed page

print("🚀 Starting emotion vector update...")

while True:
    try:
        query = books_ref.limit(FETCH_LIMIT)
        if last_doc is not None:
            query = query.start_after(last_doc)

        # Try fetching docs with quota retry
        for retry in range(MAX_READ_RETRIES):
            try:
                docs = list(query.stream())
                break
            except ResourceExhausted:
                print(f"⚠️ Read quota exceeded. Waiting 60s... (retry {retry+1}/{MAX_READ_RETRIES})")
                time.sleep(60)
        else:
            # Every read attempt hit the quota; this `break` leaves the outer while loop.
            print("❌ Giving up on reading docs due to repeated quota errors.")
            break

        if not docs:
            # An empty page means the whole collection has been walked.
            break

        batch = db.batch()
        pending_writes = 0  # updates queued in `batch` but not yet committed

        for doc_snapshot in docs:
            doc = doc_snapshot.to_dict()

            # Skip if already processed or invalid description
            if "emotion_vector" in doc or not isinstance(doc.get("description"), str):
                continue

            doc_ref = books_ref.document(doc_snapshot.id)
            input_text = doc["description"]

            # Tokenize using Hugging Face tokenizer
            tokens = tokenizer(
                input_text,
                padding="max_length",
                truncation=True,
                max_length=MAX_TOKENS,
                return_tensors="np"
            )

            # Use only input_ids as required by the model
            input_ids = tokens["input_ids"]

            # Clip large token values (to prevent embedding lookup error).
            # NOTE(review): every id above 9999 collapses to 9999; presumably the
            # model's embedding table has 10,000 rows — confirm against the
            # training code, since this discards information for larger ids.
            input_ids = np.clip(input_ids, 0, 9999)

            try:
                prediction = model.predict(input_ids, verbose=0)[0].tolist()
                batch.update(doc_ref, {"emotion_vector": prediction})
                pending_writes += 1
            except Exception as e:
                # One bad document must not abort the rest of the page.
                print(f"❌ Prediction error for doc {doc_snapshot.id}: {e}")
                continue

        # Nothing to send when every document on the page was skipped.
        if pending_writes:
            batch.commit()

        # Only count and advance once the page's writes are safely committed;
        # otherwise a retried page would be counted twice.
        total_updated += pending_writes
        last_doc = docs[-1]
        consecutive_failures = 0
        print(f"✅ Updated {total_updated} books so far...")

    except ResourceExhausted:
        consecutive_failures += 1
        if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
            print(f"❌ Write quota still exceeded after {consecutive_failures} attempts. Stopping.")
            break
        print("⚠️ Write quota exceeded. Waiting 60 seconds before retrying...")
        time.sleep(60)
    except Exception as e:
        consecutive_failures += 1
        print(f"❌ Unexpected error: {e}")
        if consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
            print(f"❌ Stopping after {consecutive_failures} consecutive failures.")
            break
        time.sleep(5)

print(f"🏁 Finished. Total books updated: {total_updated}")




🧠 Model input names: ['input_layer']
🚀 Starting emotion vector update...
✅ Updated 99 books so far...
✅ Updated 194 books so far...
✅ Updated 294 books so far...
✅ Updated 391 books so far...
✅ Updated 490 books so far...
✅ Updated 589 books so far...
✅ Updated 688 books so far...
✅ Updated 786 books so far...
✅ Updated 884 books so far...
✅ Updated 983 books so far...
✅ Updated 1080 books so far...
✅ Updated 1180 books so far...
✅ Updated 1278 books so far...
✅ Updated 1377 books so far...
✅ Updated 1477 books so far...
✅ Updated 1574 books so far...
✅ Updated 1673 books so far...
✅ Updated 1772 books so far...
✅ Updated 1870 books so far...
✅ Updated 1967 books so far...
✅ Updated 2067 books so far...
✅ Updated 2167 books so far...
✅ Updated 2267 books so far...
✅ Updated 2366 books so far...
✅ Updated 2465 books so far...
✅ Updated 2563 books so far...
✅ Updated 2662 books so far...
✅ Updated 2761 books so far...
✅ Updated 2861 books so far...
✅ Updated 2961 books so far...
✅ Update

In [None]:
# import firebase_admin
# from firebase_admin import credentials, firestore
# import json

# # ───────────────────────────────────────────────
# # ✅ Initialize Firebase
# # ───────────────────────────────────────────────
# cred = credentials.Certificate("books-recommend-61b40-firebase-adminsdk-fbsvc-a1abc2d3ab.json")
# if not firebase_admin._apps:
#     firebase_admin.initialize_app(cred)

# db = firestore.client()

# # ───────────────────────────────────────────────
# # ✅ Export books with emotion_vector
# # ───────────────────────────────────────────────
# books_ref = db.collection("books").stream()
# books = []

# for doc in books_ref:
#     data = doc.to_dict()
#     if "emotion_vector" in data:
#         books.append(data)

# # Save to books.json
# with open("books.json", "w", encoding="utf-8") as f:
#     json.dump(books, f, ensure_ascii=False, indent=2)

# print(f"✅ Exported {len(books)} books to books.json")


✅ Exported 19769 books to books.json
