In [None]:
from pathlib import Path
import pandas as pd, numpy as np
from scipy.sparse import coo_matrix, save_npz

# Folder layout, resolved relative to where the notebook is launched:
# the parent of the current working directory is used as the project root.
BASE_DIR = Path.cwd().parent  # e.g. .../book-recommender
DATA_DIR = BASE_DIR.joinpath("data")      # CSV inputs and saved matrices
MODELS_DIR = BASE_DIR.joinpath("models")  # serialized models


In [None]:
# Read the ratings and books tables from the data folder.
ratings = pd.read_csv(DATA_DIR.joinpath("ratings.csv"))
books = pd.read_csv(DATA_DIR.joinpath("books.csv"))

# Quick sanity check: (rows, columns) of each table.
print(ratings.shape, books.shape)


In [None]:
# Turn the ratings table into a sparse (items x users) matrix.
# Ids are shifted by one to obtain 0-based indices
# (assumes book_id / user_id start at 1 — TODO confirm against the CSV).
rows = ratings["book_id"].values - 1
cols = ratings["user_id"].values - 1
vals = ratings["rating"].astype(np.float32)

# No explicit shape is passed, so the matrix size follows the largest indices.
item_user = coo_matrix(
    (vals, (rows, cols)),
).tocsr()

# Persist the matrix so later cells can reload it from disk.
matrix_path = DATA_DIR / "item_user.npz"
save_npz(matrix_path, item_user)
print("✅ Saved:", matrix_path)


## Step 2: Train the ALS model

In [None]:
from pathlib import Path
from scipy.sparse import load_npz
from implicit.als import AlternatingLeastSquares
import pickle, time

# Project folders, resolved from the parent of the current working directory.
BASE_DIR = Path.cwd().parent  # …/book-recommender
DATA_DIR = BASE_DIR.joinpath("data")
MODELS_DIR = BASE_DIR.joinpath("models")

# Create the models folder if it is missing; an existing folder is left as is.
MODELS_DIR.mkdir(exist_ok=True)


In [None]:
# Reload the sparse ratings matrix from the data folder and report its shape.
item_user = load_npz(DATA_DIR.joinpath("item_user.npz"))
print("Матрица:", item_user.shape)  # the label is Russian for "Matrix"


In [None]:
# ALS hyper-parameters, one per line for easy tuning.
# random_state is pinned so repeated runs start from the same seed.
als = AlternatingLeastSquares(
    factors=64,
    iterations=20,
    regularization=0.1,
    calculate_training_loss=True,
    random_state=42,
)

# Wall-clock timing of the training call.
t0 = time.perf_counter()
als.fit(item_user)
print(f"⏱  Training finished in {time.perf_counter()-t0:.1f}s")


In [None]:
# Serialize the trained model into the models folder.
model_path = MODELS_DIR / "als.pkl"
with model_path.open("wb") as f:
    pickle.dump(als, f)
print("✅ Model Save:", model_path)


In [None]:
# Smoke test: ask the trained model for the top-5 recommendations of one user.
# NOTE(review): the matrix orientation expected by fit() and recommend() differs
# between releases of the `implicit` package — confirm that training on
# `item_user` and passing a single user row here match the installed version.
uid = 0                          # 0-based index, i.e. user_id = 1 in the 1-based ids
user_items = item_user.T.tocsr()[uid]   # 1 x n_items CSR row holding this user's ratings

recs = als.recommend(uid, user_items, N=5)
recs


## 🔄 New index mapping (id_map) + rebuilt matrix

In [None]:
# Assign every book id that occurs in the ratings a compact 0..N-1 index,
# in ascending book-id order, and keep the lookup in both directions.
unique_ids = sorted(ratings.book_id.unique())
rev_map = dict(zip(unique_ids, range(len(unique_ids))))  # book_id → idx
id_map = dict(enumerate(unique_ids))                      # idx → book_id


In [None]:
import pickle

# Rows use the compact book index from rev_map; columns are user ids shifted
# by one (assumes user_id starts at 1 — TODO confirm against the CSV).
row = ratings["book_id"].map(rev_map).values
col = ratings["user_id"].values - 1
data = ratings["rating"].astype("float32")

# Assemble the sparse (items x users) matrix and write it to the data folder.
item_user = coo_matrix(
    (data, (row, col)),
).tocsr()
save_npz(DATA_DIR.joinpath("item_user_mapped.npz"), item_user)

# Store the idx → book_id lookup alongside the matrix.
with DATA_DIR.joinpath("id_map.pkl").open("wb") as f:
    pickle.dump(id_map, f)

print("✅ item_user_mapped.npz и id_map.pkl ready")


In [None]:
from pathlib import Path
from scipy.sparse import load_npz
from implicit.als import AlternatingLeastSquares
import pickle, time

# Project folders, resolved from the parent of the current working directory.
BASE_DIR = Path.cwd().parent
DATA_DIR = BASE_DIR.joinpath("data")
MODELS_DIR = BASE_DIR.joinpath("models")
MODELS_DIR.mkdir(exist_ok=True)  # no error if the folder already exists

# Read the re-indexed matrix from disk and show its dimensions.
print(" Download item_user_mapped.npz …")
item_user = load_npz(DATA_DIR.joinpath("item_user_mapped.npz"))  # new matrix
print("Shape :", item_user.shape)


In [None]:
# Train ALS on the matrix currently bound to `item_user`.
print("Train ALS …")
als = AlternatingLeastSquares(
    factors=64,
    iterations=20,
    regularization=0.1,
    random_state=42,
)
t0 = time.perf_counter()
als.fit(item_user)
print(f"⏱  Training finished in {time.perf_counter()-t0:.1f}s")

# Serialize the trained model; an existing als.pkl at this path is replaced.
with (MODELS_DIR / "als.pkl").open("wb") as f:
    pickle.dump(als, f)
print("✅ Model was Save:", MODELS_DIR / "als.pkl")


In [None]:
from pathlib import Path
from scipy.sparse import load_npz
import pickle, numpy as np

# Sanity-check the saved artifacts: matrix shape, file size on disk, and that
# the idx → book_id map has one entry per matrix row.
DATA = Path.cwd().parent / "data"

mat = load_npz(DATA / "item_user_mapped.npz")
print("Shape :", mat.shape)          # original author's expectation: (~54000, 53424)
print("Size MB:", DATA.joinpath("item_user_mapped.npz").stat().st_size / 1e6)

# Open the pickle through a context manager so the file handle is closed
# right after loading (a bare open() inside pickle.load() leaves it dangling).
# NOTE: unpickling can execute arbitrary code — only load files you produced yourself.
with open(DATA / "id_map.pkl", "rb") as f:
    id_map = pickle.load(f)
print("len(id_map):", len(id_map))   # must equal mat.shape[0]

# Enforce the invariant instead of relying on eyeballing the two printed values.
assert len(id_map) == mat.shape[0], (
    f"id_map has {len(id_map)} entries but the matrix has {mat.shape[0]} rows"
)
