In [None]:
import pandas as pd
import numpy as np

# --- Synthetic user/article interaction data --------------------------------
# Dataset dimensions and per-user sample counts.
NUM_USERS = 1000
NUM_ARTICLES = 500
LIKES_PER_USER = 20
NEGATIVE_SAMPLES_PER_USER = 10
# Fixed seed so that "Restart Kernel -> Run All" regenerates the same dataset.
SEED = 42

# Every user needs LIKES_PER_USER liked plus NEGATIVE_SAMPLES_PER_USER distinct
# non-liked articles; fail early with a clear message if that is impossible.
if LIKES_PER_USER + NEGATIVE_SAMPLES_PER_USER > NUM_ARTICLES:
    raise ValueError(
        "LIKES_PER_USER + NEGATIVE_SAMPLES_PER_USER must not exceed NUM_ARTICLES"
    )

rng = np.random.default_rng(SEED)

user_ids = list(range(NUM_USERS))
article_ids = list(range(NUM_ARTICLES))
article_pool = np.asarray(article_ids)

liked_rows = []
unliked_rows = []
for user in user_ids:
    liked_articles = rng.choice(article_pool, LIKES_PER_USER, replace=False)

    # Draw negatives only from articles this user did NOT like. This guarantees
    # exactly NEGATIVE_SAMPLES_PER_USER negatives per user and removes the need
    # to scan the whole DataFrame for every candidate pair.
    not_liked = np.setdiff1d(article_pool, liked_articles, assume_unique=True)
    unliked_articles = rng.choice(not_liked, NEGATIVE_SAMPLES_PER_USER, replace=False)

    liked_rows.extend((user, int(article), 1) for article in liked_articles)
    unliked_rows.extend((user, int(article), 0) for article in unliked_articles)

# Positive rows (liked == 1) first, then negative rows (liked == 0).
data = liked_rows + unliked_rows
df = pd.DataFrame(data, columns=["user_id", "article_id", "liked"])

# Invariants of the generated dataset.
assert len(df) == NUM_USERS * (LIKES_PER_USER + NEGATIVE_SAMPLES_PER_USER)
assert not df.duplicated(["user_id", "article_id"]).any()

df.head(100)

Unnamed: 0,user_id,article_id,liked
0,0,218,1
1,0,476,1
2,0,481,1
3,0,30,1
4,0,246,1
...,...,...,...
95,4,267,1
96,4,215,1
97,4,11,1
98,4,243,1


In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error

# --- Interaction matrix and truncated SVD ------------------------------------
# Hyper-parameters of the factorisation; the fixed random_state makes the
# randomized SVD solver reproducible across runs.
N_COMPONENTS = 20
SVD_RANDOM_STATE = 42

# The vectorised write below is only well-defined when each (user, article)
# pair occurs once in df.
assert not df.duplicated(["user_id", "article_id"]).any(), "duplicate (user, article) rows"

# Dense 0-based row/column indices derived from the raw ids.
user_ids = df["user_id"].astype("category").cat.codes
article_ids = df["article_id"].astype("category").cat.codes

num_users = int(user_ids.max()) + 1
num_articles = int(article_ids.max()) + 1

# Rows are users, columns are articles, values are the `liked` flag.
# Note: pairs recorded with liked == 0 are stored as 0, the same value as
# pairs that never appear in df.
interaction_matrix = np.zeros((num_users, num_articles))
interaction_matrix[user_ids.to_numpy(), article_ids.to_numpy()] = df["liked"].to_numpy()

svd = TruncatedSVD(n_components=N_COMPONENTS, random_state=SVD_RANDOM_STATE)
latent_matrix = svd.fit_transform(interaction_matrix)

# Low-rank reconstruction of the interaction matrix (users x articles).
predicted_matrix = latent_matrix @ svd.components_

def get_recommendations(user_id, predicted_matrix, top_n=5, exclude=None):
    """Return the column indices of the highest-scoring articles for one user.

    Parameters
    ----------
    user_id : int
        Row index of the user in ``predicted_matrix``.
    predicted_matrix : numpy.ndarray
        2-D array of scores indexed as ``[user, article]``.
    top_n : int, default 5
        Maximum number of indices to return. Values ``<= 0`` yield an empty
        result.
    exclude : iterable of int, optional
        Column indices that must not be returned (for example articles the
        user has already interacted with).

    Returns
    -------
    numpy.ndarray
        Up to ``top_n`` column indices ordered from highest to lowest score.
    """
    # Guard explicitly: slicing with [-0:] would select the whole row.
    if top_n <= 0:
        return np.array([], dtype=np.intp)

    user_ratings = predicted_matrix[user_id]

    # Indices from best to worst score.
    ranked_indices = user_ratings.argsort()[::-1]

    if exclude is not None:
        excluded = np.asarray(list(exclude), dtype=np.intp)
        ranked_indices = ranked_indices[~np.isin(ranked_indices, excluded)]

    return ranked_indices[:top_n]

# --- Example recommendation ---------------------------------------------------
user_id = 9

# Push articles that already have a positive interaction to -inf so they are
# not recommended back to the user who liked them.
candidate_scores = predicted_matrix.copy()
candidate_scores[interaction_matrix > 0] = -np.inf
recommended_articles = get_recommendations(user_id, candidate_scores, top_n=5)

# Translate matrix column indices back to the original article ids.
article_id_mapping = df["article_id"].astype("category").cat.categories
recommended_article_ids = article_id_mapping[recommended_articles].tolist()

print(f"Рекомендованные статьи для пользователя {user_id}: {recommended_article_ids}")

# --- Reconstruction error -----------------------------------------------------
# NOTE: these are the same (user, article) pairs that populate
# interaction_matrix, so this RMSE is an in-sample reconstruction error,
# not a held-out test score.
row_indices = np.asarray(user_ids)
column_indices = np.asarray(article_ids)
test_interactions = interaction_matrix[row_indices, column_indices]
predicted_interactions = predicted_matrix[row_indices, column_indices]

rmse = np.sqrt(mean_squared_error(test_interactions, predicted_interactions))
print(f"RMSE модели: {rmse}")

Рекомендованные статьи для пользователя 9: [287, 152, 83, 325, 193]
RMSE модели: 0.714974363580217


In [None]:
import joblib

# Persist the fitted SVD object to disk; the call's return value is left as
# the cell's last expression so it is displayed.
MODEL_PATH = 'svd_model.pkl'
joblib.dump(svd, MODEL_PATH)

['svd_model.pkl']