# In [None]:
# Model Training and Evaluation

# Importando bibliotecas necessárias
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from surprise import Dataset, Reader, KNNBasic, accuracy
from surprise.model_selection import train_test_split

# Load the raw rating interactions and the product catalogue
ratings = pd.read_csv('../data/ratings.csv')
products = pd.read_csv('../data/products.csv')

# Wrap the ratings in a Surprise dataset (declared rating scale: 1 to 5) and
# hold out 20% of them for evaluation.
# NOTE(review): the split is unseeded, so the RMSE and the recommendations
# below can change from run to run; consider passing random_state=... to
# train_test_split if reproducible results are needed.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['user_id', 'product_id', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2)

# Train the collaborative-filtering model (KNNBasic with its default settings)
algo = KNNBasic()
algo.fit(trainset)

# Evaluate on the held-out ratings. verbose=False keeps accuracy.rmse from
# printing the score on its own, so it is reported exactly once by the
# print call below.
predictions = algo.test(testset)
rmse = accuracy.rmse(predictions, verbose=False)
print(f"Collaborative Filtering RMSE: {rmse}")

# User for whom the collaborative recommendations below are generated
user_id = 'user_1'
def get_collaborative_recommendations(user_id, algo, trainset, n=10):
    """Return the best-scoring products the user has not rated yet.

    Args:
        user_id: Raw user id, as passed to ``trainset.to_inner_uid`` and
            ``algo.predict``.
        algo: Fitted model exposing ``predict(user_id, item_id)``; every
            prediction must provide ``iid`` and ``est`` attributes.
        trainset: Training set providing ``all_items()``, ``to_raw_iid()``,
            ``to_inner_uid()`` and the ``ur`` mapping of inner user id to
            ``(inner_item_id, rating)`` pairs.
        n: Maximum number of recommendations to return.

    Returns:
        A list of ``(item_id, estimated_rating)`` tuples sorted by estimated
        rating, highest first; ties keep the order of ``all_items()``.

    Raises:
        Whatever ``trainset.to_inner_uid`` raises for a user that is not in
        the training set (presumably ``ValueError`` with Surprise - confirm).
    """
    inner_uid = trainset.to_inner_uid(user_id)
    # Filter on inner ids with a set: membership is O(1) per candidate, and
    # only the items that survive the filter need converting to raw ids.
    rated_inner_ids = {inner_iid for inner_iid, _ in trainset.ur[inner_uid]}
    candidates = [
        trainset.to_raw_iid(inner_iid)
        for inner_iid in trainset.all_items()
        if inner_iid not in rated_inner_ids
    ]
    predictions = [algo.predict(user_id, raw_iid) for raw_iid in candidates]
    predictions.sort(key=lambda pred: pred.est, reverse=True)
    return [(pred.iid, pred.est) for pred in predictions[:n]]

collab_recommendations = get_collaborative_recommendations(user_id, algo, trainset)
print("Collaborative Filtering Recommendations:")
print(collab_recommendations)

# Build TF-IDF vectors from the product feature text. Empty cells in the CSV
# are read by pandas as NaN, which TfidfVectorizer rejects as a document, so
# they are treated as empty text instead.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(products['features'].fillna(''))
# Pairwise dot products between all product vectors. With TfidfVectorizer's
# default L2 normalisation this is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Product whose content-based neighbours are looked up below
product_id = 'product_1'
def get_content_recommendations(product_id, cosine_sim, products, n=10):
    """Return the ``n`` products most similar to ``product_id``.

    Args:
        product_id: Value to look up in ``products['product_id']``.
        cosine_sim: Similarity matrix indexable as ``cosine_sim[row]``;
            assumed to have one row/column per row of ``products``, in the
            same order - confirm against the caller.
        products: DataFrame with a ``product_id`` column.
        n: Maximum number of similar products to return.

    Returns:
        The rows of ``products`` for the most similar items, most similar
        first. The queried product itself is never included.

    Raises:
        IndexError: If ``product_id`` does not occur in ``products``.
    """
    # Work with the row *position*, not the index label: cosine_sim and iloc
    # are positional, so a label only works while the index is 0..len-1.
    matches = (products['product_id'] == product_id).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"product_id {product_id!r} not found in products")
    idx = int(matches[0])

    # Drop the query product explicitly instead of assuming it sorts first:
    # an item with identical features ties with it, and an item whose vector
    # is all zeros has a self-similarity of 0.
    ranked = sorted(
        (
            (position, score)
            for position, score in enumerate(cosine_sim[idx])
            if position != idx
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_positions = [position for position, _ in ranked[:max(n, 0)]]
    return products.iloc[top_positions]

# Look up the content-based neighbours of the sample product and display them
content_recommendations = get_content_recommendations(
    product_id=product_id,
    cosine_sim=cosine_sim,
    products=products,
)
print("Content-Based Filtering Recommendations:", content_recommendations, sep="\n")

# Hybrid recommendations: blend the collaborative and content-based lists
def hybrid_recommendations(user_id, product_id, algo, trainset, cosine_sim, products, n=10):
    """Blend collaborative and content-based recommendations into one list.

    Both recommenders are asked for up to ``n`` items. Their results are
    interleaved rank by rank (collaborative first at each rank) and
    de-duplicated by product id, so both sources contribute to the final
    list. Appending the content-based items after the collaborative ones
    would leave them out whenever the collaborative list already holds
    ``n`` entries.

    Args:
        user_id: User passed to ``get_collaborative_recommendations``.
        product_id: Product passed to ``get_content_recommendations``.
        algo: Fitted collaborative-filtering model.
        trainset: Training set the model was fitted on.
        cosine_sim: Product-to-product similarity matrix.
        products: DataFrame with a ``product_id`` column.
        n: Maximum number of recommendations to return.

    Returns:
        A list of at most ``n`` unique ``(product_id, score)`` tuples.
        Collaborative entries carry their estimated rating; content-based
        entries carry the placeholder score ``0``.
    """
    collab_recs = get_collaborative_recommendations(user_id, algo, trainset, n)
    content_recs = get_content_recommendations(product_id, cosine_sim, products, n)
    # Content-based items have no predicted rating; 0 is kept as a placeholder
    content_pairs = [(pid, 0) for pid in content_recs['product_id'].tolist()]

    seen = set()
    unique_recs = []
    for rank in range(max(len(collab_recs), len(content_pairs))):
        for source in (collab_recs, content_pairs):
            if rank >= len(source):
                continue
            rec = source[rank]
            if rec[0] in seen:
                continue
            seen.add(rec[0])
            unique_recs.append(rec)
            if len(unique_recs) >= n:
                return unique_recs
    return unique_recs

# Produce the blended recommendation list for the sample user/product pair
hybrid_recs = hybrid_recommendations(
    user_id=user_id,
    product_id=product_id,
    algo=algo,
    trainset=trainset,
    cosine_sim=cosine_sim,
    products=products,
)
print("Hybrid Recommendations:", hybrid_recs, sep="\n")

# (Adicione mais treinamentos e avaliações conforme necessário)
