In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
from sklearn.preprocessing import StandardScaler, LabelEncoder


In [35]:
products_data = pd.read_csv("prod_cat_info.csv")
customer_profiles = pd.read_csv("Customer.csv")
transaction = pd.read_csv("Transactions.csv")



In [18]:
products_data.drop_duplicates(inplace=True)
customer_profiles.drop_duplicates(inplace=True)
transaction.drop_duplicates(inplace=True)


In [19]:
products_data.fillna(0, inplace=True)  # Assuming missing values are filled with 0
customer_profiles.dropna(inplace=True)  # Drop rows with missing values


In [20]:
merged_data = pd.merge(transaction, customer_profiles,on="cust_id",how='left')

In [22]:
label_encoder = LabelEncoder()
merged_data['gender_encoded'] = label_encoder.fit_transform(merged_data['Gender'])


Collaborative Filtering using NumPy:

In [13]:
class CollaborativeFiltering:
    def __init__(self, n_factors=20, learning_rate=0.01, n_epochs=20, verbose=False):
        self.n_factors = n_factors
        self.learning_rate = learning_rate
        self.n_epochs = n_epochs
        self.verbose = verbose

In [21]:

def fit(self, ratings):
    n_users, n_items = ratings.shape
    self.user_factors = np.random.normal(scale=1/self.n_factors, size=(n_users, self.n_factors))
    self.item_factors = np.random.normal(scale=1/self.n_factors, size=(n_items, self.n_factors))
        
    for epoch in range(self.n_epochs):
        for u in range(n_users):
            for i in range(n_items):
                if ratings[u, i] > 0:
                    error = ratings[u, i] - np.dot(self.user_factors[u], self.item_factors[i])
                    self.user_factors[u] += self.learning_rate * (error * self.item_factors[i])
                    self.item_factors[i] += self.learning_rate * (error * self.user_factors[u])
        if self.verbose:
            mse = self.calculate_mse(ratings)
            print("Epoch {}: MSE = {}".format(epoch+1, mse))


In [15]:
def predict(self, user_id, item_id):
        return np.dot(self.user_factors[user_id], self.item_factors[item_id])


In [16]:
def calculate_mse(self, ratings):
        n_users, n_items = ratings.shape
        error_sum = 0
        count = 0
        for u in range(n_users):
            for i in range(n_items):
                if ratings[u, i] > 0:
                    error = ratings[u, i] - np.dot(self.user_factors[u], self.item_factors[i])
                    error_sum += error ** 2
                    count += 1
        return error_sum / count

In [17]:
# Example usage:
ratings = np.array([[5, 0, 4, 0],
                    [0, 4, 0, 0],
                    [4, 0, 0, 0],
                    [0, 3, 0, 2]])

In [19]:
model = CollaborativeFiltering(n_factors=2, learning_rate=0.01, n_epochs=100, verbose=True)
model.fit(ratings)

AttributeError: 'CollaborativeFiltering' object has no attribute 'fit'

In [20]:
user_id = 0
item_id = 1
prediction = model.predict(user_id, item_id)
print("Predicted rating for user {} on item {}: {}".format(user_id, item_id, prediction))

AttributeError: 'CollaborativeFiltering' object has no attribute 'predict'

Content-Based Filtering using scikit-learn:

In [22]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


In [23]:
product_data = {
    'item_id': [1, 2, 3],
    'description': ['This is product 1', 'Product 2 is great', 'Product 3 description']
}

In [25]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(product_data['description'])


In [26]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)


In [27]:
def content_based_recommendation(item_id, cosine_sim=cosine_sim):
    idx = item_id - 1  # Assuming item_id starts from 1
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]  # Top 10 similar items
    return [i[0] + 1 for i in sim_scores]  # Add 1 to item_id to match original indexing


In [28]:
recommended_items = content_based_recommendation(1)
print("Recommended Items:", recommended_items)

Recommended Items: [2, 3]


Hybrid Model combining Collaborative and Content-Based Filtering:

In [32]:
def hybrid_recommendation(user_id, collaborative_model, content_based_model):
    user_ratings = collaborative_model.user_factors[user_id]  # User preferences from collaborative model
    content_recommendations = content_based_model(1)  # Content-based recommendations
    # Combine recommendations by taking the average of user preferences and content-based scores
    hybrid_scores = [(i, (user_ratings[i-1] + content_score) / 2) for i, content_score in enumerate(content_recommendations, start=1)]
    hybrid_scores.sort(key=lambda x: x[1], reverse=True)  # Sort by score
    return [item_id for item_id, _ in hybrid_scores[:10]]  # Return top 10 recommended items


In [33]:
user_id = 0
hybrid_recommendations = hybrid_recommendation(user_id, model, content_based_recommendation)
print("Hybrid Recommendations for User {}: {}".format(user_id, hybrid_recommendations))


AttributeError: 'CollaborativeFiltering' object has no attribute 'user_factors'

In [36]:
train_data, test_data = train_test_split(transaction, test_size=0.2, random_state=42)


In [39]:
precision = precision_score(test_data["cust_id"], test_data["predicted_user_id"])
recall = recall_score(test_data["cust_id"], test_data["predicted_user_id"])
f1 = f1_score(test_data["cust_id"], test_data["predicted_user_id"])


ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].

In [40]:
average_precision = average_precision_score(test_data["user_id"], test_data["predicted_user_id"])


KeyError: 'user_id'

In [41]:
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)
print("Mean Average Precision:", average_precision)


NameError: name 'precision' is not defined