In [18]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np
import pandas as pd

class NBI(BaseEstimator, ClassifierMixin):

    def __init__(self):
        pass

    def fit(self, A):
        Adj = A
        degrees_item = Adj.sum(axis=1)
        degrees_user = Adj.sum(axis=0)

        A = np.asanyarray(A)
        k_x = np.sum(A, axis=0, dtype=float)
        k_y = np.sum(A, axis=1, dtype=float)
        W = np.zeros((A.shape[1], A.shape[1])) #number of user
        for i in range(W.shape[0]):
          for j in range(W.shape[1]):
            sum = 0
            for y in range(A.shape[0]):
              if degrees_item[y]!= 0:
                sum = sum + (((Adj.iloc[y,j]==1)) * ((Adj.iloc[y,i]==1)) ) / degrees_user[i] / degrees_item[y]
            W[j, i] = sum

        self.W_ =W
        return self

    def predict(self, a, user_index):
        a = np.asanyarray(a)
        f_prime = np.sum(np.multiply(a,self.W_[user_index,:]))
        self.y_ = f_prime
        return self.y_

#Read data
df = pd.read_csv('/content/Magazine_Subscriptions.csv', names=['asin', 'reviewerID', 'overall', 'unixReviewTime'])


item_degrees = df['asin'].value_counts()
user_degrees = df['reviewerID'].value_counts()
nodes_to_keep = set(item_degrees[item_degrees >= 1].index) | set(user_degrees[user_degrees >= 1].index)
filtered_edges_df = df[(df['asin'].isin(nodes_to_keep)) & (df['reviewerID'].isin(nodes_to_keep))]



unwanted_columns = ['overall', 'unixReviewTime']
reduced_df = filtered_edges_df.sample(frac=0.2, random_state=32)
reduced_df.drop(columns=unwanted_columns, inplace=True)

from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(reduced_df, test_size=0.1, random_state=42)

train_edges = []
test_edges = []

train_users = set()
test_users = set()

items = set()

for index, row in train_set.iterrows():
  items.add(row['asin'])
  train_edges.append((row['asin'], row['reviewerID']))
  train_users.add(row['reviewerID'])

for index, row in test_set.iterrows():
  items.add(row['asin'])
  test_edges.append((row['asin'], row['reviewerID']))
  test_users.add(row['reviewerID'])


df2 = pd.DataFrame(train_edges, columns=['x', 'y'])
adj_matrix = pd.crosstab(df2['x'], df2['y'])
#
users_in_test = set(test_set['reviewerID'])
common_users = users_in_test.intersection(adj_matrix.columns)
adj_matrix_filtered = adj_matrix[common_users]


df3 = pd.DataFrame(test_edges, columns=['x', 'y'])
adj_matrix2 = pd.crosstab(df3['y'], df3['x'])
adj_matrix_filtered2 = adj_matrix2.loc[common_users]

#---------------------------------------------------------
#Downside of filtering: less information about each user and items, which could effect accuracy negatively
common_users_list = list(common_users)

# Filter train_set to keep only common users
train_set_filtered = train_set[train_set['reviewerID'].isin(common_users_list)]

# Filter test_set to keep only common users
test_set_filtered = test_set[test_set['reviewerID'].isin(common_users_list)]

train_edges_filtered = []
test_edges_filtered = []

train_user_filtered = set()
test_users_filtered = set()

items_filtered = set()

for index, row in train_set_filtered.iterrows():
  items_filtered.add(row['asin'])
  train_edges_filtered.append((row['asin'], row['reviewerID']))
  train_user_filtered.add(row['reviewerID'])

for index, row in test_set_filtered.iterrows():
  items_filtered.add(row['asin'])
  test_edges_filtered.append((row['asin'], row['reviewerID']))
  test_users_filtered.add(row['reviewerID'])

  adj_matrix_filtered = adj_matrix[common_users]
  adj_matrix_filtered2 = adj_matrix2.loc[common_users]


In [19]:
# Instantiate the NBI classifier
classifier = NBI()

# Fit the model with your data
classifier.fit(adj_matrix_filtered)
print(classifier.W_)
# Assuming 'new_data' is the data you want to predict on
#predictions = classifier.predict(adj_matrix_filtered2)


[[0.2        0.         0.         ... 0.         0.         0.        ]
 [0.         0.33333333 0.         ... 0.         0.         0.        ]
 [0.         0.         0.5        ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.         0.         0.         ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]


In [20]:
list_recommanded = pd.DataFrame()
user_index = 3
for i in range(len(adj_matrix_filtered2.columns)):
  name_of_item = adj_matrix_filtered2.columns[i]
  a = adj_matrix_filtered2[name_of_item]
  score = classifier.predict(a, user_index)
  new_row = pd.DataFrame({'Item': [name_of_item], 'Score': [score]})
  list_recommanded = pd.concat([list_recommanded, new_row], ignore_index=True)
sorted_list = list_recommanded.sort_values(by='Score', ascending=False)
#print(sorted_list[1:20])

In [24]:
#NBI Method
sum = 0
for items in test_edges_filtered:
  user_index = adj_matrix_filtered.columns.get_loc(items[1])
  list_recommanded = pd.DataFrame()
  for i in range(len(adj_matrix_filtered2.columns)):
    name_of_item = adj_matrix_filtered2.columns[i]
    a = adj_matrix_filtered2[name_of_item]
    score = classifier.predict(a, user_index)
    new_row = pd.DataFrame({'Item': [name_of_item], 'Score': [score]})
    list_recommanded = pd.concat([list_recommanded, new_row], ignore_index=True)
  sorted_list = list_recommanded.sort_values(by='Score', ascending=False)
  sum = sum + (sorted_list.index[sorted_list['Item'] == items[0]][0] + 1) / len(sorted_list)
average_hit_score = sum / len(test_edges_filtered)
print("The accuracy for NBI is:", average_hit_score)
#print(f"Which means the average position of some user's purchased item is about {average_hit_score*len(sorted_list)} on the recommanded list.")

The accuracy for NBI is: 0.44554031265667005
