## Popularity Based Recommendations

In [1]:
import pandas as pd
import numpy as np

In [2]:
interaction = pd.read_csv('https://bradfordtuckfield.com/purchasehistory1.csv')
interaction.set_index("Unnamed: 0", inplace=True)
print(interaction)

            user1  user2  user3  user4  user5
Unnamed: 0                                   
item1           1      1      0      1      1
item2           1      0      1      1      0
item3           1      1      0      1      1
item4           1      0      1      0      1
item5           1      1      0      0      1


In [9]:
# Create a function that will return the most popular items in order from the interaction matrix dataframe
def popularity_based(interaction):
    interaction_withcounts = interaction.copy()
    interaction_withcounts['counts'] = interaction_withcounts.sum(axis=1)
    sorted = interaction_withcounts.sort_values(by='counts',ascending=False)
    most_popular = list(sorted.index)
    return(most_popular)

In [10]:
popularity_based(interaction)

['item1', 'item3', 'item2', 'item4', 'item5']

## Item-Based Collaborative Filtering

### Measuring Vector Similarity

### Calculating Cosine Similarity

## Document and save this one

In [18]:
def dot_product(vector1, vector2):
    thedotproduct = np.sum([vector1[k] * vector2[k] for k in range(0, len(vector1))])
    return(thedotproduct)

In [19]:
def vector_norm(vector):
    thenorm = np.sqrt(dot_product(vector,vector))
    return (thenorm)

In [20]:
def cosine_similarity(vector1, vector2):
    thedotproduct = dot_product(vector1,vector2)
    thecosine = thedotproduct/(vector_norm(vector1)*vector_norm(vector2))
    thecosine=np.round(thecosine,4)
    return(thecosine)

In [23]:
def get_item_recommendations(interaction, itemname):
    otherrows = [rowname for rowname in interaction.index if rowname!=itemname]
    otheritems = interaction.loc[otherrows,:]
    theitem = list(interaction.loc[itemname,:])
    similarities = []
    for items in otheritems.index:
        similarities.append(cosine_similarity(theitem, list(otheritems.loc[items])))
    otheritems['similarities'] = similarities
    return list(otheritems.sort_values(by='similarities',ascending=False).index)

In [24]:
get_item_recommendations(interaction,'item1')

['item3', 'item5', 'item2', 'item4']

## User-Based Collaborative Filtering

# Document this and save

In [34]:
def get_similar_users(interaction, username):
    othercolumns = [columnname for columnname in interaction.columns if columnname != username]
    otherusers = interaction[othercolumns]
    theuser = list(interaction[username])
    similarities = []
    for users in otherusers.columns:
        similarities.append(cosine_similarity(theuser, list(otherusers.loc[:,users])))
    otherusers.loc['similarities',:]=similarities
    return list(otherusers.sort_values(by='similarities', axis=1, ascending=False).columns)

In [35]:
def get_user_recommendations(interaction, username):
    similar_users = get_similar_users(interaction,username)
    purchase_history = interaction[similar_users[0]]
    purchased = list(purchase_history.loc[purchase_history==1].index)
    purchased2 = list(interaction.loc[interaction[username]==1].index)
    recs = sorted(list(set(purchased)-set(purchased2)))
    return(recs)

In [36]:
get_user_recommendations(interaction, 'user2')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  otherusers.loc['similarities',:]=similarities


['item4']

## Case Study: Music Recommendations

In [41]:
lastfm = pd.read_csv("https://bradfordtuckfield.com/lastfm-matrix-germany.csv")
lastfm.head(5)

Unnamed: 0,user,a perfect circle,abba,ac/dc,adam green,aerosmith,afi,air,alanis morissette,alexisonfire,...,timbaland,tom waits,tool,tori amos,travis,trivium,u2,underoath,volbeat,yann tiersen
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,33,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,42,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,51,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,62,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
lastfm.drop(['user'],axis=1,inplace=True)

In [43]:
lastfm = lastfm.T

In [44]:
lastfm.shape

(285, 1257)

In [45]:
get_item_recommendations(lastfm, 'abba')[0:10]

['madonna',
 'robbie williams',
 'elvis presley',
 'michael jackson',
 'queen',
 'the beatles',
 'kelly clarkson',
 'groove coverage',
 'duffy',
 'mika']

In [46]:
get_user_recommendations(lastfm,0)[0:3]

  thecosine = thedotproduct/(vector_norm(vector1)*vector_norm(vector2))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  otherusers.loc['similarities',:]=similarities


['billy talent', 'bob marley', 'die toten hosen']