# **RECOMMENDATION SYSTEM**

# *CONTENT BASED*

## 1. Similarity Measure (Cosine Similarity)

In [3]:
import numpy as np 
import pandas as pd 

from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Build the Item-Feature Matrix: one row per movie, one 0/1 column per genre.

movies = ["Terminator 2", "Interstellar", "Ant Man 2", "3 Idiots"]
scores = [7, 9, 8, 9]  # one score per movie, aligned with `movies`

# Genre flags, aligned position-by-position with `movies`.
action = [1, 0, 1, 0]
scifi = [1, 1, 1, 0]
adventure = [0, 1, 1, 0]
comedy = [0, 0, 1, 1]
drama = [0, 1, 0, 1]

# Display name of each genre column -> its flag list.
genre_flags = {
    'Action': action,
    'Sci-Fi': scifi,
    'Adventure': adventure,
    'Comedy': comedy,
    'Drama': drama,
}

df_movies = pd.DataFrame({'movie': movies, 'scores': scores, **genre_flags})

df_movies

Unnamed: 0,movie,scores,Action,Sci-Fi,Adventure,Comedy,Drama
0,Terminator 2,7,1,1,0,0,0
1,Interstellar,9,0,1,1,0,1
2,Ant Man 2,8,1,1,1,1,0
3,3 Idiots,9,0,0,0,1,1


In [6]:
# Genre flags of "Terminator 2": pick its row, then skip the first two
# columns ('movie' and 'scores') so only the genre columns remain.
is_terminator = df_movies['movie'] == 'Terminator 2'
df_movies[is_terminator].iloc[:, 2:]

Unnamed: 0,Action,Sci-Fi,Adventure,Comedy,Drama
0,1,1,0,0,0


In [12]:
def _genre_vector(title):
    """Return the single-row frame of genre flags for the movie called `title`."""
    # Columns 0 and 1 are 'movie' and 'scores'; the genre flags start at column 2.
    return df_movies[df_movies['movie'] == title].iloc[:, 2:]


Terminator = _genre_vector('Terminator 2')
Interstellar = _genre_vector('Interstellar')
Antman = _genre_vector('Ant Man 2')
idiots = _genre_vector('3 Idiots')

# Compare Terminator 2 against each of the other movies.
for label, other in (
    ('Terminator - Interstellar', Interstellar),
    ('Terminator - Antman', Antman),
    ('Terminator - idiots', idiots),
):
    print(label, cosine_similarity(Terminator, other))


Terminator - Interstellar [[0.40824829]]
Terminator - Antman [[0.70710678]]
Terminator - idiots [[0.]]


**Summary**

- User telah menonton Terminator.
- Film apa yang akan direkomendasikan selanjutnya?
- Kita menggunakan cosine similarity untuk mencari film yang paling mirip
- Nilai cosine similarity terbesar adalah antara Terminator dan AntMan (0.71)
- Maka film yang akan direkomendasikan selanjutnya adalah AntMan

## 2. Content Based Filtering

**Single User**

In [59]:
# Build the Item-Feature Matrix from one record per movie.

# (title, score, Action, Sci-Fi, Adventure, Comedy, Drama)
movie_records = [
    ("Terminator 2", 7, 1, 1, 0, 0, 0),
    ("Interstellar", 9, 0, 1, 1, 0, 1),
    ("Ant Man 2", 8, 1, 1, 1, 1, 0),
    ("3 Idiots", 9, 0, 0, 0, 1, 1),
]

# Column-wise views of the same records, kept as plain lists.
movies, scores, action, scifi, adventure, comedy, drama = (
    list(column) for column in zip(*movie_records)
)

df_movies = pd.DataFrame(
    movie_records,
    columns=['movie', 'scores', 'Action', 'Sci-Fi', 'Adventure', 'Comedy', 'Drama'],
)

df_movies

Unnamed: 0,movie,scores,Action,Sci-Fi,Adventure,Comedy,Drama
0,Terminator 2,7,1,1,0,0,0
1,Interstellar,9,0,1,1,0,1
2,Ant Man 2,8,1,1,1,1,0
3,3 Idiots,9,0,0,0,1,1


In [60]:
# Numeric working copy: everything except the title column.
# `drop` returns a new frame, so `df_movies` itself is left untouched.
df_movies2 = df_movies.drop(columns='movie')
df_movies2

Unnamed: 0,scores,Action,Sci-Fi,Adventure,Comedy,Drama
0,7,1,1,0,0,0
1,9,0,1,1,0,1
2,8,1,1,1,1,0
3,9,0,0,0,1,1


In [61]:
# Every column after the leading 'scores' column is a genre flag.
genre_columns = df_movies2.columns[1:]
genre_columns

Index(['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'Drama'], dtype='object')

In [62]:
# Weight every genre flag by the movie's score.
# The product is computed from the untouched `df_movies` rather than from
# `df_movies2` itself, so re-running this cell yields the same values instead
# of multiplying by the score a second time. It also keeps working after the
# 'scores' column has been dropped from `df_movies2`.
genre_cols = df_movies2.columns.drop('scores', errors='ignore')
for genre in genre_cols:
    df_movies2[genre] = df_movies['scores'] * df_movies[genre]

df_movies2

Unnamed: 0,scores,Action,Sci-Fi,Adventure,Comedy,Drama
0,7,7,7,0,0,0
1,9,0,9,9,0,9
2,8,8,8,8,8,0
3,9,0,0,0,9,9


In [63]:
# Item Feature Matrix (with rating)

# Only the rating-weighted genre columns are needed from here on.
# errors='ignore' keeps the cell re-runnable: a second run finds 'scores'
# already gone and would otherwise raise KeyError.
df_movies2 = df_movies2.drop(columns='scores', errors='ignore')
df_movies2

Unnamed: 0,Action,Sci-Fi,Adventure,Comedy,Drama
0,7,7,0,0,0
1,0,9,9,0,9
2,8,8,8,8,0
3,0,0,0,9,9


In [64]:
# Column-wise sum: total rating-weighted value per genre.
genre_totals = df_movies2.sum()
genre_totals

Action       15
Sci-Fi       24
Adventure    17
Comedy       17
Drama        18
dtype: int64

In [65]:
# Sum of the per-genre totals, i.e. the total over the whole matrix.
grand_total = df_movies2.sum().sum()
grand_total

91

In [66]:
# User Feature Vector: each genre's share of the total rating-weighted value.

weighted_genre_sums = df_movies2.sum()
user_feature_vector = weighted_genre_sums.div(weighted_genre_sums.sum())
user_feature_vector

Action       0.164835
Sci-Fi       0.263736
Adventure    0.186813
Comedy       0.186813
Drama        0.197802
dtype: float64

In [67]:
# Movies the user has not watched yet (candidates for recommendation).

movies = ["Titanic", 'Martian', 'GOTG Vol 2']

# Genre flags, aligned position-by-position with `movies`.
action = [1, 0, 1]
scifi = [1, 1, 1]
adventure = [0, 1, 1]
comedy = [0, 0, 1]
drama = [0, 1, 0]

# Start from the genre columns, then put the title column in front.
df_movies_recommendation = pd.DataFrame({
    'Action': action,
    'Sci-Fi': scifi,
    'Adventure': adventure,
    'Comedy': comedy,
    'Drama': drama,
})
df_movies_recommendation.insert(0, 'movie', movies)

df_movies_recommendation

Unnamed: 0,movie,Action,Sci-Fi,Adventure,Comedy,Drama
0,Titanic,1,1,0,0,0
1,Martian,0,1,1,0,1
2,GOTG Vol 2,1,1,1,1,0


In [68]:
# Predict the rating the user would give to each unwatched movie.

# Scale every genre flag by the user's weight for that genre. `assign`
# returns a new frame, so `df_movies_recommendation` is not modified.
weighted_genres = {
    genre: user_feature_vector[genre] * df_movies_recommendation[genre]
    for genre in df_movies_recommendation.columns[1:]
}
df_scoring = df_movies_recommendation.assign(**weighted_genres)

df_scoring

Unnamed: 0,movie,Action,Sci-Fi,Adventure,Comedy,Drama
0,Titanic,0.164835,0.263736,0.0,0.0,0.0
1,Martian,0.0,0.263736,0.186813,0.0,0.197802
2,GOTG Vol 2,0.164835,0.263736,0.186813,0.186813,0.0


In [69]:
# Predicted score per movie = sum of its weighted genre columns.
# The genre columns are selected by excluding the non-genre ones by name:
# a positional slice would also pick up the 'score' column once it has been
# added to `df_scoring`, inflating the result on a re-run.
df_scoring.drop(columns=['movie', 'score'], errors='ignore').sum(axis=1)

0    0.428571
1    0.648352
2    0.802198
dtype: float64

In [70]:
# Total predicted score per movie. An existing 'score' column is excluded from
# the sum so that re-running this cell does not add the previous total on top
# of the genre values.
df_scoring['score'] = df_scoring.drop(columns=['movie', 'score'], errors='ignore').sum(axis=1)
df_scoring

Unnamed: 0,movie,Action,Sci-Fi,Adventure,Comedy,Drama,score
0,Titanic,0.164835,0.263736,0.0,0.0,0.0,0.428571
1,Martian,0.0,0.263736,0.186813,0.0,0.197802,0.648352
2,GOTG Vol 2,0.164835,0.263736,0.186813,0.186813,0.0,0.802198


**Summary**

- User telah menonton Terminator 2, Interstellar, Ant Man 2, 3 Idiots
- Film apa yang akan direkomendasikan selanjutnya?
- Kita menggunakan metode content based filtering 
- Score tertinggi adalah GOTG (0.80), maka GOTG yang akan direkomendasikan

**Multiple User**

In [75]:
# User-Item Matrix: one row per user, one rating column per movie.

# Ratings in the order (Terminator 2, Interstellar, Ant Man 2, 3 Idiots).
ratings_by_user = {
    'user 1': (7, 9, 8, 9),
    'user 2': (8, 0, 6, 5),
    'user 3': (9, 0, 0, 10),
    'user 4': (0, 7, 0, 9),
}

user = list(ratings_by_user)

# Transpose the per-user tuples into one list per movie.
terminator_2, interstellar, ant_man_2, three_idiots = (
    list(column) for column in zip(*ratings_by_user.values())
)

df_user_items = pd.DataFrame({
    'user': user,
    'Terminator 2': terminator_2,
    'Interstellar': interstellar,
    'Ant Man 2': ant_man_2,
    '3 Idiots': three_idiots,
})

df_user_items

Unnamed: 0,user,Terminator 2,Interstellar,Ant Man 2,3 Idiots
0,user 1,7,9,8,9
1,user 2,8,0,6,5
2,user 3,9,0,0,10
3,user 4,0,7,0,9


In [76]:
# Item-Feature matrix: one row per movie, one 0/1 column per genre.
movies = ["Terminator 2", "Interstellar", "Ant Man 2", "3 Idiots"]

# Genre flags, aligned position-by-position with `movies`.
action = [1, 0, 1, 0]
scifi = [1, 1, 1, 0]
adventure = [0, 1, 1, 0]
comedy = [0, 0, 1, 1]
drama = [0, 1, 0, 1]

# Pair each column label with its values; dict order fixes the column order.
column_labels = ['movie', 'Action', 'Sci-Fi', 'Adventure', 'Comedy', 'Drama']
column_values = [movies, action, scifi, adventure, comedy, drama]
df_item_features = pd.DataFrame(dict(zip(column_labels, column_values)))

df_item_features

Unnamed: 0,movie,Action,Sci-Fi,Adventure,Comedy,Drama
0,Terminator 2,1,1,0,0,0
1,Interstellar,0,1,1,0,1
2,Ant Man 2,1,1,1,1,0
3,3 Idiots,0,0,0,1,1


In [78]:
# Movie scoring: total every feature per user, then normalise.

# Strip the label columns so only the numeric matrices remain.
arr_user_items = np.array(df_user_items.drop(columns='user'))
arr_item_features = np.array(df_item_features.drop(columns='movie'))

n_user, n_item = arr_user_items.shape
n_feature = arr_item_features.shape[1]

# Pre-allocated container for the per-user item scores computed in a later cell.
arr_user_items_score = np.empty((n_user, n_item))

# (n_user x n_item) @ (n_item x n_feature): rating-weighted feature totals,
# one row per user.
feature_totals = arr_user_items @ arr_item_features
row_totals = feature_totals.sum(axis=1, keepdims=True)

# Normalise each user's row so it sums to 1. A user without any rating has a
# zero total; that profile is left at all zeros instead of dividing by zero,
# which would fill the row with NaN.
arr_user_feature = np.divide(
    feature_totals,
    row_totals,
    out=np.zeros((n_user, n_feature)),
    where=row_totals != 0,
)

In [79]:
# User Feature Matrix: one User Feature Vector per row (users 1-4),
# labelled with the genre names and the user ids.

df_user_feature = pd.DataFrame(
    arr_user_feature,
    index=user,
    columns=df_item_features.columns[1:],
)
df_user_feature

Unnamed: 0,Action,Sci-Fi,Adventure,Comedy,Drama
user 1,0.164835,0.263736,0.186813,0.186813,0.197802
user 2,0.28,0.28,0.12,0.22,0.1
user 3,0.236842,0.236842,0.0,0.263158,0.263158
user 4,0.0,0.179487,0.179487,0.230769,0.410256


In [80]:
# Predict the score each user would give to the movies they have not watched yet.

# (n_user x n_feature) @ (n_feature x n_item): for every user, the dot product
# of each movie's genre flags with that user's feature vector.
arr_user_items_score = arr_user_feature @ arr_item_features.T

# A rating of 0 in the user-item matrix is treated as "not watched": keep the
# predicted score only there and zero out the movies already rated.
arr_user_items_score_unwatched = np.where(arr_user_items == 0, arr_user_items_score, 0)

# Row/column labels are taken from the frames the arrays were built from, not
# from the global `movies` list: a later cell rebinds `movies` to a different
# set of titles, which would break this cell on a re-run.
df_user_items_score_unwatched = pd.DataFrame(
    arr_user_items_score_unwatched,
    index=df_user_items['user'].tolist(),
    columns=df_item_features['movie'].tolist(),
)
df_user_items_score_unwatched

Unnamed: 0,Terminator 2,Interstellar,Ant Man 2,3 Idiots
user 1,0.0,0.0,0.0,0.0
user 2,0.0,0.5,0.0,0.0
user 3,0.0,0.5,0.736842,0.0
user 4,0.179487,0.0,0.589744,0.0


*Rekomendasi*

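- User 1 sudah menonton semua film, sehingga tidak ada rekomendasi dari daftar ini.
- User 2 hanya belum menonton Interstellar (0.50).
- User 3 akan direkomendasikan untuk menonton AntMan dulu (0.74), setelah itu baru Interstellar (0.50).
- User 4 akan direkomendasikan untuk menonton AntMan dulu (0.59), setelah itu baru Terminator (0.18).
- Kita bisa membuat threshold, misalnya film dengan prediksi rating di bawah 0.5 tidak akan direkomendasikan ke user.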



In [81]:
# Movies that none of the users has watched yet.

# (title, Action, Sci-Fi, Adventure, Comedy, Drama)
new_movie_records = [
    ("Titanic", 1, 1, 0, 0, 0),
    ("Martian", 0, 1, 1, 0, 1),
    ("GOTG Vol 2", 1, 1, 1, 1, 0),
]

# Column-wise views of the same records, kept as plain lists.
movies, action, scifi, adventure, comedy, drama = (
    list(column) for column in zip(*new_movie_records)
)

df_item_features_new = pd.DataFrame(
    new_movie_records,
    columns=['movie', 'Action', 'Sci-Fi', 'Adventure', 'Comedy', 'Drama'],
)

df_item_features_new

Unnamed: 0,movie,Action,Sci-Fi,Adventure,Comedy,Drama
0,Titanic,1,1,0,0,0
1,Martian,0,1,1,0,1
2,GOTG Vol 2,1,1,1,1,0


In [82]:
# Predict the score each user would give to the movies nobody has watched yet.

# Numeric genre matrix of the new movies (title column removed).
arr_item_features_new = np.array(df_item_features_new.drop(columns='movie'))

n_item_new = len(df_item_features_new)

arr_user_items_score_new = np.empty((n_user, n_item_new))

# One row per user: dot product of every new movie's genre flags with that
# user's feature vector.
for row in range(n_user):
    arr_user_items_score_new[row] = np.matmul(arr_item_features_new, arr_user_feature[row])

df_user_items_score_new = pd.DataFrame(arr_user_items_score_new, index=user)
df_user_items_score_new.columns = df_item_features_new['movie']
df_user_items_score_new

movie,Titanic,Martian,GOTG Vol 2
user 1,0.428571,0.648352,0.802198
user 2,0.56,0.5,0.9
user 3,0.473684,0.5,0.736842
user 4,0.179487,0.769231,0.589744


*Rekomendasi*

1. User 1: GOTG, Martian, Titanic.
1. User 2: GOTG, Titanic, Martian.
1. User 3: GOTG, Martian, Titanic.
1. User 4: Martian, GOTG, Titanic.
