List of activities:
- Previous action: Hover, Click on Suggestion
- After action: Give score, Interaction, Watch Time

**Library**

In [None]:
import math
import numpy as np
import pandas as pd
import re
import string
import random

**Preprocess anime.csv**

In [None]:
# Load the dataset
# Anime catalogue: one row per title (anime_id, name, genre, type, episodes,
# rating, members).
df = pd.read_csv('/content/drive/MyDrive/anime.csv')
df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# Check column dtypes and how many values are missing per column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [None]:
# Drop 'type' and 'episodes'; none of the functions visible in this notebook
# read them.
df.drop(columns = ['type', 'episodes'], inplace = True)

In [None]:
# Remove every row that still has a missing value. Index labels are kept as
# they were, so the index is no longer contiguous after this step.
df.dropna(axis = 0, inplace = True)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12017 entries, 0 to 12293
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12017 non-null  int64  
 1   name      12017 non-null  object 
 2   genre     12017 non-null  object 
 3   rating    12017 non-null  float64
 4   members   12017 non-null  int64  
dtypes: float64(1), int64(2), object(2)
memory usage: 563.3+ KB


In [None]:
def clean(text):
    """Normalise an anime title for display.

    Known HTML entities are resolved first, then every ASCII punctuation
    character and the degree sign are removed. Digits and letter case are
    left untouched.

    Args:
        text: Raw title string.

    Returns:
        The cleaned title string.
    """
    # Entities must be handled BEFORE punctuation is stripped: once '&', '#'
    # and ';' are gone, "&#039;" degrades to "039" and can no longer be told
    # apart from genuine digits in a title (e.g. "Mob Psycho 100").
    text = text.replace('&quot;', '')
    text = text.replace('&#039;', '')
    text = text.replace('&amp;', 'and')

    # Drop the literal ".hack//" prefix; plain replace keeps the dot from
    # acting as a regex wildcard.
    text = text.replace('.hack//', '')

    # Remove all punctuation and the degree sign in one pass.
    strip_table = str.maketrans('', '', string.punctuation + u'\N{DEGREE SIGN}')
    return text.translate(strip_table)

In [None]:
# Normalise every title with clean(), then preview the result.
df.name = df.name.apply(clean)
df.head()

Unnamed: 0,anime_id,name,genre,rating,members
0,32281,Kimi no Na wa,"Drama, Romance, School, Supernatural",9.37,200630
1,5114,Fullmetal Alchemist Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",9.26,793665
2,28977,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",9.25,114262
3,9253,SteinsGate,"Sci-Fi, Thriller",9.17,673572
4,9969,Gintama,"Action, Comedy, Historical, Parody, Samurai, S...",9.16,151266


**Preprocess rating.csv**

In [None]:
# Load the per-user ratings (user_id, anime_id, rating).
rating = pd.read_csv('/content/drive/MyDrive/rating.csv')
rating.head()

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1


In [None]:
# Turn every -1 into NaN and drop those rows.
# NOTE(review): -1 is presumably the "no score given" marker - confirm against
# the dataset description. replace() is applied to every column, not only
# 'rating'.
rating.replace(-1, np.nan, inplace = True)
rating.dropna(inplace = True)
rating.head()

Unnamed: 0,user_id,anime_id,rating
47,1,8074,10.0
81,1,11617,10.0
83,1,11757,10.0
101,1,15451,10.0
153,2,11771,10.0


**Visualization**

**Các hàm dùng cho hệ thống**

In [None]:
def popular_retrieve(df):
  """Return the names of titles that are both highly rated and widely followed.

  A title qualifies when its ``rating`` and its ``members`` count each exceed a
  cut-off. Each cut-off is drawn uniformly at random between the 75th and the
  95th percentile of its column, so the selection varies from call to call.

  Args:
    df: Table with ``name``, ``rating`` and ``members`` columns.

  Returns:
    NumPy array with the names of the qualifying titles.
  """
  scores = df.rating.to_numpy()
  audience = df.members.to_numpy()

  # Random cut-off somewhere between the 75th and 95th percentile.
  score_cutoff = random.uniform(np.quantile(scores, 0.75), np.quantile(scores, 0.95))
  audience_cutoff = random.uniform(np.quantile(audience, 0.75), np.quantile(audience, 0.95))

  # Both conditions must hold for a title to be kept.
  keep = (scores > score_cutoff) & (audience > audience_cutoff)
  return df.name.to_numpy()[keep]

In [None]:
def final_rec(df, rating, rate):
  """Build the final list of up to ten recommended titles.

  The number of ratings held for the current user is compared with the 25th
  percentile of ratings-per-user in ``rating``:

  * at or below it, content-based picks fill a share of the ten slots
    proportional to the user's rating count, and randomly sampled popular
    titles fill the rest;
  * above it, the list consists of the content-based picks only.

  Args:
    df: Anime table, forwarded to content_rec and popular_retrieve.
    rating: Ratings table with a ``user_id`` column.
    rate: Per-title rating vector of the current user, forwarded to
      content_rec.

  Returns:
    List of title names without duplicates. It can be shorter than ten when
    too few popular titles are available.

  Note:
    The user's rating count is read from the module-level ``static_rating``
    and ``dynamic_rating`` dictionaries, not from ``rate``.
  """
  per_user_counts = rating.user_id.value_counts().to_numpy()
  score_num = len(static_rating) + len(dynamic_rating)
  content = content_rec(df, rate)
  threshold = np.quantile(per_user_counts, 0.25)

  # Enough ratings on record: rely on the content-based ranking alone
  # instead of returning nothing.
  if score_num > threshold:
    return list(content[:, 0])

  # threshold == 0 implies score_num == 0 here; avoid dividing 0 by 0.
  content_num = round(score_num * 10 / threshold) if threshold > 0 else 0
  popular_num = 10 - content_num

  recommendation = list(content[:content_num, 0])

  # Sample from titles not already picked so nothing is listed twice, and
  # never ask for more titles than are available.
  candidates = sorted(set(popular_retrieve(df)) - set(recommendation))
  recommendation.extend(random.sample(candidates, min(popular_num, len(candidates))))
  return recommendation

In [None]:
def static_transform(df, rating, user_id):
  """Collect one user's stored ratings, keyed by row position in ``df``.

  Args:
    df: Anime table with an ``anime_id`` column.
    rating: Ratings table with ``user_id``, ``anime_id`` and ``rating``
      columns.
    user_id: The user whose ratings are extracted.

  Returns:
    Dict mapping the 0-based row position of a title in ``df`` to the score
    the user gave it. Ratings for titles that are not present in ``df`` are
    skipped.
  """
  position_of = {anime_id: pos for pos, anime_id in enumerate(df['anime_id'].to_numpy())}

  user_rows = rating[rating.user_id == user_id]
  rate = {}
  for anime_id, score in zip(user_rows['anime_id'].to_numpy(), user_rows['rating'].to_numpy()):
    pos = position_of.get(anime_id)
    # The title may be missing from df (e.g. its row was dropped earlier);
    # ignore the rating instead of failing with KeyError.
    if pos is not None:
      rate[pos] = score
  return rate

In [None]:
def genre_process(data):
  """One-hot encode the comma-separated ``genre`` column.

  Args:
    data: Table with a ``genre`` column whose values are strings of genre
      names separated by ", ".

  Returns:
    Float array of shape (number of rows, number of distinct genres). Entry
    [i, j] is 1 when row i lists genre j and 0 otherwise. Columns follow the
    alphabetical order of the genre names, so the layout is the same on
    every run.
  """
  genres_per_row = [entry.split(', ') for entry in data.genre.to_numpy()]

  # Sort the vocabulary: iterating a set of strings directly gives an order
  # that can change between interpreter runs.
  vocabulary = sorted(set().union(*genres_per_row))
  column_of = {name: col for col, name in enumerate(vocabulary)}

  encoded = np.zeros((len(genres_per_row), len(vocabulary)))
  for row, names in enumerate(genres_per_row):
    for name in names:
      encoded[row, column_of[name]] = 1
  return encoded

In [None]:
def normalized(data):
  """Scale every row by the square root of its own sum.

  Args:
    data: 2-D numeric array.

  Returns:
    Array in which each row of ``data`` has been divided by
    ``sqrt(sum of that row)``.
  """
  row_totals = np.sum(data, axis=1)
  return np.array([row / np.sqrt(total) for row, total in zip(data, row_totals)])

In [None]:
def idf(data):
  """Compute one inverse-document-frequency weight per column.

  With N the number of rows and f the sum of a column, the weight is
  ``1 + log_N(1 / f)``.

  Args:
    data: 2-D numeric array (rows are items, columns are features).

  Returns:
    1-D array with one weight per column of ``data``.
  """
  item_count = data.shape[0]
  column_freq = np.sum(data, axis=0)
  return 1 + np.emath.logn(item_count, 1/column_freq)

In [None]:
def profile_cal(data, user_rating):
  """Return the dot product of the user's rating vector with ``data``.

  Args:
    data: Item-by-feature array.
    user_rating: Rating vector with one entry per row of ``data``.

  Returns:
    ``np.dot(user_rating, data)``, i.e. one value per feature column.
  """
  return np.dot(user_rating, data)

In [None]:
def content_rec(df, user_rating):
  """Rank titles for one user by genre and return the ten best.

  Args:
    df: Anime table with ``name`` and ``genre`` columns.
    user_rating: Rating vector with one entry per row of ``df``.

  Returns:
    Array of up to ten rows ``[name, predicted score]``, highest score first.
  """
  genre_matrix = genre_process(df)
  unit_rows = normalized(genre_matrix)

  # Item vectors: normalised genre rows scaled by the per-genre idf weight.
  weighted_items = np.array(unit_rows * idf(genre_matrix))

  # The user's profile is built from the normalised rows without idf weights.
  taste = profile_cal(unit_rows, user_rating)
  scores = np.dot(weighted_items, taste)

  ranked = pd.DataFrame({'anime_name' : df.name, 'rating' : scores})
  ranked = ranked.sort_values(by = ['rating'], ascending = False)
  return ranked.head(10).to_numpy()

In [None]:
def interest_eval(type, description):
  """Convert one user activity into an interest score.

  Args:
    type: Activity name: 'score', 'hover', 'view', 'like', 'dislike',
      'comment' or 'suggest'.
    description: Activity value. Used for 'score' (returned as is), 'hover'
      (value minus one) and 'view' (value times ten, rounded down); ignored
      for the other types.
      NOTE(review): presumably hover is a duration and view a watched
      fraction in [0, 1] - confirm with the caller.

  Returns:
    The interest score. Hover scores are limited to 0..9 and view scores to
    0..10.

  Raises:
    ValueError: If ``type`` is not one of the known activity names.
  """
  fixed_scores = {'like': 9, 'dislike': 1, 'comment': 6, 'suggest': 9}

  if type == 'score':
    return description
  if type == 'hover':
    # Clamp at 0 so a very short hover cannot yield a negative score.
    return max(0, min(9, description - 1))
  if type == 'view':
    # Clamp so a value outside [0, 1] cannot leave the 0..10 range.
    return max(0, min(10, math.floor(description*10)))
  if type in fixed_scores:
    return fixed_scores[type]

  # Fail loudly: a silent None would be stored as a rating by the caller.
  raise ValueError(f"unknown activity type: {type!r}")

In [None]:
def rating_cal(activity, static_rating, dynamic_rating):
  """Apply one user activity to the rating dictionaries.

  Both dictionaries are modified in place and also returned.

  Args:
    activity: Dict with the keys 'movie_id', 'type' and 'description'.
    static_rating: Explicit scores, keyed by movie id.
    dynamic_rating: Scores derived from other activities, keyed by movie id.

  Returns:
    The tuple ``(static_rating, dynamic_rating)``.
  """
  new_rating = interest_eval(activity['type'], activity['description'])
  kind = activity['type']
  movie = activity['movie_id']

  # An explicit score always overwrites the stored explicit rating.
  if kind == 'score':
    static_rating[movie] = new_rating
    return static_rating, dynamic_rating

  # First implicit signal for this title: store it as is.
  if movie not in dynamic_rating:
    dynamic_rating[movie] = new_rating
    return static_rating, dynamic_rating

  # Hover and suggest never revise an existing implicit rating.
  if kind == 'hover' or kind == 'suggest':
    return static_rating, dynamic_rating

  current = dynamic_rating[movie]
  if not new_rating < current:
    # Equal or higher interest replaces the stored value.
    dynamic_rating[movie] = new_rating
  elif new_rating <= 5:
    # Lower and clearly negative: blend 60/40 with the stored value.
    # A lower score above 5 leaves the stored value untouched.
    dynamic_rating[movie] = 0.6 * new_rating + 0.4 * current

  return static_rating, dynamic_rating

In [None]:
def user_rating(movie_num, static_rating, dynamic_rating):
  """Merge both rating dictionaries into one dense rating vector.

  Args:
    movie_num: Length of the resulting vector.
    static_rating: Explicit scores, keyed by position in the vector.
    dynamic_rating: Activity-derived scores, keyed by position in the vector.

  Returns:
    Float array of length ``movie_num``; positions without a rating are 0.
    Where both dictionaries hold a value, the one from ``static_rating`` wins.
  """
  vector = np.zeros(movie_num)

  # Written in this order so explicit scores overwrite activity-derived ones.
  for source in (dynamic_rating, static_rating):
    for position, value in source.items():
      vector[position] = value
  return vector

**Sử dụng rating từ database**

In [None]:
# User whose stored ratings are loaded in the cells below.
user_id = 100

In [None]:
# Show the titles this user has rated. The user's own score appears as
# 'rating_user'; the catalogue's 'rating' column keeps its name.
pd.merge(df,rating[rating['user_id'] == user_id],on="anime_id",suffixes= [None, "_user"])

Unnamed: 0,anime_id,name,genre,rating,members,user_id,rating_user
0,6746,Durarara,"Action, Mystery, Supernatural",8.38,556431,100,10.0
1,9919,Ao no Exorcist,"Action, Demons, Fantasy, Shounen, Supernatural",7.92,583823,100,10.0
2,11757,Sword Art Online,"Action, Adventure, Fantasy, Game, Romance",7.83,893100,100,10.0
3,1281,Gakkou no Kaidan,"Horror, Mystery, Supernatural",7.71,42033,100,10.0
4,8074,Highschool of the Dead,"Action, Ecchi, Horror, Supernatural",7.46,535892,100,9.0


In [None]:
# Explicit ratings come from the ratings table, keyed by row position in df.
# No activity-derived ratings exist yet.
static_rating = static_transform(df, rating, user_id)
dynamic_rating = {}
static_rating, dynamic_rating

({1031: 10.0, 166: 10.0, 1709: 9.0, 643: 10.0, 804: 10.0}, {})

**Rating từ hoạt động người dùng**

In [None]:
# Template for one user activity: target title, activity type and a
# type-specific value. The cells below overwrite all three fields.
activity = {'movie_id' : '', 'type' : '', 'description' : 0}

In [None]:
# Hover on title 10 with value 5: interest_eval maps it to min(9, 5 - 1) = 4.
activity['movie_id'] = 10
activity['type'] = 'hover'
activity['description'] = 5
rating_cal(activity, static_rating, dynamic_rating)

({}, {10: 4})

In [None]:
# View of title 10 with value 0.6: scored as floor(0.6 * 10) = 6.
activity['movie_id'] = 10
activity['type'] = 'view'
activity['description'] = 0.6
rating_cal(activity, static_rating, dynamic_rating)

({}, {10: 6})

In [None]:
# Dislike on title 10: fixed score 1. A lower score that is <= 5 is blended
# 60/40 with the stored implicit rating.
activity['movie_id'] = 10
activity['type'] = 'dislike'
activity['description'] = 1
rating_cal(activity, static_rating, dynamic_rating)

({}, {10: 3.0000000000000004})

In [None]:
# A hover does not change a title that already has an implicit rating.
activity['movie_id'] = 10
activity['type'] = 'hover'
activity['description'] = 2
rating_cal(activity, static_rating, dynamic_rating)

({}, {10: 3.0000000000000004})

In [None]:
# 'suggest' is likewise ignored once the title has an implicit rating.
activity['movie_id'] = 10
activity['type'] = 'suggest'
activity['description'] = 0
rating_cal(activity, static_rating, dynamic_rating)

({}, {10: 3.0000000000000004})

In [None]:
# Like on title 10: fixed score 9. A score that is not lower than the stored
# implicit rating replaces it.
activity['movie_id'] = 10
activity['type'] = 'like'
activity['description'] = 0
rating_cal(activity, static_rating, dynamic_rating)

({}, {10: 9})

In [None]:
# Explicit score 1: stored in static_rating; dynamic_rating is not touched.
activity['movie_id'] = 10
activity['type'] = 'score'
activity['description'] = 1
rating_cal(activity, static_rating, dynamic_rating)

({10: 1}, {10: 9})

In [None]:
# A second explicit score overwrites the previous one in static_rating.
activity['movie_id'] = 10
activity['type'] = 'score'
activity['description'] = 3
rating_cal(activity, static_rating, dynamic_rating)

({10: 3}, {10: 9})

In [None]:
# Comment on title 10: fixed score 6. A lower score above 5 leaves the stored
# implicit rating unchanged.
activity['movie_id'] = 10
activity['type'] = 'comment'
activity['description'] = 0
rating_cal(activity, static_rating, dynamic_rating)

({10: 3}, {10: 9})

In [None]:
# View of title 11 with value 0.6: scored as floor(0.6 * 10) = 6. The first
# implicit score for a title is stored directly.
activity['movie_id'] = 11
activity['type'] = 'view'
activity['description'] = 0.6
rating_cal(activity, static_rating, dynamic_rating)

({10: 3}, {10: 9, 11: 6})

In [None]:
# Comment on title 11: fixed score 6, which replaces the stored implicit
# rating only when it is not lower than it.
activity['movie_id'] = 11
activity['type'] = 'comment'
activity['description'] = 0
rating_cal(activity, static_rating, dynamic_rating)

({10: 3}, {10: 9, 11: 6})

In [None]:
# Like on title 11: fixed score 9.
activity['movie_id'] = 11
activity['type'] = 'like'
activity['description'] = 0
rating_cal(activity, static_rating, dynamic_rating)

({10: 3}, {10: 9, 11: 9})

**Chuyển đổi rating dictionary thành rating array**

In [None]:
# One entry per row of df; explicit scores take precedence over
# activity-derived ones, and unrated titles stay at 0.
rate = user_rating(len(df.index), static_rating, dynamic_rating)
rate

array([0., 0., 0., ..., 0., 0., 0.])

**Recommendation**

In [None]:
# Produce the final recommendation list for the current user.
recommendation = final_rec(df, rating, rate)
recommendation

['Ookamikakushi',
 'Gakkou no Kaidan Recaps',
 'Gakkou no Kaidan',
 'Gakkou no Kaidan Kubinashi Rider Shi no Noroi',
 'Eve no Jikan Movie',
 'One Punch Man Specials',
 'Clannad After Story',
 'Kami nomi zo Shiru Sekai II',
 'Kara no Kyoukai 2 Satsujin Kousatsu Zen',
 'Diamond no Ace']