In [21]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import Table, select
from sqlalchemy.ext.hybrid import hybrid_property
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

from Models.helper import *

import pandas as pd
import bcrypt
import re

In [14]:
app = Flask(__name__)

db_config = {
    'SQLALCHEMY_DATABASE_URI' : 'sqlite:///Database/doubi_database.db',
    'SQLALCHEMY_TRACK_MODIFICATIONS' : False
}

app.config.update(db_config)
db = SQLAlchemy(app)

In [15]:
followers = Table('followers', db.metadata,
    db.Column('followed_id', db.String(32), db.ForeignKey('user.u_id')),
    db.Column('follower_id', db.String(32), db.ForeignKey('user.u_id'))
)

blocked_users = Table('blocked_users', db.metadata,
    db.Column('blocked_id', db.String(32), db.ForeignKey('user.u_id')),
    db.Column('blocker_id', db.String(32), db.ForeignKey('user.u_id')),
)

users_wish_film = Table('users_wish_film', db.metadata,
    db.Column('user_id', db.String(32), db.ForeignKey('user.u_id')),
    db.Column('film_id', db.String(32), db.ForeignKey('film.f_id'))
)

bad_word = db.Table('bad_word', db.metadata,
    db.Column('w_id', db.Integer, primary_key=True, autoincrement=True),
    db.Column('word', db.String(32), nullable=False))

class User(db.Model):
    __tablename__ = 'user'
    
    u_id = db.Column(db.String(32), primary_key=True, nullable=False, unique=True, default=u_id_generator)
    username = db.Column(db.String(80), nullable=False, unique=True)
    password_hash = db.Column(db.Text, nullable=False)
    email = db.Column(db.String(80), nullable=False, unique=True)
    url_photo = db.Column(db.Text, nullable=True)
    is_admin = db.Column(db.Boolean, nullable=False, default=False)
    is_blocked = db.Column(db.Boolean, nullable=False, default=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    followed = db.relationship('User', 
                                secondary=followers,
                                primaryjoin=(followers.c.follower_id == u_id),
                                secondaryjoin=(followers.c.followed_id == u_id),
                                backref=db.backref('followers', lazy='dynamic'),
                                lazy='dynamic')

    blocked = db.relationship('User',
                                secondary=blocked_users,
                                primaryjoin=(blocked_users.c.blocker_id == u_id),
                                secondaryjoin=(blocked_users.c.blocked_id == u_id),
                                backref=db.backref('blockers', lazy='dynamic'),
                                lazy='dynamic')
    
    reviews = db.relationship('Review', backref='user', lazy='dynamic')

    review_likes = db.relationship('Review_Like', backref='user', lazy='dynamic')
    review_dislikes = db.relationship('Review_Dislike', backref='user', lazy='dynamic')
    
    wish = db.relationship('Film', secondary=users_wish_film, backref='user', lazy='dynamic')
    
    @property
    def password(self):
        raise AttributeError('password is not a readable attribute')

    @password.setter
    def password(self, password):
        salt = bcrypt.gensalt()
        self.password_hash = bcrypt.hashpw(password.encode('utf-8'), salt)
    
    def verify_password(self, password):
        return bcrypt.checkpw(password.encode('utf-8'), self.password_hash)
    
    def __repr__(self):
        return '<User %r>' % self.username


    

class Film(db.Model):
    __tablename__ = 'film'

    f_id = db.Column(db.String(32), primary_key=True, nullable=False, unique=True, default=f_id_generator)
    title = db.Column(db.String(80), nullable=False)
    genre = db.Column(db.String(80), nullable=False)
    year = db.Column(db.Integer, nullable=True)
    run_time = db.Column(db.String(16), nullable=True)
    rating_imdb = db.Column(db.Float, nullable=True)
    overview = db.Column(db.String(500), nullable=True)
    director = db.Column(db.String(80), nullable=True)
    actor = db.Column(db.String(200), nullable=True)
    url_poster = db.Column(db.Text, nullable=True)
    rating_doubi = db.Column(db.Float, nullable=True)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    reviews = db.relationship('Review', backref='film', lazy='dynamic')
    
    @hybrid_property
    def rating(self):
        reviews = self.reviews.all()
        if len(reviews) == 0:
            return 0
        else:
            return round(sum(review.rating for review in reviews) / len(reviews), 1)
        
    @rating.expression
    def rating(cls):
        return select(func.avg(Review.rating)).where(Review.f_id == cls.f_id)
    
    @property
    def rating_distribution(self):
        reviews = self.reviews.all()
        rating_distribution = {x: 0 for x in range(0, 5)}
        for review in reviews:
            rating_distribution[review.rating] = rating_distribution.get(review.rating, 0) + 1
        return rating_distribution

    @property
    def genres(self):
        return [genre.strip() for genre in self.genre.split(',')]
    
    @property
    def actors(self):
        return [actor.strip() for actor in self.actor.split(',')]
    
    
    def rating_customized(self, current_user):
        reviews = self.reviews.all()
        blocked_id = [x.u_id for x in current_user.blocked.all()]
        reviews = [x for x in reviews if x.u_id not in blocked_id]
        if len(reviews) == 0:
            return 0
        else:
            return round(sum(review.rating for review in reviews) / len(reviews), 1)
        
    def rating_distribution_customized(self, current_user):
        reviews = self.reviews.all()
        blocked_id = [x.u_id for x in current_user.blocked.all()]
        reviews = [x for x in reviews if x.u_id not in blocked_id]
        rating_distribution = {x: 0 for x in range(0, 5)}
        for review in reviews:
            rating_distribution[review.rating] = rating_distribution.get(review.rating, 0) + 1
        return rating_distribution
    
    def __repr__(self):
        return '<Film %r>' % self.title
    


class Review(db.Model):
    __tablename__ = 'review'

    r_id = db.Column(db.String(32), primary_key=True, nullable=False, unique=True, default=r_id_generator)
    u_id = db.Column(db.String(32), db.ForeignKey('user.u_id'), nullable=False)
    f_id = db.Column(db.String(32), db.ForeignKey('film.f_id'), nullable=False)
    content = db.Column(db.String(500), nullable=True)
    rating = db.Column(db.Integer, nullable=False)
    bad_word = db.Column(db.Boolean, nullable=False, default=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    likes = db.relationship('Review_Like', backref='review', lazy='dynamic')
    dislikes = db.relationship('Review_Dislike', backref='review', lazy='dynamic')
    
    @property
    def like(self):
        return self.likes.count()
    
    @property
    def dislike(self):
        return self.dislikes.count()


class Review_Like(db.Model):
    __tablename__ = 'review_like'

    r_id = db.Column(db.String(32), db.ForeignKey('review.r_id'), primary_key=True, nullable=False)
    u_id = db.Column(db.String(32), db.ForeignKey('user.u_id'), primary_key=True, nullable=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)


class Review_Dislike(db.Model):
    __tablename__ = 'review_dislike'

    r_id = db.Column(db.String(32), db.ForeignKey('review.r_id'), primary_key=True, nullable=False)
    u_id = db.Column(db.String(32), db.ForeignKey('user.u_id'), primary_key=True, nullable=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)

In [16]:
ENGLISH_STOP_WORDS = set([
    'a',
    'about',
    'above',
    'across',
    'after',
    'afterwards',
    'again',
    'against',
    'ain',
    'all',
    'almost',
    'alone',
    'along',
    'already',
    'also',
    'although',
    'always',
    'am',
    'among',
    'amongst',
    'amoungst',
    'amount',
    'an',
    'and',
    'another',
    'any',
    'anyhow',
    'anyone',
    'anything',
    'anyway',
    'anywhere',
    'are',
    'aren',
    'around',
    'as',
    'at',
    'back',
    'be',
    'became',
    'because',
    'become',
    'becomes',
    'becoming',
    'been',
    'before',
    'beforehand',
    'behind',
    'being',
    'below',
    'beside',
    'besides',
    'between',
    'beyond',
    'bill',
    'both',
    'bottom',
    'but',
    'by',
    'call',
    'can',
    'cannot',
    'cant',
    'co',
    'con',
    'could',
    'couldn',
    'couldnt',
    'cry',
    'd',
    'de',
    'describe',
    'detail',
    'did',
    'didn',
    'do',
    'does',
    'doesn',
    'doing',
    'don',
    'done',
    'down',
    'due',
    'during',
    'each',
    'eg',
    'eight',
    'either',
    'eleven',
    'else',
    'elsewhere',
    'empty',
    'enough',
    'etc',
    'even',
    'ever',
    'every',
    'everyone',
    'everything',
    'everywhere',
    'except',
    'few',
    'fifteen',
    'fify',
    'fill',
    'find',
    'fire',
    'first',
    'five',
    'for',
    'former',
    'formerly',
    'forty',
    'found',
    'four',
    'from',
    'front',
    'full',
    'further',
    'get',
    'give',
    'go',
    'had',
    'hadn',
    'has',
    'hasn',
    'hasnt',
    'have',
    'haven',
    'having',
    'he',
    'hence',
    'her',
    'here',
    'hereafter',
    'hereby',
    'herein',
    'hereupon',
    'hers',
    'herself',
    'him',
    'himself',
    'his',
    'how',
    'however',
    'hundred',
    'i',
    'ie',
    'if',
    'in',
    'inc',
    'indeed',
    'interest',
    'into',
    'is',
    'isn',
    'it',
    'its',
    'itself',
    'just',
    'keep',
    'last',
    'latter',
    'latterly',
    'least',
    'less',
    'll',
    'ltd',
    'm',
    'ma',
    'made',
    'many',
    'may',
    'me',
    'meanwhile',
    'might',
    'mightn',
    'mill',
    'mine',
    'more',
    'moreover',
    'most',
    'mostly',
    'move',
    'much',
    'must',
    'mustn',
    'my',
    'myself',
    'name',
    'namely',
    'needn',
    'neither',
    'never',
    'nevertheless',
    'next',
    'nine',
    'no',
    'nobody',
    'none',
    'noone',
    'nor',
    'not',
    'nothing',
    'now',
    'nowhere',
    'o',
    'of',
    'off',
    'often',
    'on',
    'once',
    'one',
    'only',
    'onto',
    'or',
    'other',
    'others',
    'otherwise',
    'our',
    'ours',
    'ourselves',
    'out',
    'over',
    'own',
    'part',
    'per',
    'perhaps',
    'please',
    'put',
    'rather',
    're',
    's',
    'same',
    'see',
    'seem',
    'seemed',
    'seeming',
    'seems',
    'serious',
    'several',
    'shan',
    'she',
    'should',
    'shouldn',
    'show',
    'side',
    'since',
    'sincere',
    'six',
    'sixty',
    'so',
    'some',
    'somehow',
    'someone',
    'something',
    'sometime',
    'sometimes',
    'somewhere',
    'still',
    'such',
    'system',
    't',
    'take',
    'ten',
    'than',
    'that',
    'the',
    'their',
    'theirs',
    'them',
    'themselves',
    'then',
    'thence',
    'there',
    'thereafter',
    'thereby',
    'therefore',
    'therein',
    'thereupon',
    'these',
    'they',
    'thick',
    'thin',
    'third',
    'this',
    'those',
    'though',
    'three',
    'through',
    'throughout',
    'thru',
    'thus',
    'to',
    'together',
    'too',
    'top',
    'toward',
    'towards',
    'twelve',
    'twenty',
    'two',
    'un',
    'under',
    'until',
    'up',
    'upon',
    'us',
    've',
    'very',
    'via',
    'was',
    'wasn',
    'we',
    'well',
    'were',
    'weren',
    'what',
    'whatever',
    'when',
    'whence',
    'whenever',
    'where',
    'whereafter',
    'whereas',
    'whereby',
    'wherein',
    'whereupon',
    'wherever',
    'whether',
    'which',
    'while',
    'whither',
    'who',
    'whoever',
    'whole',
    'whom',
    'whose',
    'why',
    'will',
    'with',
    'within',
    'without',
    'won',
    'would',
    'wouldn',
    'y',
    'yet',
    'you',
    'your',
    'yours',
    'yourself',
    'yourselves'
])

In [20]:
films = Film.query.all()
films = [[
    film.f_id,
    film.genre,
    film.director,
    film.actor,
    film.title,
    film.overview,
] for film in films]
df = pd.DataFrame(films, columns=['f_id', 'genre', 'director', 'actor', 'title', 'overview'])

In [None]:
df['kwd'] = ''

for index, row in df.iterrows():
    
    overview = row['overview']
    # remove punctuation
    overview = re.sub(r'[^\w\s]', '', overview)
    # convert to lowercase
    overview = overview.lower()
    # remove stopwords
    overview = [word for word in overview.split() if word not in ENGLISH_STOP_WORDS]
    # print(overview)
    row['kwd'] = overview

df['kwd'].head()

0    [imprisoned, men, bond, number, years, finding...
1    [organized, crime, dynastys, aging, patriarch,...
2    [menace, known, joker, wreaks, havoc, chaos, p...
3    [early, life, career, vito, corleone, 1920s, n...
4    [jury, holdout, attempts, prevent, miscarriage...
Name: kwd, dtype: object

In [None]:
df['genre'] = df['genre'].map(lambda x: x.split(','))
df['actor'] = df['actor'].map(lambda x: x.split(',')[:3])
df['director'] = df['director'].map(lambda x: x.split(','))
for index, row in df.iterrows():
    row['genre'] = [x.lower().replace(' ','') for x in row['genre']]
    row['actor'] = [x.lower().replace(' ','') for x in row['actor']]
    row['director'] = [x.lower().replace(' ','') for x in row['director']]
df

Unnamed: 0,title,genre,overview,director,actor,kwd
0,The Shawshank Redemption,[drama],Two imprisoned men bond over a number of years...,[frankdarabont],"[timrobbins, morganfreeman, bobgunton]","[imprisoned, men, bond, number, years, finding..."
1,The Godfather,"[crime, drama]",An organized crime dynasty's aging patriarch t...,[francisfordcoppola],"[marlonbrando, alpacino, jamescaan]","[organized, crime, dynastys, aging, patriarch,..."
2,The Dark Knight,"[action, crime, drama]",When the menace known as the Joker wreaks havo...,[christophernolan],"[christianbale, heathledger, aaroneckhart]","[menace, known, joker, wreaks, havoc, chaos, p..."
3,The Godfather: Part II,"[crime, drama]",The early life and career of Vito Corleone in ...,[francisfordcoppola],"[alpacino, robertdeniro, robertduvall]","[early, life, career, vito, corleone, 1920s, n..."
4,12 Angry Men,"[crime, drama]",A jury holdout attempts to prevent a miscarria...,[sidneylumet],"[henryfonda, leej.cobb, martinbalsam]","[jury, holdout, attempts, prevent, miscarriage..."
...,...,...,...,...,...,...
995,Breakfast at Tiffany's,"[comedy, drama, romance]",A young New York socialite becomes interested ...,[blakeedwards],"[audreyhepburn, georgepeppard, patricianeal]","[young, new, york, socialite, interested, youn..."
996,Giant,"[drama, western]",Sprawling epic covering the life of a Texas ca...,[georgestevens],"[elizabethtaylor, rockhudson, jamesdean]","[sprawling, epic, covering, life, texas, cattl..."
997,From Here to Eternity,"[drama, romance, war]","In Hawaii in 1941, a private is cruelly punish...",[fredzinnemann],"[burtlancaster, montgomeryclift, deborahkerr]","[hawaii, 1941, private, cruelly, punished, box..."
998,Lifeboat,"[drama, war]",Several survivors of a torpedoed merchant ship...,[alfredhitchcock],"[tallulahbankhead, johnhodiak, walterslezak]","[survivors, torpedoed, merchant, ship, world, ..."


In [None]:
df['bag_of_words'] = ''
columns = ['genre', 'director', 'actor', 'kwd']
for index, row in df.iterrows():
    words = ''
    for col in columns:
        words += ' '.join(row[col]) + ' '
    row['bag_of_words'] = words
    
df = df[['title','bag_of_words']]
df

In [None]:
count = CountVectorizer()
count_matrix = count.fit_transform(df['bag_of_words'])
cosine_sim = cosine_similarity(count_matrix, count_matrix)
print(cosine_sim)


[[1.         0.05564149 0.04950738 ... 0.05423261 0.05057217 0.        ]
 [0.05564149 1.         0.14048787 ... 0.05129892 0.04783649 0.08240856]
 [0.04950738 0.14048787 1.         ... 0.04564355 0.04256283 0.03666178]
 ...
 [0.05423261 0.05129892 0.04564355 ... 1.         0.13987572 0.        ]
 [0.05057217 0.04783649 0.04256283 ... 0.13987572 1.         0.03745029]
 [0.         0.08240856 0.03666178 ... 0.         0.03745029 1.        ]]


In [None]:
indices = pd.Series(df['title'])
indices

0      The Shawshank Redemption
1                 The Godfather
2               The Dark Knight
3        The Godfather: Part II
4                  12 Angry Men
                 ...           
995      Breakfast at Tiffany's
996                       Giant
997       From Here to Eternity
998                    Lifeboat
999                The 39 Steps
Name: title, Length: 1000, dtype: object

In [None]:
def recommend(title, cosine_sim = cosine_sim):
    recommended_movies = []
    idx = indices[indices == title].index[0]
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)
    top_10_indices = list(score_series.iloc[1:11].index)
    
    for i in top_10_indices:
        recommended_movies.append(list(df['title'])[i])
        
    return recommended_movies


In [None]:
recommend('The Dark Knight Rises')

['Batman Begins',
 'The Incredibles',
 'The Dark Knight',
 'Die Hard: With a Vengeance',
 'Interstellar',
 'Shichinin no samurai',
 'Yip Man',
 'The Blues Brothers',
 'First Blood',
 'Mad Max 2']

In [32]:
reviews = Review.query.all()
films = Film.query.all()
films_dict = {
    film.f_id: film.title for film in films
}
reviews = [[
    films_dict[review.f_id],
    review.u_id,
    review.rating,
] for review in reviews]
df = pd.DataFrame(reviews, columns=['f_id', 'u_id', 'rating'])
df.head()

Unnamed: 0,f_id,u_id,rating
0,The Shawshank Redemption,84df4ab052f43c8696603445608d3472,2
1,The Shawshank Redemption,6fde09279aa93426a5a4960c9e203db0,4
2,The Shawshank Redemption,761cc2d1be6732339f568d974943d9fc,2
3,The Shawshank Redemption,8f1bc099930c321e85158a9c41ffd6cc,2
4,The Shawshank Redemption,a0e0d06eae9239829160148580dd9149,2


In [34]:
movie_features_df=df.pivot_table(
      index='f_id',columns='u_id',values='rating').fillna(0)
movie_features_df

u_id,008ef55e4cbb3347a7816d9145011d53,008fdf556dc23c1aa9e8ad5c2a6c60db,00cca7767cba3ae3a797f46eb1ff4140,00ccd6470c0f3f8d96f3f554401407a7,00e85ef45c9e32169a16e576af71020d,00ead050a7e13c1d8de6a65ba316a7d5,0103f36967bc3656be5d8e59c5917945,01390892caaa311f8b5e071b3dd4bd5a,01bd4421f5d536f997d46f6ea3fe9b92,025bc11a2c64356bba7f72344f539d57,...,fd44142ea86138f29ed3fd545775a480,fd7616a0f81c3cd1a8da2c065a42bf3f,fda7f019076438978ee7e751eab5b2d3,fe0bfde5fecd34dea20e249e7316a9af,fe2a2f541b80370a9fe1b2d9eaa61e3e,feff35e6bd5c32fe8d6fd5b8c253442d,ff44a07034fa32399278d051ca2dafa4,ff6eed84dfab39109d36ef7e0893d552,ff95195eb23c3d43b06d165876e2a99d,ffe1a4d1d0023589b14424541fe65a91
f_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
(500) Days of Summer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12 Angry Men,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,4.0,0.0
12 Years a Slave,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1917,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2001: A Space Odyssey,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zootopia,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zulu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Zwartboek,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
À bout de souffle,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
from scipy.sparse import csr_matrix
movie_features_df_csr = csr_matrix(movie_features_df.values)

from sklearn.neighbors import NearestNeighbors
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(movie_features_df_csr)

In [39]:
import numpy as np
query_index = np.random.choice(movie_features_df.shape[0])
print(query_index)
distances, indices = model.kneighbors(movie_features_df.iloc[query_index,:].values.reshape(1, -1),
                                          n_neighbors = 6)

for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'
              .format(movie_features_df.index[query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'
              .format(i, movie_features_df.index[indices.flatten()[i]],
                      distances.flatten()[i]))

624
Recommendations for Raging Bull:

1: Paths of Glory, with distance of 0.4264606653235956:
2: Mommy, with distance of 0.487010823957423:
3: Dà hóng denglong gaogao guà, with distance of 0.6755571577384749:
4: Man on Fire, with distance of 0.694112354839251:
5: The Elephant Man, with distance of 0.7132303326617978:


In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
reviews = Review.query.all()
reviews = [[
    review.f_id,
    review.u_id,
    review.rating,
] for review in reviews]

df = pd.DataFrame(reviews, columns=['f_id', 'u_id', 'rating'])

In [72]:
matrix = df.pivot_table(index='u_id', columns='f_id', values='rating')
matrix

f_id,00537e16ca9933bc89e98f18f6a82b56,009e831595153771b33df743d9997e33,00a663b38e8c30de8061cfadc748c155,00c5f48faeaa39fab039c436cb1238b6,010d145d4a013f80a71ec0300525eb67,0138ab70c51a349ca0b7f7e79cde578e,015c6dcd139834a884cfc7cab878b032,016e87b8489f3976aabba815fec3c55a,0176f8d8ab133f8dbef1d6bd76f7d864,01b7c03689113a6aa4e7ed403a42bd93,...,fd2e3e9e23c63ce4827ac2e0cd8b39bd,fd54b4830fcf372b8e3e8f73051e5527,fd56fc92e58134398dcb018f005f65fb,fdab47747a45363fba08e70062a7532e,feb77e23dd9534a49f5fd2da23cb2e74,fee55cd833ee341cbd320901ba42eac5,ff1459cd107c37d88df76e56d3761164,ff34fee1555131e6a8401bfa0c7866c5,ff3bb9fcdc0d3c2d957a29d2644b1335,ffb4f0730dd43df3b097acca8b89ddad
u_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
008ef55e4cbb3347a7816d9145011d53,,,,,,,,,,,...,,,,,,,,,,
008fdf556dc23c1aa9e8ad5c2a6c60db,,,,,,,,,,,...,,,,,,,,,,
00cca7767cba3ae3a797f46eb1ff4140,,,,,,,,,,,...,,,,,,,,,,
00ccd6470c0f3f8d96f3f554401407a7,,,,,,,,,,,...,,,,,,,,,,
00e85ef45c9e32169a16e576af71020d,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feff35e6bd5c32fe8d6fd5b8c253442d,,,,,,,,,,,...,,,,,,,,,,
ff44a07034fa32399278d051ca2dafa4,,,,,,,,,,,...,,,,,,,,,,
ff6eed84dfab39109d36ef7e0893d552,,,,,,,,,,,...,,,,,,,,,,
ff95195eb23c3d43b06d165876e2a99d,,,,,,,,,,,...,,,,,,0.0,,,,


In [74]:
mu = np.mean(np.mean(matrix))
bx = np.array(np.mean(matrix, axis=1) - mu)
by = np.array(np.mean(matrix, axis=0) - mu)

X = df.sub(bx+mu, axis=0)   # Demean
X = X.div(np.sqrt(np.sum(np.square(X), axis=1)), axis=0)
X.fillna(0, inplace=True)
similarity_matrix = np.dot(X, X.T)

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


ValueError: Unable to coerce to Series, length must be 5451: given 999