In [34]:
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy import Table, select
from sqlalchemy.ext.hybrid import hybrid_property
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

from Models.helper import *

import pandas as pd
import bcrypt
import re

In [35]:
# Flask application that hosts the database configuration for this notebook.
app = Flask(__name__)

# All SQLAlchemy settings live in one mapping and are applied in a single call.
db_config = dict(
    SQLALCHEMY_DATABASE_URI='sqlite:///Database/doubi_database.db',
    SQLALCHEMY_TRACK_MODIFICATIONS=False,
)
app.config.update(db_config)

# Flask-SQLAlchemy handle; every model and table below is declared through it.
db = SQLAlchemy(app)

In [36]:
# Association table for the self-referential "follows" relation between users:
# each row links a follower (follower_id) to the user being followed (followed_id).
followers = Table('followers', db.metadata,
    db.Column('followed_id', db.String(32), db.ForeignKey('user.u_id')),
    db.Column('follower_id', db.String(32), db.ForeignKey('user.u_id'))
)

# Association table for user-to-user blocking: blocker_id blocks blocked_id.
blocked_users = Table('blocked_users', db.metadata,
    db.Column('blocked_id', db.String(32), db.ForeignKey('user.u_id')),
    db.Column('blocker_id', db.String(32), db.ForeignKey('user.u_id')),
)

# Association table linking a user to the films on their wish list.
# NOTE(review): none of the three association tables above declares a primary key
# or unique constraint, so duplicate pairs are not rejected by the schema.
users_wish_film = Table('users_wish_film', db.metadata,
    db.Column('user_id', db.String(32), db.ForeignKey('user.u_id')),
    db.Column('film_id', db.String(32), db.ForeignKey('film.f_id'))
)

# Stand-alone word list table (auto-incrementing id + word); presumably consulted
# when setting Review.bad_word -- that logic is not visible in this file.
bad_word = db.Table('bad_word', db.metadata,
    db.Column('w_id', db.Integer, primary_key=True, autoincrement=True),
    db.Column('word', db.String(32), nullable=False))

class User(db.Model):
    """Account row in the ``user`` table, with its social graph and activity.

    Relationships (all ``lazy='dynamic'``, i.e. they yield queries):

    * ``followed`` / ``followers`` -- users this user follows / is followed by.
    * ``blocked`` / ``blockers`` -- users this user blocked / was blocked by.
    * ``reviews``, ``review_likes``, ``review_dislikes`` -- authored content.
    * ``wish`` -- films on the user's wish list.

    The plain-text password is write-only: assign ``user.password = '...'`` to
    store a bcrypt hash, and call ``verify_password`` to check a candidate.
    """
    __tablename__ = 'user'

    u_id = db.Column(db.String(32), primary_key=True, nullable=False, unique=True, default=u_id_generator)
    username = db.Column(db.String(80), nullable=False, unique=True)
    password_hash = db.Column(db.Text, nullable=False)
    email = db.Column(db.String(80), nullable=False, unique=True)
    url_photo = db.Column(db.Text, nullable=True)
    is_admin = db.Column(db.Boolean, nullable=False, default=False)
    is_blocked = db.Column(db.Boolean, nullable=False, default=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Self-referential many-to-many: this user is the follower side of the link.
    followed = db.relationship('User',
                                secondary=followers,
                                primaryjoin=(followers.c.follower_id == u_id),
                                secondaryjoin=(followers.c.followed_id == u_id),
                                backref=db.backref('followers', lazy='dynamic'),
                                lazy='dynamic')

    # Self-referential many-to-many: this user is the blocker side of the link.
    blocked = db.relationship('User',
                                secondary=blocked_users,
                                primaryjoin=(blocked_users.c.blocker_id == u_id),
                                secondaryjoin=(blocked_users.c.blocked_id == u_id),
                                backref=db.backref('blockers', lazy='dynamic'),
                                lazy='dynamic')

    reviews = db.relationship('Review', backref='user', lazy='dynamic')

    review_likes = db.relationship('Review_Like', backref='user', lazy='dynamic')
    review_dislikes = db.relationship('Review_Dislike', backref='user', lazy='dynamic')

    wish = db.relationship('Film', secondary=users_wish_film, backref='user', lazy='dynamic')

    @property
    def password(self):
        """Reading the password is refused; only the hash is ever stored.

        :raises AttributeError: always.
        """
        raise AttributeError('password is not a readable attribute')

    @password.setter
    def password(self, password):
        """Hash ``password`` with a fresh bcrypt salt and store it in ``password_hash``."""
        hashed = bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt())
        # bcrypt hands back bytes, while password_hash is a Text column: store
        # text so the value round-trips on any database driver.
        self.password_hash = hashed.decode('utf-8')

    def verify_password(self, password):
        """Return True if ``password`` matches the stored bcrypt hash.

        :param password: candidate plain-text password (str).
        :return: bool result of ``bcrypt.checkpw``.
        """
        stored_hash = self.password_hash
        # The column may yield str (new rows, or drivers that decode text) or
        # bytes (rows written before hashes were stored as text); checkpw
        # only accepts bytes.
        if isinstance(stored_hash, str):
            stored_hash = stored_hash.encode('utf-8')
        return bcrypt.checkpw(password.encode('utf-8'), stored_hash)

    def __repr__(self):
        return '<User %r>' % self.username


    

class Film(db.Model):
    """Film row in the ``film`` table plus rating helpers derived from its reviews.

    ``genre`` and ``actor`` hold comma-separated text; ``genres`` / ``actors``
    expose them as lists. ``reviews`` is a dynamic relationship (a query).
    """
    __tablename__ = 'film'

    f_id = db.Column(db.String(32), primary_key=True, nullable=False, unique=True, default=f_id_generator)
    title = db.Column(db.String(80), nullable=False)
    genre = db.Column(db.String(80), nullable=False)
    year = db.Column(db.Integer, nullable=True)
    run_time = db.Column(db.String(16), nullable=True)
    rating_imdb = db.Column(db.Float, nullable=True)
    overview = db.Column(db.String(500), nullable=True)
    director = db.Column(db.String(80), nullable=True)
    actor = db.Column(db.String(200), nullable=True)
    url_poster = db.Column(db.Text, nullable=True)
    rating_doubi = db.Column(db.Float, nullable=True)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    reviews = db.relationship('Review', backref='film', lazy='dynamic')

    @staticmethod
    def _split_csv(value):
        """Split comma-separated text into stripped items; a NULL column gives []."""
        if value is None:
            return []
        return [item.strip() for item in value.split(',')]

    @staticmethod
    def _mean_rating(reviews):
        """Average ``rating`` of ``reviews`` rounded to one decimal, 0 when empty."""
        if len(reviews) == 0:
            return 0
        return round(sum(review.rating for review in reviews) / len(reviews), 1)

    @staticmethod
    def _count_ratings(reviews):
        """Count how many of ``reviews`` carry each rating value."""
        # NOTE(review): buckets 0-4 are pre-seeded; any other rating value (e.g. 5)
        # only appears as a key once a review has it -- confirm the intended scale.
        rating_distribution = {x: 0 for x in range(0, 5)}
        for review in reviews:
            rating_distribution[review.rating] = rating_distribution.get(review.rating, 0) + 1
        return rating_distribution

    def _reviews_visible_to(self, current_user):
        """Reviews of this film whose author is not blocked by ``current_user``."""
        # A set gives constant-time membership tests while filtering.
        blocked_ids = {blocked_user.u_id for blocked_user in current_user.blocked.all()}
        return [review for review in self.reviews.all() if review.u_id not in blocked_ids]

    @hybrid_property
    def rating(self):
        """Average review rating (one decimal), or 0 if the film has no reviews."""
        return self._mean_rating(self.reviews.all())

    @rating.expression
    def rating(cls):
        """SQL form of ``rating``: a correlated scalar subquery averaging reviews.

        Unlike the instance-level value it is neither rounded nor defaulted to 0.
        """
        # ``func`` is reached through ``db`` because this file never imports it
        # explicitly, and the SELECT is turned into a scalar subquery so it can
        # be used as a column expression (filter / order_by).
        return (
            select(db.func.avg(Review.rating))
            .where(Review.f_id == cls.f_id)
            .scalar_subquery()
        )

    @property
    def rating_distribution(self):
        """Mapping of rating value -> number of reviews with that rating."""
        return self._count_ratings(self.reviews.all())

    @property
    def genres(self):
        """Genres as a list of stripped strings."""
        return self._split_csv(self.genre)

    @property
    def actors(self):
        """Actors as a list of stripped strings ([] when ``actor`` is NULL)."""
        return self._split_csv(self.actor)

    def rating_customized(self, current_user):
        """Like ``rating`` but ignoring reviews by users ``current_user`` blocked."""
        return self._mean_rating(self._reviews_visible_to(current_user))

    def rating_distribution_customized(self, current_user):
        """Like ``rating_distribution`` but ignoring reviews by blocked users."""
        return self._count_ratings(self._reviews_visible_to(current_user))

    def __repr__(self):
        return '<Film %r>' % self.title
    


class Review(db.Model):
    """A user's review of a film: integer rating, optional text, like/dislike links."""
    __tablename__ = 'review'

    r_id = db.Column(db.String(32), primary_key=True, nullable=False, unique=True, default=r_id_generator)
    # Author and subject of the review.
    u_id = db.Column(db.String(32), db.ForeignKey('user.u_id'), nullable=False)
    f_id = db.Column(db.String(32), db.ForeignKey('film.f_id'), nullable=False)
    content = db.Column(db.String(500), nullable=True)
    rating = db.Column(db.Integer, nullable=False)
    # Boolean flag; presumably set when the content matches the bad_word table -- TODO confirm.
    bad_word = db.Column(db.Boolean, nullable=False, default=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)
    updated_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Dynamic relationships: each attribute is a query over the reaction rows.
    likes = db.relationship('Review_Like', backref='review', lazy='dynamic')
    dislikes = db.relationship('Review_Dislike', backref='review', lazy='dynamic')
    
    @property
    def like(self):
        """Number of Review_Like rows attached to this review."""
        return self.likes.count()
    
    @property
    def dislike(self):
        """Number of Review_Dislike rows attached to this review."""
        return self.dislikes.count()


class Review_Like(db.Model):
    """One user's like on one review; the (r_id, u_id) pair is the primary key."""
    __tablename__ = 'review_like'

    r_id = db.Column(db.String(32), db.ForeignKey('review.r_id'), primary_key=True, nullable=False)
    u_id = db.Column(db.String(32), db.ForeignKey('user.u_id'), primary_key=True, nullable=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)


class Review_Dislike(db.Model):
    """One user's dislike on one review; the (r_id, u_id) pair is the primary key."""
    __tablename__ = 'review_dislike'

    r_id = db.Column(db.String(32), db.ForeignKey('review.r_id'), primary_key=True, nullable=False)
    u_id = db.Column(db.String(32), db.ForeignKey('user.u_id'), primary_key=True, nullable=False)

    created_time = db.Column(db.DateTime, nullable=False, default=datetime.utcnow)

In [37]:
# Lower-case English stop words removed from film overviews before vectorising.
# Contraction fragments ('don', 'll', 've', ...) are included because punctuation
# is stripped from the text before it is tokenised.
# 'fifty' was missing: the list only carried the misspelling 'fify', so the real
# word was never filtered. The legacy token is kept so nothing that used to be
# removed starts slipping through.
ENGLISH_STOP_WORDS = {
    'a', 'about', 'above', 'across', 'after', 'afterwards', 'again', 'against',
    'ain', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although',
    'always', 'am', 'among', 'amongst', 'amoungst', 'amount', 'an', 'and',
    'another', 'any', 'anyhow', 'anyone', 'anything', 'anyway', 'anywhere',
    'are', 'aren', 'around', 'as', 'at',
    'back', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been',
    'before', 'beforehand', 'behind', 'being', 'below', 'beside', 'besides',
    'between', 'beyond', 'bill', 'both', 'bottom', 'but', 'by',
    'call', 'can', 'cannot', 'cant', 'co', 'con', 'could', 'couldn', 'couldnt',
    'cry',
    'd', 'de', 'describe', 'detail', 'did', 'didn', 'do', 'does', 'doesn',
    'doing', 'don', 'done', 'down', 'due', 'during',
    'each', 'eg', 'eight', 'either', 'eleven', 'else', 'elsewhere', 'empty',
    'enough', 'etc', 'even', 'ever', 'every', 'everyone', 'everything',
    'everywhere', 'except',
    'few', 'fifteen', 'fifty', 'fify', 'fill', 'find', 'fire', 'first', 'five',
    'for', 'former', 'formerly', 'forty', 'found', 'four', 'from', 'front',
    'full', 'further',
    'get', 'give', 'go',
    'had', 'hadn', 'has', 'hasn', 'hasnt', 'have', 'haven', 'having', 'he',
    'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'hereupon', 'hers',
    'herself', 'him', 'himself', 'his', 'how', 'however', 'hundred',
    'i', 'ie', 'if', 'in', 'inc', 'indeed', 'interest', 'into', 'is', 'isn',
    'it', 'its', 'itself',
    'just',
    'keep',
    'last', 'latter', 'latterly', 'least', 'less', 'll', 'ltd',
    'm', 'ma', 'made', 'many', 'may', 'me', 'meanwhile', 'might', 'mightn',
    'mill', 'mine', 'more', 'moreover', 'most', 'mostly', 'move', 'much',
    'must', 'mustn', 'my', 'myself',
    'name', 'namely', 'needn', 'neither', 'never', 'nevertheless', 'next',
    'nine', 'no', 'nobody', 'none', 'noone', 'nor', 'not', 'nothing', 'now',
    'nowhere',
    'o', 'of', 'off', 'often', 'on', 'once', 'one', 'only', 'onto', 'or',
    'other', 'others', 'otherwise', 'our', 'ours', 'ourselves', 'out', 'over',
    'own',
    'part', 'per', 'perhaps', 'please', 'put',
    'rather', 're',
    's', 'same', 'see', 'seem', 'seemed', 'seeming', 'seems', 'serious',
    'several', 'shan', 'she', 'should', 'shouldn', 'show', 'side', 'since',
    'sincere', 'six', 'sixty', 'so', 'some', 'somehow', 'someone', 'something',
    'sometime', 'sometimes', 'somewhere', 'still', 'such', 'system',
    't', 'take', 'ten', 'than', 'that', 'the', 'their', 'theirs', 'them',
    'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby',
    'therefore', 'therein', 'thereupon', 'these', 'they', 'thick', 'thin',
    'third', 'this', 'those', 'though', 'three', 'through', 'throughout',
    'thru', 'thus', 'to', 'together', 'too', 'top', 'toward', 'towards',
    'twelve', 'twenty', 'two',
    'un', 'under', 'until', 'up', 'upon', 'us',
    've', 'very', 'via',
    'was', 'wasn', 'we', 'well', 'were', 'weren', 'what', 'whatever', 'when',
    'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby',
    'wherein', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whither',
    'who', 'whoever', 'whole', 'whom', 'whose', 'why', 'will', 'with', 'within',
    'without', 'won', 'would', 'wouldn',
    'y', 'yet', 'you', 'your', 'yours', 'yourself', 'yourselves',
}

In [38]:
# Pull every film out of the database and keep only the attributes that the
# content-based recommender reads, one list per film, in column order.
film_columns = ['f_id', 'genre', 'director', 'actor', 'title', 'overview']
films = [
    [getattr(film, column) for column in film_columns]
    for film in Film.query.all()
]
df = pd.DataFrame(films, columns=film_columns)

In [39]:
def _overview_keywords(overview):
    """Turn an overview into lower-case keywords without punctuation or stop words.

    A missing overview (the column is nullable) yields an empty list.
    """
    if not isinstance(overview, str):
        return []
    # Strip punctuation first, then lowercase, then drop stop words.
    cleaned = re.sub(r'[^\w\s]', '', overview).lower()
    return [word for word in cleaned.split() if word not in ENGLISH_STOP_WORDS]


# Assign the whole column at once: writing into the row yielded by
# DataFrame.iterrows() is not guaranteed to reach the frame (it may be a copy).
df['kwd'] = df['overview'].map(_overview_keywords)

df['kwd'].head()

0    [imprisoned, men, bond, number, years, finding...
1    [organized, crime, dynastys, aging, patriarch,...
2    [menace, known, joker, wreaks, havoc, chaos, p...
3    [early, life, career, vito, corleone, 1920s, n...
4    [jury, holdout, attempts, prevent, miscarriage...
Name: kwd, dtype: object

In [40]:
def _normalise_names(raw, limit=None):
    """Split comma-separated names into lower-case tokens with spaces removed.

    :param raw: comma-separated string; a list is accepted too so the cell can be
        re-run on already-normalised data; anything else (e.g. a NULL column
        value) yields an empty list.
    :param limit: keep only the first ``limit`` names (None keeps all).
    :return: list of normalised tokens, e.g. 'Tim Robbins' -> 'timrobbins'.
    """
    if isinstance(raw, str):
        names = raw.split(',')
    elif isinstance(raw, list):
        names = raw
    else:
        names = []
    return [name.lower().replace(' ', '') for name in names[:limit]]


# Whole-column assignment instead of mutating rows yielded by iterrows(), which
# is not guaranteed to write back into the frame.
df['genre'] = df['genre'].map(_normalise_names)
df['actor'] = df['actor'].map(lambda names: _normalise_names(names, limit=3))
df['director'] = df['director'].map(_normalise_names)
df

Unnamed: 0,f_id,genre,director,actor,title,overview,kwd
0,b6c27b9bba493012834fa0f4f64dd519,[drama],[frankdarabont],"[timrobbins, morganfreeman, bobgunton]",The Shawshank Redemption,Two imprisoned men bond over a number of years...,"[imprisoned, men, bond, number, years, finding..."
1,9e0f2212f7b43f0299ed273e076ae572,"[crime, drama]",[francisfordcoppola],"[marlonbrando, alpacino, jamescaan]",The Godfather,An organized crime dynasty's aging patriarch t...,"[organized, crime, dynastys, aging, patriarch,..."
2,1e6393419260322cb338ae629d3c0cb9,"[action, crime, drama]",[christophernolan],"[christianbale, heathledger, aaroneckhart]",The Dark Knight,When the menace known as the Joker wreaks havo...,"[menace, known, joker, wreaks, havoc, chaos, p..."
3,f64262d36cfb3737b2107a6973584c41,"[crime, drama]",[francisfordcoppola],"[alpacino, robertdeniro, robertduvall]",The Godfather: Part II,The early life and career of Vito Corleone in ...,"[early, life, career, vito, corleone, 1920s, n..."
4,d8de113e940e37cc981c514bb53d81e0,"[crime, drama]",[sidneylumet],"[henryfonda, leej.cobb, martinbalsam]",12 Angry Men,A jury holdout attempts to prevent a miscarria...,"[jury, holdout, attempts, prevent, miscarriage..."
...,...,...,...,...,...,...,...
995,5367dabd1e4f3468b38fd56bdbc01021,"[comedy, drama, romance]",[blakeedwards],"[audreyhepburn, georgepeppard, patricianeal]",Breakfast at Tiffany's,A young New York socialite becomes interested ...,"[young, new, york, socialite, interested, youn..."
996,39940adf0eb5375b96cb8748cf061c2a,"[drama, western]",[georgestevens],"[elizabethtaylor, rockhudson, jamesdean]",Giant,Sprawling epic covering the life of a Texas ca...,"[sprawling, epic, covering, life, texas, cattl..."
997,d547c00412023be5bef54a6ae1371263,"[drama, romance, war]",[fredzinnemann],"[burtlancaster, montgomeryclift, deborahkerr]",From Here to Eternity,"In Hawaii in 1941, a private is cruelly punish...","[hawaii, 1941, private, cruelly, punished, box..."
998,673913d08a18381ca646656f14c0e84d,"[drama, war]",[alfredhitchcock],"[tallulahbankhead, johnhodiak, walterslezak]",Lifeboat,Several survivors of a torpedoed merchant ship...,"[survivors, torpedoed, merchant, ship, world, ..."


In [41]:
# Concatenate the token lists of every feature column into one space-separated
# string per film (each column's tokens are followed by a single space, exactly
# as before). The column is built in one assignment because writing into rows
# yielded by iterrows() is not guaranteed to update the frame.
columns = ['genre', 'director', 'actor', 'kwd']
df['bag_of_words'] = [
    ''.join(' '.join(tokens) + ' ' for tokens in row)
    for row in df[columns].itertuples(index=False, name=None)
]

# Only the title and its bag of words are needed from here on.
df = df[['title','bag_of_words']]
df

Unnamed: 0,title,bag_of_words
0,The Shawshank Redemption,drama frankdarabont timrobbins morganfreeman b...
1,The Godfather,crime drama francisfordcoppola marlonbrando al...
2,The Dark Knight,action crime drama christophernolan christianb...
3,The Godfather: Part II,crime drama francisfordcoppola alpacino robert...
4,12 Angry Men,crime drama sidneylumet henryfonda leej.cobb m...
...,...,...
995,Breakfast at Tiffany's,comedy drama romance blakeedwards audreyhepbur...
996,Giant,drama western georgestevens elizabethtaylor ro...
997,From Here to Eternity,drama romance war fredzinnemann burtlancaster ...
998,Lifeboat,drama war alfredhitchcock tallulahbankhead joh...


In [42]:
# Vectorise each film's bag of words into token counts (one row per film).
count = CountVectorizer()
count_matrix = count.fit_transform(df['bag_of_words'])
# Pairwise cosine similarity between all films: cosine_sim[i][j] compares
# row i with row j of count_matrix.
cosine_sim = cosine_similarity(count_matrix, count_matrix)
print(cosine_sim)


  (0, 2074)	1
  (0, 2728)	1
  (0, 7435)	1
  (0, 4996)	1
  (0, 872)	1
  (0, 3452)	1
  (0, 4789)	1
  (0, 883)	1
  (0, 5248)	1
  (0, 8107)	1
  (0, 2610)	1
  (0, 6877)	1
  (0, 2392)	1
  (0, 6050)	1
  (0, 165)	1
  (0, 1440)	1
  (0, 1803)	1
  (1, 2074)	1
  (1, 1610)	2
  (1, 2721)	1
  (1, 4640)	1
  (1, 339)	1
  (1, 3689)	1
  (1, 5351)	1
  (1, 2124)	1
  :	:
  (999, 1610)	1
  (999, 6253)	1
  (999, 6528)	1
  (999, 4563)	2
  (999, 3190)	1
  (999, 7407)	1
  (999, 4113)	1
  (999, 147)	1
  (999, 5064)	1
  (999, 7066)	1
  (999, 7577)	1
  (999, 308)	1
  (999, 6412)	1
  (999, 229)	2
  (999, 6604)	1
  (999, 4412)	1
  (999, 6976)	1
  (999, 7029)	1
  (999, 7602)	1
  (999, 3509)	1
  (999, 7002)	1
  (999, 6293)	1
  (999, 4506)	1
  (999, 4473)	1
  (999, 1560)	1
[[1.         0.05564149 0.04950738 ... 0.05423261 0.05057217 0.        ]
 [0.05564149 1.         0.14048787 ... 0.05129892 0.04783649 0.08240856]
 [0.04950738 0.14048787 1.         ... 0.04564355 0.04256283 0.03666178]
 ...
 [0.05423261 0.05129892 0.0

In [43]:
# Title lookup used by recommend(): maps a row label of df to the film title.
indices = pd.Series(df['title'])
indices

0      The Shawshank Redemption
1                 The Godfather
2               The Dark Knight
3        The Godfather: Part II
4                  12 Angry Men
                 ...           
995      Breakfast at Tiffany's
996                       Giant
997       From Here to Eternity
998                    Lifeboat
999                The 39 Steps
Name: title, Length: 1000, dtype: object

In [44]:
def recommend(title, cosine_sim = cosine_sim, top_n = 10):
    """Return the titles of the films most similar to ``title``.

    :param title: exact film title as stored in ``indices``; if several films
        share the title, the first one is used.
    :param cosine_sim: square film-by-film similarity matrix, ordered like
        ``indices`` (defaults to the matrix computed above).
    :param top_n: number of recommendations to return (default 10).
    :return: list of up to ``top_n`` titles, most similar first; the queried
        film itself is never included.
    :raises IndexError: if ``title`` is not present in ``indices``.
    """
    matches = (indices == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError('title %r not found in indices' % (title,))
    idx = int(matches[0])

    # Rank every film by similarity, then remove the query film explicitly
    # rather than assuming it sorts first (ties or an all-zero row break that).
    score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)
    top_indices = list(score_series.drop(idx).index[:top_n])

    # Titles come from ``indices`` rather than the global ``df``, which later
    # cells rebind to a different frame.
    return indices.iloc[top_indices].tolist()


In [45]:
# Example: the ten films whose bag of words is closest to this title.
recommend('The Dark Knight Rises')

['Batman Begins',
 'The Incredibles',
 'The Dark Knight',
 'Die Hard: With a Vengeance',
 'Interstellar',
 'Shichinin no samurai',
 'Yip Man',
 'The Blues Brothers',
 'First Blood',
 'Mad Max 2']

In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [47]:
# Load every review as a [film id, user id, rating] triple and tabulate the
# triples for the collaborative-filtering section.
reviews = [
    [review.f_id, review.u_id, review.rating]
    for review in Review.query.all()
]

df = pd.DataFrame(reviews, columns=['f_id', 'u_id', 'rating'])

In [50]:
# User x film rating matrix: rows are users, columns are films, NaN where a user
# has not rated a film (duplicate user/film pairs are averaged by pivot_table).
matrix = df.pivot_table(index='u_id', columns='f_id', values='rating')

In [52]:
# Pairwise correlation between users (DataFrame.corr correlates columns, hence the
# transpose). Pairs without enough co-rated films come out as NaN.
# NOTE(review): with very few shared films the coefficient is easily +/-1.0;
# consider passing min_periods to corr() -- confirm desired threshold.
user_similarity = matrix.T.corr()
user_similarity

u_id,008ef55e4cbb3347a7816d9145011d53,008fdf556dc23c1aa9e8ad5c2a6c60db,00cca7767cba3ae3a797f46eb1ff4140,00ccd6470c0f3f8d96f3f554401407a7,00e85ef45c9e32169a16e576af71020d,00ead050a7e13c1d8de6a65ba316a7d5,0103f36967bc3656be5d8e59c5917945,01390892caaa311f8b5e071b3dd4bd5a,01bd4421f5d536f997d46f6ea3fe9b92,025bc11a2c64356bba7f72344f539d57,...,fd44142ea86138f29ed3fd545775a480,fd7616a0f81c3cd1a8da2c065a42bf3f,fda7f019076438978ee7e751eab5b2d3,fe0bfde5fecd34dea20e249e7316a9af,fe2a2f541b80370a9fe1b2d9eaa61e3e,feff35e6bd5c32fe8d6fd5b8c253442d,ff44a07034fa32399278d051ca2dafa4,ff6eed84dfab39109d36ef7e0893d552,ff95195eb23c3d43b06d165876e2a99d,ffe1a4d1d0023589b14424541fe65a91
u_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
008ef55e4cbb3347a7816d9145011d53,1.0,,,,,,,,,,...,,,,,,,,,,
008fdf556dc23c1aa9e8ad5c2a6c60db,,1.0,,,,,,,,,...,,,,,,,,,,
00cca7767cba3ae3a797f46eb1ff4140,,,1.0,,,,,,,,...,,,,,,,,,,
00ccd6470c0f3f8d96f3f554401407a7,,,,1.0,,,,,,,...,,,,,,,,,,
00e85ef45c9e32169a16e576af71020d,,,,,1.000000,,,,,,...,,,,,1.0,-0.891042,,-0.296432,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
feff35e6bd5c32fe8d6fd5b8c253442d,,,,,-0.891042,,,,,,...,,,,,-1.0,1.000000,,1.000000,,
ff44a07034fa32399278d051ca2dafa4,,,,,,,,,,,...,,,,,,,1.0,,,
ff6eed84dfab39109d36ef7e0893d552,,,,,-0.296432,,,,,,...,,,,,-0.5,1.000000,,1.000000,,
ff95195eb23c3d43b06d165876e2a99d,,,,,,,,,,,...,,,,,,,,,1.0,


In [61]:
def predict(uid, iid, ratings_matrix, user_similar):
    '''
    Predict the rating a given user would give to a given item.

    The prediction is the similarity-weighted average of the ratings that the
    user's positively correlated neighbours gave to the item.

    :param uid: user ID (a label of ``user_similar`` and a row label of ``ratings_matrix``)
    :param iid: item ID (a column label of ``ratings_matrix``)
    :param ratings_matrix: user-item rating matrix, NaN where no rating exists
    :param user_similar: pairwise user-user similarity matrix
    :return: predicted rating, rounded to two decimals
    :raises Exception: if the user has no positively similar user, or if none
        of the similar users rated the item
    '''
    print("开始预测用户<%s>对电影<%s>的评分..."%(uid, iid))
    # 1. Find the users similar to uid (excluding uid itself and unknown pairs),
    #    keeping only positively correlated ones.
    similar_users = user_similar[uid].drop([uid]).dropna()
    similar_users = similar_users[similar_users > 0]
    if similar_users.empty:
        raise Exception("用户<%s>没有相似的用户" % uid)

    # 2. Keep only the neighbours that actually rated item iid.
    rated_by = ratings_matrix[iid].dropna().index
    finally_similar_users = similar_users[similar_users.index.isin(rated_by)]
    if finally_similar_users.empty:
        # Without this guard the weighted average below divides zero by zero.
        raise Exception("用户<%s>的相似用户中没有人对电影<%s>评过分" % (uid, iid))

    # 3. Similarity-weighted average of the neighbours' ratings for iid.
    numerator = 0    # sum of similarity * neighbour rating
    denominator = 0    # sum of similarities
    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    for sim_uid, similarity in finally_similar_users.items():
        sim_user_rating_for_item = ratings_matrix.loc[sim_uid, iid]
        numerator += similarity * sim_user_rating_for_item
        denominator += similarity

    predict_rating = numerator/denominator
    print("预测出用户<%s>对电影<%s>的评分：%0.2f" % (uid, iid, predict_rating))
    return round(predict_rating, 2)

In [62]:
# Example call. For this user the recorded run ends in predict()'s
# "no similar users" Exception (see the output below).
predict('b1392eecf57e302db360584c016212b9','151026f27e4e38899a7bcb03feb7a1b3',matrix,user_similarity)

开始预测用户<b1392eecf57e302db360584c016212b9>对电影<151026f27e4e38899a7bcb03feb7a1b3>的评分...


Exception: 用户<b1392eecf57e302db360584c016212b9>没有相似的用户