# 电影推荐系统

In [1]:
import numpy as np 
import pandas as pd 
import os
from surprise import Reader, Dataset, SVD
from surprise import KNNBaseline
from surprise import KNNWithMeans
from surprise import KNNBasic
from surprise.model_selection import cross_validate
import random
import csv

## 针对电影相似度的推荐算法

In [2]:
class Movie_KNN_recommender:
    """Item-based KNN recommender that looks up movies similar to a given movie.

    The model is fitted on every rating in ``user_movie.csv`` using the
    ``pearson_baseline`` similarity between items (``user_based=False``).

    Attributes:
        index: DataFrame read from ``movie_info.csv``; its ``movieId`` and
            ``title`` columns are used to turn ids into titles.
        reader: the ``Reader`` used to load the ratings into a ``Dataset``.
        ratings: DataFrame read from ``user_movie.csv``; the ``userId``,
            ``movieId`` and ``rating`` columns are used for training.
        algo: the fitted KNN algorithm selected by ``mode``.
    """

    # Accepted values of ``mode``: 0 -> KNNBaseline, 1 -> KNNWithMeans,
    # 2 -> KNNBasic.
    _VALID_MODES = (0, 1, 2)

    def __init__(self, mode=0):
        """Load the data files and fit the KNN variant selected by ``mode``.

        Args:
            mode: 0 for ``KNNBaseline``, 1 for ``KNNWithMeans``,
                2 for ``KNNBasic``.

        Raises:
            ValueError: if ``mode`` is not 0, 1 or 2.
        """
        # Reject a bad mode before any file is read, and report it as an
        # error instead of terminating the interpreter with a success status.
        if mode not in self._VALID_MODES:
            raise ValueError(
                f"mode must be one of {self._VALID_MODES}, got {mode!r}"
            )

        self.index = pd.read_csv('../数据/movie_info.csv')
        self.reader = Reader()
        self.ratings = pd.read_csv('../数据/user_movie.csv')
        data = Dataset.load_from_df(
            self.ratings[['userId', 'movieId', 'rating']], self.reader
        )
        trainset = data.build_full_trainset()

        # user_based=False makes the similarity matrix item-to-item.
        sim_options = {'name': 'pearson_baseline', 'user_based': False}
        algo_by_mode = {0: KNNBaseline, 1: KNNWithMeans, 2: KNNBasic}
        self.algo = algo_by_mode[mode](sim_options=sim_options)
        self.algo.fit(trainset)

    def search_movie_neighbors(self, movieID, num=10):
        """Return the raw ids of the ``num`` nearest neighbours of a movie.

        The list of neighbour ids is also printed to stdout.

        Args:
            movieID: raw movie id as it appears in the ratings data.
            num: number of neighbours requested from the fitted model.

        Returns:
            A list of raw movie ids.
        """
        trainset = self.algo.trainset
        # The fitted model works on inner ids; convert in, then back out.
        inner_id = trainset.to_inner_iid(movieID)
        inner_neighbors = self.algo.get_neighbors(inner_id, k=num)
        movie_neighbors = [trainset.to_raw_iid(iid) for iid in inner_neighbors]
        print(movie_neighbors)
        return movie_neighbors

    def recommend_movies(self, movieID, num=10):
        """Return the titles of the movies most similar to ``movieID``.

        Args:
            movieID: raw movie id as it appears in the ratings data.
            num: number of similar movies to look up.

        Returns:
            A list with one pandas Series per neighbour, each holding the
            ``title`` values of the rows in ``self.index`` whose ``movieId``
            equals that neighbour (empty if the id is not in ``self.index``).
        """
        neighbor_ids = self.search_movie_neighbors(movieID, num)
        return [
            self.index[self.index.movieId == neighbor_id]['title']
            for neighbor_id in neighbor_ids
        ]

In [3]:
# Demo: fit the item-based recommender with its default KNN variant and list
# the ten movies closest to movie id 59315.
test = Movie_KNN_recommender(mode=0)
result = test.recommend_movies(movieID=59315, num=10)
for i in result:
    # Each entry is a pandas Series of matching titles; show the first one.
    first_title = i.values[0]
    print(first_title)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
[77561, 89745, 87232, 5349, 2571, 3793, 58559, 68358, 50872, 69122]
Iron Man 2 (2010)
Avengers, The (2012)
X-Men: First Class (2011)
Spider-Man (2002)
Matrix, The (1999)
X-Men (2000)
Dark Knight, The (2008)
Star Trek (2009)
Ratatouille (2007)
Hangover, The (2009)


## 针对用户相似度的推荐算法

In [4]:
# Collect the distinct user ids and movie ids, in order of first appearance.
ratings = pd.read_csv('../数据/user_movie.csv')
usrid = list(ratings['userId'].unique())
movieid = list(ratings['movieId'].unique())

# Split the ratings into a training set and a test set.
train = []
valid = []

# Group every rating row as [userId, movieId, rating] under its user.
# Grouping by key (rather than walking a row cursor) keeps the very first row
# of the file and does not require a user's rows to be contiguous.
rows_by_user = {}
for user, movie, rating in zip(
    ratings['userId'], ratings['movieId'], ratings['rating']
):
    rows_by_user.setdefault(user, []).append([user, movie, rating])

# One list of rows per user, users in order of first appearance (same order
# as ``usrid``).
data_all = list(rows_by_user.values())

# Retained in case later cells still read this cursor; it marks end-of-data.
index = len(ratings)

# Each row independently goes to the training file with probability
# ``threshold``; the remaining rows are collected for the test file.
threshold = 0.9
test_data = []
# newline="" is what the csv module expects for files handed to csv.writer;
# without it, platforms that translate newlines emit an extra blank line
# after every row.
with open("../数据/train.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['userId', 'movieId','rating'])
    for this_user in data_all:
        for row in this_user:
            if random.random() < threshold:
                writer.writerow(row)
            else:
                test_data.append(row)

with open("../数据/test.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['userId', 'movieId', 'rating'])
    writer.writerows(test_data)

In [5]:
class Personal_KNN_recommender:
    """User-based KNN recommender that suggests movies liked by similar users.

    The model is fitted on ``train.csv`` using the ``pearson_baseline``
    similarity between users (``user_based=True``).

    Attributes:
        index: DataFrame read from ``movie_info.csv``; its ``movieId`` and
            ``title`` columns are used to turn ids into titles.
        reader: the ``Reader`` used to load the ratings into a ``Dataset``.
        ratings: DataFrame read from ``train.csv`` (training ratings).
        testings: DataFrame read from ``test.csv`` (held-out ratings).
        userid: distinct ``userId`` values of ``testings``, in order of
            first appearance.
        algo: the fitted KNN algorithm selected by ``mode``.
    """

    # Accepted values of ``mode``: 0 -> KNNBaseline, 1 -> KNNWithMeans,
    # 2 -> KNNBasic.
    _VALID_MODES = (0, 1, 2)

    def __init__(self, mode=0):
        """Load the data files and fit the KNN variant selected by ``mode``.

        Args:
            mode: 0 for ``KNNBaseline``, 1 for ``KNNWithMeans``,
                2 for ``KNNBasic``.

        Raises:
            ValueError: if ``mode`` is not 0, 1 or 2.
        """
        # Reject a bad mode before any file is read, and report it as an
        # error instead of terminating the interpreter with a success status.
        if mode not in self._VALID_MODES:
            raise ValueError(
                f"mode must be one of {self._VALID_MODES}, got {mode!r}"
            )

        self.index = pd.read_csv('../数据/movie_info.csv')
        self.reader = Reader()
        self.ratings = pd.read_csv('../数据/train.csv')
        self.testings = pd.read_csv('../数据/test.csv')
        data = Dataset.load_from_df(
            self.ratings[['userId', 'movieId', 'rating']], self.reader
        )
        trainset = data.build_full_trainset()

        # user_based=True makes the similarity matrix user-to-user.
        sim_options = {'name': 'pearson_baseline', 'user_based': True}
        algo_by_mode = {0: KNNBaseline, 1: KNNWithMeans, 2: KNNBasic}
        self.algo = algo_by_mode[mode](sim_options=sim_options)

        # unique() keeps first-appearance order and avoids a quadratic
        # "is it already in the list" scan.
        self.userid = list(self.testings['userId'].unique())
        self.algo.fit(trainset)

    def search_user_neighbors(self, usrID, num=10):
        """Return the raw ids of the ``num`` nearest neighbours of a user.

        Args:
            usrID: raw user id as it appears in the training ratings.
            num: number of neighbours requested from the fitted model.

        Returns:
            A list of raw user ids.
        """
        trainset = self.algo.trainset
        # The fitted model works on inner ids; convert in, then back out.
        inner_id = trainset.to_inner_uid(usrID)
        inner_neighbors = self.algo.get_neighbors(inner_id, k=num)
        return [trainset.to_raw_uid(uid) for uid in inner_neighbors]

    def recommend_movies(self, usrID, num=5):
        """Recommend movies that similar users rated and ``usrID`` has not.

        Every movie rated by one of the ``num`` nearest users, and not already
        rated by ``usrID`` in the training data, is scored with the sum of the
        ratings those neighbours gave it. The ``num`` highest-scoring movies
        are returned. Note that ``num`` controls both the number of neighbours
        consulted and the number of movies returned.

        Args:
            usrID: raw user id as it appears in the training ratings.
            num: number of neighbours to consult and of movies to return.

        Returns:
            A tuple ``(recommending, recommending_id)``: a list with one
            pandas Series of matching ``title`` values per recommended movie,
            and the list of the corresponding movie ids, both ordered from
            highest to lowest score.
        """
        user_rows = self.ratings[self.ratings.userId == usrID]
        already_rated = set(user_rows['movieId'])
        similar_users = self.search_user_neighbors(usrID, num)

        # Sum, per unseen movie, the ratings given by the similar users.
        movies_dict = {}
        for neighbor in similar_users:
            neighbor_rows = self.ratings[self.ratings.userId == neighbor]
            for movie, vote in zip(
                neighbor_rows['movieId'], neighbor_rows['rating']
            ):
                if movie not in already_rated:
                    movies_dict[movie] = movies_dict.get(movie, 0) + vote

        # Highest total first; the sort is stable, so ties keep the order in
        # which the movies were first encountered.
        ranked = sorted(
            movies_dict.items(), key=lambda item: item[1], reverse=True
        )
        top = ranked[:num]

        recommending_id = [movie for movie, _ in top]
        recommending = [
            self.index[self.index.movieId == movie]['title']
            for movie in recommending_id
        ]
        return recommending, recommending_id

In [6]:
# Demo: fit the user-based recommender with its default KNN variant and ask
# for recommendations for user 66, consulting ten neighbours.
test = Personal_KNN_recommender(mode=0)
result = test.recommend_movies(usrID=66, num=10)
# ``result`` is the pair (title Series list, movie id list); print each part.
for i in result:
    print(i)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.
[46    Usual Suspects, The (1995)
Name: title, dtype: object, 659    Godfather, The (1972)
Name: title, dtype: object, 898    Star Wars: Episode V - The Empire Strikes Back...
Name: title, dtype: object, 922    Godfather: Part II, The (1974)
Name: title, dtype: object, 1183    Men in Black (a.k.a. MIB) (1997)
Name: title, dtype: object, 31    Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
Name: title, dtype: object, 1979    Star Wars: Episode I - The Phantom Menace (1999)
Name: title, dtype: object, 2259    Being John Malkovich (1999)
Name: title, dtype: object, 602    Dr. Strangelove or: How I Learned to Stop Worr...
Name: title, dtype: object, 909    Apocalypse Now (1979)
Name: title, dtype: object]
[50, 858, 1196, 1221, 1580, 32, 2628, 2997, 750, 1208]
