In [1]:
from operator import itemgetter
from flask import Flask
from flask import render_template
from flask import request
import pandas as pd
from scipy.stats import pearsonr
import tqdm

In [2]:
# Location of the ratings table, relative to the notebook's working directory.
csv_path = './result.csv'

# Load the table, key the rows by the 'name' column, and replace every missing
# cell with the string sentinel 'NaN'. The helper functions in the cells below
# compare cells against that exact string to detect "no rating".
df = (
    pd.read_csv(csv_path, encoding='utf-8')
    .set_index('name')
    .fillna('NaN')
)

# Row labels of the table (the values of the 'name' column).
item = df.index

# Neighbour count handed to get_weighted_average() by the /result view.
n = 50
# Placeholder for the similarity ranking; the /result view rebinds it per request.
similarity = {}

In [3]:
def user_similarity(user1, user2):
    """Return the Pearson correlation between two users' ratings.

    Only the columns of the module-level ``df`` in which neither user holds
    the ``'NaN'`` string sentinel (written by ``fillna('NaN')`` at load time)
    take part in the correlation.

    Parameters
    ----------
    user1, user2
        Row labels of ``df``.

    Returns
    -------
    float or int
        Pearson's r over the co-rated columns, or ``0`` when no meaningful
        correlation exists:

        * either label is missing from ``df`` or occurs more than once,
        * fewer than two columns are rated by both users (``pearsonr``
          needs at least two points),
        * the correlation is undefined (NaN), e.g. one user gave the same
          rating to every co-rated column.
    """
    # Fetch each row once instead of once per column.
    try:
        row1 = df.loc[user1]
        row2 = df.loc[user2]
    except KeyError:
        return 0

    # A label that appears several times in the index makes .loc return a
    # DataFrame; there is no single rating vector to compare in that case.
    if not (isinstance(row1, pd.Series) and isinstance(row2, pd.Series)):
        return 0

    both_rated = (row1 != 'NaN') & (row2 != 'NaN')
    if both_rated.sum() < 2:
        return 0

    user1_ratings = row1[both_rated].tolist()
    user2_ratings = row2[both_rated].tolist()

    correlation = pearsonr(user1_ratings, user2_ratings)[0]
    # NaN cannot be ordered, so it would corrupt any ranking built from the
    # scores; report "no correlation" instead.
    if pd.isna(correlation):
        return 0
    return correlation

In [4]:
def get_unseen_genres(user):
    """List the columns of ``df`` that ``user`` has no value for.

    A column counts as unseen when the user's cell holds the ``'NaN'``
    string sentinel. Columns are returned in the table's column order.
    """
    return [column for column in df.keys() if df.loc[user][column] == 'NaN']

In [5]:
def get_weighted_average(items, n):
    """Predict a score for each item as a similarity-weighted mean rating.

    Reads two module-level names: ``df`` (the ratings table, with the
    ``'NaN'`` string marking a missing rating) and ``similarity`` (a sequence
    of ``(user, score)`` pairs, best match first, as published by the
    ``/result`` view).

    Parameters
    ----------
    items
        Column labels of ``df`` to score.
    n
        Maximum number of neighbours to average over per item. Neighbours
        that have not rated the item are skipped and the next-best one is
        used instead, so every item is scored from up to ``n`` real ratings.

    Returns
    -------
    dict
        ``{item: weighted_mean}``. An item gets ``0.0`` when no neighbour
        rated it or the similarity weights sum to zero, because the weighted
        mean is undefined there.
    """
    res = {}
    for item in items:
        weighted_sum = 0
        weight_total = 0
        used = 0
        # Walk the ranking in order and stop after n usable neighbours. The
        # budget is tracked per item: mutating `n` itself would leak into the
        # following items and eventually index past the end of the ranking.
        for neighbour, weight in similarity:
            if used >= n:
                break
            rating = df.loc[neighbour, item]
            if rating == 'NaN':
                continue
            weighted_sum += weight * rating
            weight_total += weight
            used += 1
        # Guard the division: no raters (or cancelling weights) means there
        # is no defined average.
        res[item] = weighted_sum / weight_total if weight_total != 0 else 0.0
    return res



In [6]:
# Flask application object; the @app.route decorators in the cells below
# register their view functions on it.
app = Flask(__name__)

In [7]:
@app.route("/")
def main():
    """Handle requests to ``/`` by rendering the ``main.html`` template."""
    return render_template("main.html")

In [8]:
@app.route("/result", methods=['GET'])
def result():
    """Render recommendations for the user given in the ``name`` query parameter.

    Steps:

    1. Score every other row of ``df`` against the requested user with
       ``user_similarity`` and rank the scores best-first.
    2. Publish the ranking in the module-level ``similarity``, which
       ``get_weighted_average`` reads.
    3. Score the user's unseen columns and render ``res.html`` with the
       five most similar users and the ranked scores.

    Returns the not-found message as plain text when the name is absent
    from ``df.index`` (including when no ``name`` parameter was sent).

    NOTE(review): ``similarity`` is shared module state, so two requests
    handled concurrently can still read each other's ranking. Building the
    ranking locally and rebinding the global once narrows that window but
    does not remove it.
    """
    global similarity

    user1 = request.args.get('name')
    if user1 not in df.index:
        return "찾고자 하는 사람이 DB에 존재하지 않습니다."

    scores = {}
    for user2 in tqdm.tqdm(df.index):
        if user1 == user2:
            continue
        score = user_similarity(user1, user2)
        # An undefined correlation (NaN) has no ordering and would leave the
        # sort below in an arbitrary order; rank it as "no similarity".
        scores[user2] = 0 if pd.isna(score) else score

    similarity = sorted(scores.items(), key=itemgetter(1), reverse=True)

    # Slicing tolerates a table with fewer than five other users, whereas
    # indexing positions 0..4 would raise IndexError.
    similarity_n = [name for name, _ in similarity[:5]]

    unseen_genres = get_unseen_genres(user1)
    averages = get_weighted_average(unseen_genres, n)
    ranked = sorted(averages.items(), key=itemgetter(1), reverse=True)
    return render_template("res.html", similarity = similarity_n, name = user1, result = ranked, n = n)

In [None]:
# Start Flask's built-in server when this code runs as the main module.
# app.run() keeps serving until it is interrupted (CTRL+C), so nothing placed
# after it runs while the server is up.
if __name__ == "__main__":
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
100%|██████████| 2805/2805 [00:16<00:00, 167.90it/s]
127.0.0.1 - - [25/Oct/2022 23:25:36] "GET /result?name=희희 HTTP/1.1" 200 -
127.0.0.1 - - [25/Oct/2022 23:25:50] "GET / HTTP/1.1" 200 -
