In [7]:
# predict: source: https://surprise.readthedocs.io/en/stable/FAQ.html
from collections import defaultdict
from tkinter import *
import json
import pandas as pd
from surprise import Dataset
from surprise import Reader
from surprise.prediction_algorithms import KNNWithMeans
from surprise import accuracy
from surprise.model_selection import GridSearchCV
from tkinter import *

In [8]:
def read_data(path='renttherunway_final_data.json'):
    """Load the rated rental reviews from a JSON-lines file.

    Every non-blank line of the file is parsed as one JSON object.  Only
    records whose ``rating`` is present and truthy are kept (sparse-matrix
    form: one ``uid, iid, rating`` triple per kept line).  For those records
    ``user_id``, ``item_id`` and ``category`` are copied unchanged and
    ``rating`` is converted to ``float``.

    As a side effect the set of categories seen among the kept records and
    the number of kept records are printed.

    Args:
        path: Location of the JSON-lines file.  Defaults to the file name
            this notebook has always read, so ``read_data()`` keeps working.

    Returns:
        A list of dicts with the keys ``user_id``, ``item_id``, ``rating``
        and ``category``, in file order.
    """
    rent_list = []
    category_set = set()
    with open(path) as json_data:
        for line in json_data:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline) is not a record and
                # would make json.loads raise.
                continue
            rent_entry = json.loads(line)
            # .get(): a record without a 'rating' key is skipped exactly like
            # one whose rating is null.
            if not rent_entry.get('rating'):
                continue
            rent_list.append({
                'user_id': rent_entry['user_id'],
                'item_id': rent_entry['item_id'],
                'rating': float(rent_entry['rating']),
                # kept so the rows can later be filtered by the user's
                # chosen category
                'category': rent_entry['category'],
            })
            category_set.add(rent_entry['category'])
    print("Category:")
    print(category_set)
    print(len(rent_list))
    return rent_list

In [9]:
from tkinter import *

def sel():
    """Translate the currently selected radio button into a category name.

    Reads the module-level Tk variable ``v`` (shared by all the radio
    buttons) and maps its integer value to the category string used in the
    data set.

    Returns:
        The category name for values 1-11.  Any other value is returned
        unchanged (presumably 0 when nothing has been picked yet -- confirm
        against the ``IntVar`` default).
    """
    categories = {
        # Button 1 is labelled 'Legging'; it used to return 'jacket', which
        # did not match the label shown to the user.
        1: 'legging',
        2: 'down',
        3: 'buttondown',
        4: 'crewneck',
        5: 'gown',
        6: 'hoodie',
        7: 'caftan',
        8: 'sheath',
        9: 'leggings',
        10: 'tank',
        11: 'ballgown',
    }
    selection = v.get()
    return categories.get(selection, selection)

"""
'caftan', 'sheath', 'leggings', 'tank', 'ballgown', 
'tunic', 'turtleneck', 'skirts', 'parka', 'sweatershirt', 'jeans', 'romper', 'sweater', 'pullover', 'jogger', 'tee', 
'trench', 'trousers', 'jacket', 'sweatshirt', 'kaftan', 'suit', 'shift', 'mini', 'coat', 'trouser', 'cape', 'top', 
'overalls', 'blouson', 't-shirt', 'culotte', 'jumpsuit', 'shirtdress', 'duster', 'kimono', 'combo', 'pant', 'culottes', 
'shirt', 'blazer', 'frock', 'pants', 'midi', 'skirt', 'print', 'overcoat', 'for', 'dress', 'cardigan', 'sweatpants', 
'cami', 'skort', 'peacoat', 'knit', 'maxi', 'tight', 'blouse', 'vest', 'bomber', 'poncho', 'henley'
"""
    
root = Tk()
v = IntVar()

# One radio button per offered category.  The buttons share `v`: button
# number N (1-based, in the order below) stores N in it, and `sel` is the
# callback attached to every button.
option_texts = ['Legging', 'Down', 'Buttondown', 'Crewneck', 'Gown', 'Hoodie',
                'Caftan', 'Sheath', 'Leggings', 'Tank', 'Ballgown']
for option_value, option_text in enumerate(option_texts, 1):
    option_button = Radiobutton(root, text=option_text, variable=v,
                                value=option_value, command=sel)
    option_button.pack(anchor=W)

# Hand control to the Tk event loop.
mainloop()

In [10]:
def filter_needs(rent_list, category):
    """Keep only the entries of one category, as a ratings DataFrame.

    Args:
        rent_list: Iterable of dicts carrying at least the keys ``user_id``,
            ``item_id``, ``rating`` and ``category``.
        category: Category name an entry must match exactly to be kept.

    Returns:
        A DataFrame with the columns ``user_id``, ``item_id`` and ``rating``
        (in that order), one row per matching entry in input order, indexed
        0..n-1.  When nothing matches, an empty frame with those three
        columns is returned instead of raising ``KeyError``.
    """
    columns = ['user_id', 'item_id', 'rating']
    matching = [entry for entry in rent_list if entry['category'] == category]
    # Build the frame once: growing it with DataFrame.append inside a loop is
    # quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
    return pd.DataFrame(matching, columns=columns)

In [11]:
def grid_search(my_df):
    """Tune a user-based KNNWithMeans model and predict all unseen pairs.

    The frame is loaded with a rating scale of (1, 10).  A 5-fold
    cross-validated grid search (measures: RMSE and MAE) is run over ``k``,
    ``min_k`` and the Pearson ``min_support``; GridSearchCV does the
    train/validation splitting itself.  The best RMSE score, its parameters
    and the corresponding estimator are printed.  That estimator is then
    refitted on the full data and used to predict every (user, item) pair
    that is NOT in the training set.

    Args:
        my_df: Frame passed straight to ``Dataset.load_from_df``.

    Returns:
        A tuple ``(algo, trainvalset, predictions)``: the refitted estimator,
        the full trainset it was fitted on, and the predictions for the
        anti-testset.
    """
    dataset = Dataset.load_from_df(my_df, Reader(rating_scale=(1, 10)))

    # sim_options decides the formula for calculating similarities.
    # min_support: minimum number of common items; with fewer common items
    # than that, sim(u, v) = 0.
    similarity_grid = {'name': ['pearson'],
                       'min_support': [1, 3, 5, 10],
                       'user_based': [True]}
    # KNNWithMeans decides the formula for prediction.
    # min_k: if there are not enough neighbors, the prediction is the mean of
    # all R_ui.
    search_space = {'k': [1, 3, 5, 10, 15, 20],
                    'min_k': [1, 3, 5, 10],
                    'sim_options': similarity_grid}

    searcher = GridSearchCV(KNNWithMeans, search_space,
                            measures=['rmse', 'mae'], cv=5)
    searcher.fit(dataset)

    print(searcher.best_score['rmse'])
    print(searcher.best_params['rmse'])
    best_model = searcher.best_estimator['rmse']
    print(best_model)

    # Refit on training + validation data together.
    full_trainset = dataset.build_full_trainset()
    best_model.fit(full_trainset)

    # Anti-testset: users and items are known, but their rating is not.
    unseen_pairs = full_trainset.build_anti_testset()
    return best_model, full_trainset, best_model.test(unseen_pairs)

In [12]:
def get_top_n(my_df, n=10):
    '''Return the top-N recommendations for each user.

    The predictions are obtained by calling ``grid_search(my_df)``; each one
    is unpacked as a 5-tuple (user id, item id, true rating, estimated
    rating, details).

    Args:
        my_df: Frame forwarded unchanged to ``grid_search``.
        n: Maximum number of recommendations kept per user.

    Returns:
        A defaultdict(list) where keys are user (raw) ids and values are
        lists of tuples [(raw item id, rating estimation), ...] holding at
        most n entries, highest estimation first.
    '''
    _algo, _trainset, predictions = grid_search(my_df)

    # Group the (item, estimate) pairs by user.
    per_user = defaultdict(list)
    for user, item, _actual, estimate, _details in predictions:
        per_user[user].append((item, estimate))

    # Rank each user's pairs by estimate and keep only the first n.
    for user in per_user:
        ranked = sorted(per_user[user], key=lambda pair: pair[1], reverse=True)
        per_user[user] = ranked[:n]
    return per_user

In [14]:
def main(category='legging', user_id='139671', n=10):
    """Run the recommendation pipeline for one category and show one user.

    Loads the data with ``read_data()``, keeps the rows of ``category`` via
    ``filter_needs``, prints the filtered frame, computes the top-``n``
    recommendations with ``get_top_n`` and prints the list for ``user_id``.

    Args:
        category: Category name to recommend within.
        user_id: Raw user id whose recommendations are printed.
        n: Number of recommendations requested per user.

    The defaults reproduce the values that used to be hard-coded here, so a
    bare ``main()`` behaves as before.  Returns nothing.
    """
    rent_list = read_data()
    my_df = filter_needs(rent_list, category)
    print(my_df)
    top_n = get_top_n(my_df, n=n)
    print(top_n[user_id])


# Run the pipeline when this cell executes; everything it prints shows up as the cell output.
main()

Category:
set([u'mini', u'vest', u'buttondown', u'gown', u'tunic', u'overcoat', u'jeans', u'tee', u'down', u'kaftan', u'peacoat', u'sheath', u'sweatshirt', u'bomber', u'skirt', u'jumpsuit', u'blouse', u'jacket', u'frock', u'tank', u'shirt', u'for', u'jogger', u'dress', u'top', u'pant', u'kimono', u'pullover', u'tight', u'ballgown', u'maxi', u'suit', u'print', u'parka', u'pants', u'sweatpants', u'trouser', u'poncho', u'culotte', u'caftan', u'combo', u'legging', u't-shirt', u'turtleneck', u'crewneck', u'cardigan', u'leggings', u'hoodie', u'cape', u'overalls', u'culottes', u'knit', u'blazer', u'duster', u'coat', u'midi', u'blouson', u'romper', u'trench', u'henley', u'cami', u'shirtdress', u'shift', u'trousers', u'skort', u'sweatershirt', u'sweater', u'skirts'])
192462
   user_id  item_id  rating
0   139671  2064568    10.0
1   486996  2064568    10.0
2    81759  2064568    10.0
3   517457  2064568     6.0
4   881924  2064568    10.0
5   240639  2064568     8.0
6   836090  2064568    10.0


Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.

In [15]:
# Get User ID Interface
import tkinter as tk

def get_userID():
    """Print the current content of the module-level entry widget ``e1``."""
    entered_text = e1.get()
    print(str(entered_text))
                                       
master = tk.Tk()

# Row 0: the "User ID" prompt with the text box next to it.
prompt = tk.Label(master, text="User ID")
prompt.grid(row=0)

e1 = tk.Entry(master)
e1.grid(row=0, column=1)

# Row 3: 'Quit' calls master.quit, 'Enter' calls get_userID.
quit_button = tk.Button(master, text='Quit', command=master.quit)
quit_button.grid(row=3, column=0, sticky=tk.W, pady=4)

enter_button = tk.Button(master, text='Enter', command=get_userID)
enter_button.grid(row=3, column=1, sticky=tk.W, pady=4)

# Hand control to the Tk event loop.
tk.mainloop()

123
