In [1]:
import csv
import math
import json
from fuzzywuzzy import fuzz
from collections import OrderedDict
from pandas.io.json import json_normalize



In [2]:
def loadItemDataset(filename):
    """Build a brand -> item ids index from a JSON item file.

    The top-level JSON value is iterated, and every element must provide a
    "brand_id" and an "item_id" entry.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        dict mapping each brand id to the list of its item ids, in the
        order the items appear in the file.

    Raises:
        KeyError: If an element lacks "brand_id" or "item_id".
    """
    with open(filename, 'r') as handle:
        records = json.load(handle)

    itemsByBrand = {}
    for record in records:
        brandKey = record["brand_id"]
        itemKey = record["item_id"]
        # setdefault creates the bucket the first time a brand is seen.
        itemsByBrand.setdefault(brandKey, []).append(itemKey)
    return itemsByBrand
def loadRateDataset(filename):
    """Return the value stored under the top-level "brand_id" key of a JSON file.

    KnnBrandRecommender iterates the returned value as the brand ids the
    user rated highly.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        KeyError: If the file has no top-level "brand_id" key.
    """
    with open(filename, 'r') as handle:
        payload = json.load(handle)
    return payload["brand_id"]

def loadBrandDataset(filename):
    """Load the brand glossary: brand id -> list of feature values.

    The file must hold an object with a "brands" list; every brand entry
    needs a "brand_id" and a "features" mapping. Only the feature values
    are kept, in the order the mapping yields them.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        dict mapping each brand id to the list of its feature values.

    Raises:
        KeyError: If "brands", "brand_id" or "features" is missing.
    """
    with open(filename, 'r') as handle:
        payload = json.load(handle)

    featuresByBrand = {}
    for entry in payload["brands"]:
        brandKey = entry["brand_id"]
        featuresByBrand[brandKey] = list(entry["features"].values())
    return featuresByBrand

In [8]:
def euclideanDistance(instance1,instance2):
    """Euclidean distance between two feature vectors.

    Every component is converted with float() first, so numeric strings
    are accepted. The number of compared components is len(instance1).

    Args:
        instance1: First feature vector; its length drives the loop.
        instance2: Second feature vector, indexed at the same positions.

    Returns:
        The distance as a float (0.0 for empty vectors).
    """
    squaredTotal = 0
    for idx in range(len(instance1)):
        gap = float(instance1[idx]) - float(instance2[idx])
        squaredTotal += gap ** 2
    return math.sqrt(squaredTotal)
def getNeighbors(dataset,testInstance,k):
    """Return the ids of the k entries of dataset closest to testInstance.

    Args:
        dataset: Mapping of id -> feature vector.
        testInstance: Feature vector the distances are measured from.
        k: Maximum number of ids to return.

    Returns:
        List of ids ordered by increasing Euclidean distance; ties keep
        the iteration order of dataset. At most len(dataset) ids are
        returned.
    """
    scored = [
        (entryId, euclideanDistance(testInstance, features))
        for entryId, features in dataset.items()
    ]
    ranked = sorted(scored, key=lambda pair: pair[1])
    limit = min(k, len(ranked))
    return [ranked[pos][0] for pos in range(limit)]

In [21]:
def KnnBrandRecommender(items,highRatings,normalizedBrands,k,filterItemIDs=None):
    """Recommend item ids from the brands nearest to the user's highly rated brands.

    For every rated brand, the k nearest brands in the glossary are looked
    up (the rated brand itself is part of the glossary, so it can be its
    own neighbour). Each appearance counts as one vote; brands are ranked
    by votes and their items are concatenated in that order.

    Args:
        items: Path of the item JSON file (see loadItemDataset).
        highRatings: Path of the rating JSON file (see loadRateDataset).
        normalizedBrands: Path of the brand glossary JSON file
            (see loadBrandDataset).
        k: Number of neighbouring brands fetched per rated brand.
        filterItemIDs: Optional container of item ids; when given, only
            recommended items contained in it are returned.

    Returns:
        List of item ids, most voted brands first.

    Raises:
        ValueError: If one of the files lacks an expected key. The
            original KeyError is attached as the cause.
    """
    try:
        brandItems=loadItemDataset(items) #itemFile
        userRated=loadRateDataset(highRatings) #rateFile
        brandGlossary=loadBrandDataset(normalizedBrands) #brandGlossary
    except KeyError as err:
        # Chain the cause so the missing key stays visible in the traceback.
        raise ValueError("Incorrect file list!") from err

    brandCount={}
    for brandId in userRated:
        # A rated brand without glossary features cannot be positioned in
        # feature space; skip it instead of aborting with a raw KeyError.
        if brandId not in brandGlossary:
            continue
        testInstance = brandGlossary[brandId]
        for neighbor in getNeighbors(brandGlossary,testInstance,k):
            brandCount[neighbor] = brandCount.get(neighbor, 0) + 1

    # sorted() is stable, so brands with equal votes keep first-seen order.
    sortedBrandIds = sorted(brandCount, key=brandCount.get, reverse=True)

    desiredItems=[]
    for brandId in sortedBrandIds:
        if brandId in brandItems:
            desiredItems.extend(brandItems[brandId])

    # Identity test: `== None` misbehaves for array-like filters.
    if filterItemIDs is None:
        return desiredItems
    return [itemId for itemId in desiredItems if itemId in filterItemIDs]

# Example run; keyword arguments keep the three file paths from being mixed up.
KnnBrandRecommender(
    items="./jsons/items.json",
    highRatings="./jsons/high_ratings.json",
    normalizedBrands="./jsons/normalized_brand.json",
    k=10,
)

[10, 20, 30, 40, 50]

# For further use!

In [None]:
def KnnRecommender(itemFile,rateFile,userId,k,filterItemIDs):
    """Item-level KNN recommender draft (cell was never executed).

    Restricts the item dataset to the ids in filterItemIDs, then, for every
    rated item found in that subset, votes for its k nearest items. Returns
    a list of (item, votes) pairs sorted by votes, highest first.

    NOTE(review): this draft does not match the helpers defined in this
    file:
      * loadItemDataset and loadRateDataset are called with two arguments
        here but are defined with a single `filename` parameter.
      * getNeighbors iterates `dataset.items()`, while a list is passed
        here.
    It presumably targets earlier, row-based versions of those helpers
    (items as sequences whose first element is the item id) - confirm
    before reuse.
    """
    itemDataset=loadItemDataset(itemFile,16)
    filteredItemDataset=[]
    for filterItemID in filterItemIDs:
        for item in itemDataset:
            if int(item[0]) == filterItemID:
                filteredItemDataset.append(item)
                # No-op: last statement of the loop body, so the scan goes on
                # (a `break` may have been intended - confirm).
                continue
    userRated=loadRateDataset(rateFile,userId)
    # Maps frozenset(item) -> number of times the item showed up as a neighbour.
    resultDict={}
    for itemId in userRated:
        print("rate itemId:" + str(itemId))
        for filteredItem in filteredItemDataset:
            if int(filteredItem[0]) == itemId:
                testInstance = filteredItem
                print("test Instance=")
                print(testInstance)
                neighbors = getNeighbors(filteredItemDataset,testInstance,k)
                for neighbor in neighbors:
                    # frozenset makes the item hashable but drops element
                    # order and repeated values.
                    if frozenset(neighbor) in resultDict:
                        resultDict[frozenset(neighbor)] += 1
                    else:
                        resultDict[frozenset(neighbor)] = 1
                # No-op for the same reason as above.
                continue
    # The comprehension variable `k` shadows the parameter `k` only inside the
    # comprehension; list(k) rebuilds each item in set iteration order.
    sortedResult=[(list(k),resultDict[k]) for k in sorted(resultDict,key=resultDict.get,reverse=True)]
    return sortedResult
def knnRecommender(itemFile,rateFile,userId,k,filterItemIDs):
    """Run KnnRecommender, widening the neighbourhood until k results exist.

    The first call asks for k neighbours. While fewer than k results come
    back, the neighbour count is raised by one and the query is repeated.
    The target stays at the k that was passed in. The search stops as soon
    as a wider query returns no additional results, which guarantees
    termination when the data cannot supply k results; in that case the
    shorter result list is returned.

    Args:
        itemFile: Forwarded to KnnRecommender.
        rateFile: Forwarded to KnnRecommender.
        userId: Forwarded to KnnRecommender.
        k: Desired number of results and initial neighbour count.
        filterItemIDs: Forwarded to KnnRecommender.

    Returns:
        The result list of the last KnnRecommender call that added results.
    """
    wanted = k
    neighborCount = k
    result = KnnRecommender(itemFile,rateFile,userId,neighborCount,filterItemIDs)
    while len(result) < wanted:
        neighborCount += 1
        widened = KnnRecommender(itemFile,rateFile,userId,neighborCount,filterItemIDs)
        # No progress: asking for more neighbours will not help, so stop
        # rather than loop forever.
        if len(widened) <= len(result):
            break
        result = widened
    return result