In [23]:
from hdfs.client import Client, InsecureClient
import redis
import json
import pyarrow as pa
import pyarrow.parquet as pq
import time

In [2]:
client = InsecureClient("http://192.168.1.176:50070", user='spark')

In [3]:
r = redis.StrictRedis(host='192.168.1.176', port=6379,decode_responses=True)

In [43]:
with client.read("/recipe/recipe1023_V9.json")as reader:
    data = json.load(reader)

In [None]:
with open("../tags.txt",encoding="utf-8")as f:
    tags = f.read()

In [None]:
with client.write('/recipe/tags.txt') as writer:
    writer.write(tags.encode("utf-8"))

In [5]:
with client.read("/recipe/tags.txt")as reader:
    tag = reader.read().decode("utf-8")

In [6]:
tags = set(tag.split(","))

In [7]:
len(tags)

104

In [None]:
# failed, FileNotFound Error
fs = pa.hdfs.connect(host="192.168.1.176",port=8020,user="spark")

In [44]:
len(data)

179412

## Redis architecture

---------mysql----------------------------------------------------------------------------------------------

user_set (check) key -> set ---> set(user_pool,user_id)

user refrigerator hash -> keys -> values --> hmset(name,key,value) ---> hmset(user_id,ingredient,quantity)

---------hadoop----------------------------------------------------------------------------------------------

tags and ingredients key -> sorted set --> zadd(name,element,score) ---> zadd(tag,{_id:like score})

recipe hash -> keys -> values --> hmset(name,key,value) ---> hmset(_id,{recipeName: recipe})


In [45]:
start = time.time()
# push to Redis
for each in data:
    _id = each["_id"]
    
    # tags-key sorted set
    tag_set = set(each["tags"])
    # tags是從網站分類抓下來的104種分類
    for one_tag in tag_set & tags:
        r.zadd(one_tag,{_id:each["like"]})
    
    # ingredient-key sorted set
    for one_ing in each["ing_dict"].keys():
        r.zadd(one_ing,{_id:each["like"]})
        
    # recipe hash table, _id: key
    if each["ingredient"]:
        ingredient_str = ""
        for ing in each["ingredient"]:
            ing_str = ing[0] + str(ing[1]) + str(ing[2])
            ingredient_str += ing_str + ","
    
    if each["seasoning"]:
        seasoning_str = ""
        for sea in each["seasoning"]:
            sea_str = sea[0] + str(sea[1]) + str(sea[2])
            seasoning_str += sea_str + ","
    
    value = {"recipe":each["recipe"],
             "url":each["url"],
             "image":each["image"],
             "like":each["like"],
             "cluster":each["cluster"],
             "ingredient":ingredient_str[:-1],
             "seasoning":seasoning_str[:-1],
             "quantity":each["quantity"],
             "time":each["time"]}
    r.hmset(_id,value)
    
#     print(value)
end = time.time()
print(f"{end-start} sec used.")



582.4049088954926 sec used.


In [53]:
'''
所有的key
recipe: 179412
ingredient:674
tags: 104
total: 180190
'''
len(r.keys())

180194

In [59]:
# 查詢tag或食材的相關食譜,預設排序從小到大 (依據like值當作sorted set value)
r.zrange('牛肉',0,-1)

['111201',
 '111346',
 '112544',
 '116214',
 '116307',
 '117841',
 '119493',
 '120232',
 '121091',
 '122290',
 '122481',
 '122975',
 '123203',
 '159702',
 '159723',
 '159735',
 '159745',
 '159771',
 '160009',
 '160060',
 '160078',
 '160087',
 '160093',
 '160103',
 '160218',
 '160276',
 '160446',
 '160567',
 '160798',
 '160843',
 '160908',
 '161389',
 '161484',
 '161497',
 '161525',
 '161730',
 '162059',
 '162085',
 '162143',
 '162159',
 '162208',
 '162223',
 '162537',
 '162570',
 '162945',
 '163056',
 '163700',
 '163724',
 '163778',
 '163784',
 '163792',
 '163816',
 '163828',
 '163834',
 '163912',
 '163973',
 '164212',
 '164334',
 '164341',
 '164512',
 '164569',
 '164878',
 '165592',
 '165593',
 '165628',
 '165845',
 '165886',
 '165891',
 '165904',
 '165906',
 '165923',
 '165951',
 '165983',
 '166174',
 '166186',
 '166235',
 '166399',
 '166627',
 '166766',
 '166830',
 '166955',
 '167043',
 '167079',
 '167152',
 '167222',
 '167541',
 '167585',
 '167652',
 '167712',
 '167760',
 '167963',

In [58]:
# 用食譜_id查詢該食譜
r.hgetall('38795')

{'ingredient': '牛肋條600gram,番茄250gram,洋蔥80gram,菇類140gram,月桂葉90gram',
 'seasoning': '水180ml,胡椒粉1.5ml,綜合香料1.5ml,醬油150ml,冰糖190gram,鹽0.5ml',
 'cluster': 'cluster2',
 'like': '17000',
 'recipe': '番茄燉牛肉好滋味',
 'quantity': '4',
 'url': 'https://icook.tw/recipes/171262',
 'image': 'https://imageproxy.icook.network/resize?height=600&nocrop=false&stripmeta=true&type=auto&url=http%3A%2F%2Ftokyo-kitchen.icook.tw.s3.amazonaws.com%2Fuploads%2Frecipe%2Fcover%2F171262%2F53748c7fcf463f82.jpg&width=800',
 'time': '60分鐘'}