In [1]:
import json
import os
import time
import bcrypt
import random
import datetime
from py2neo import *

In [2]:
# Load the area names and their companion "helper" values, one entry per line.
# The two lists are paired by index later in the notebook.
# Context managers close the files deterministically instead of leaking the
# handles until garbage collection.
with open('area.txt', encoding='utf-8') as area_file:
  areas = [line.strip("\n") for line in area_file]

with open('helper.txt', encoding='utf-8') as helper_file:
  helpers = [helper.strip("\n") for helper in helper_file]


In [3]:
# --- File-name / URL / key patterns ------------------------------------------
note_template = 'note/result_note_{}{}.json'
list_template = 'list/result_list_{}{}.json'
sql_template = 'sql/{}.sql'
img_template = 'http://sns-img-hw.xhscdn.com/{}'
key_template = '{}-{}'

# --- SQL INSERT patterns ------------------------------------------------------
# String placeholders are wrapped in single quotes; callers are responsible for
# doubling any single quote inside the values they pass in.
post_template = (
  "INSERT INTO post(post_id, post_collect_count, post_like_count, "
  "post_image_list, post_desc, post_title, post_publish_time) "
  "VALUES ({}, 0, 0, '{}', '{}', '{}', '{}');"
)
user_template = (
  "INSERT INTO user(user_id, user_nickname, user_avatar, user_password, "
  "user_account, user_create_time, user_fan_count, user_follow_count, "
  "user_post_count, user_collect_post_count, user_collect_location_count) "
  "VALUES ({}, '{}', '{}', '{}', '{}', '{}', 0, 0, 0, 0, 0);"
)

# Starting value for the numeric account assigned to generated users.
account = 1753524606


def getNoteName(name, page):
  """Return the path of the note-detail JSON file for `name` and `page`."""
  path = note_template.format(name, page)
  return path


def getListName(name, page):
  """Return the path of the note-list JSON file for `name` and `page`."""
  path = list_template.format(name, page)
  return path


def getSqlName(name):
  """Return the path of the SQL output file called `name`."""
  path = sql_template.format(name)
  return path


def exist(name):
  """Return True when the filesystem path `name` exists."""
  found = os.path.exists(name)
  return found


def getKeyName(name, id):
  """Return the '<name>-<id>' key used to index the note-detail JSON."""
  key = key_template.format(name, id)
  return key

def generate_random_str(randomlength=16):
  """Return the bcrypt hash of a freshly generated random string.

  Despite the name, the random plaintext itself is discarded; only its
  bcrypt hash (decoded as UTF-8 text) is returned.

  Args:
    randomlength: number of characters in the random plaintext.

  Returns:
    The bcrypt hash as a `str`.
  """
  # The previous alphabet contained 'G'/'g' where 'J'/'j' belong, so 'J' and
  # 'j' could never be drawn while 'G' and 'g' were twice as likely.
  base_str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz,.'
  random_str = ''.join(random.choice(base_str) for _ in range(randomlength))
  hashed = bcrypt.hashpw(random_str.encode('utf-8'), bcrypt.gensalt())
  return hashed.decode('utf-8')

def randomtime(start='2018-01-01 08:00:00', end='2022-11-20 00:00:00', frmt="%Y-%m-%d %H:%M:%S"):
  """Return a random timestamp between `start` and `end` as text.

  Args:
    start: lower bound, formatted according to `frmt`.
    end: upper bound, formatted according to `frmt`.
    frmt: `strptime`/`strftime` pattern used for both parsing and output.

  Returns:
    The randomly chosen moment rendered with `frmt`.
  """
  lower = datetime.datetime.strptime(start, frmt)
  upper = datetime.datetime.strptime(end, frmt)
  span = upper - lower
  # Scale the interval by a uniform fraction in [0, 1) and offset from `lower`.
  picked = random.random() * span + lower
  return picked.strftime(frmt)

def rint(min, max):
  """Return a random integer N with `min <= N <= max` (both ends inclusive)."""
  value = random.randint(min, max)
  return value


In [4]:
def _cypher_escape(text):
  """Escape `text` for use inside a double-quoted Cypher string literal.

  Backslashes are escaped first so that the escapes added afterwards are not
  themselves doubled. Line breaks become `\\n` / `\\r` escapes so that every
  generated statement stays on a single line.
  """
  return (text.replace('\\', '\\\\')
              .replace('"', '\\"')
              .replace('\n', '\\n')
              .replace('\r', '\\r'))


# Source ids of the users seen so far; its size drives the next `user_id`.
user_ids = set()
class User:
  """A user taken from a scraped note-detail record.

  `user_id` is assigned as `len(user_ids) + 1`, so the caller must add
  `self.id` to `user_ids` after constructing the instance to keep ids unique.
  """

  def __init__(self, spec):
    """Build a user from `spec`, a dict with a 'user' entry holding
    'id', 'image' and 'nickname'."""
    self.id = spec['user']['id']
    self.user_id = len(user_ids) + 1
    self.avatar = spec['user']['image']
    self.nickname = spec['user']['nickname']
    # Reads the module-level `account` value current at construction time.
    self.account = account
    self.password = generate_random_str()
    self.create_time = randomtime()

  def sql(self):
    """Return the SQL INSERT statement for this user (single quotes in the
    nickname are doubled)."""
    return user_template.format(self.user_id, self.nickname.replace('\'', '\'\''), self.avatar, self.password, self.account, self.create_time)

  def create_user(self):
    """Return the Cypher CREATE statement for this user's node."""
    return 'CREATE (:User{{user_id:{}, user_nickname:"{}", user_avatar:"{}"}});'.format(
        self.user_id, _cypher_escape(self.nickname), _cypher_escape(self.avatar))


# Source ids of the posts seen so far; its size drives the next `post_id`.
post_ids = set()
class Post:
  """A post assembled from a list entry (`note`) and its detail record (`spec`).

  `post_id` is assigned as `len(post_ids) + 1`, so the caller must add
  `self.id` to `post_ids` after constructing the instance.
  """

  def __init__(self, note, spec):
    """Read id, timestamp, title and images from `note['note']`, and the
    description, head tags and topics from `spec['note_list'][0]`."""
    self.id = note['note']['id']
    self.post_id = len(post_ids) + 1
    # Rendered in the local timezone of the machine running the notebook.
    self.publish_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(note['note']['timestamp']))
    self.title = note['note']['title']
    self.imgs = [img_template.format(img['trace_id']) for img in note['note']['images_list']]
    self.desc = spec['note_list'][0]['desc']
    self.head_tags = [tag['name'] for tag in spec['note_list'][0]['head_tags']]
    self.topics = [topic['name'] for topic in spec['note_list'][0]['topics']]

  def sql(self):
    """Return the SQL INSERT statement for this post (single quotes in the
    description and title are doubled; image URLs are joined with ', ')."""
    return post_template.format(self.post_id, ', '.join(self.imgs), self.desc.replace('\'', '\'\''), self.title.replace('\'', '\'\''), self.publish_time)

  def create_post(self):
    """Return the Cypher CREATE statement for this post's node.

    Only the first image (or an empty string) and the first 30 characters of
    the description are stored on the node.
    """
    img = self.imgs[0] if self.imgs else ''
    desc = self.desc[:30]
    return 'CREATE (:Post{{post_id:{}, post_title:"{}", post_img:"{}", post_desc:"{}"}});'.format(
        self.post_id, _cypher_escape(self.title), _cypher_escape(img), _cypher_escape(desc))


# Addresses registered so far; its size drives the next `place_id`.
place_address = set()
class Place:
  """A place identified by province, area and address.

  `place_id` is assigned as `len(place_address) + 1`, so the caller must add
  the address to `place_address` after constructing the instance.
  """

  def __init__(self, province, area, address):
    self.place_id = len(place_address) + 1
    self.province = province
    self.area = area
    self.address = address

  def create_place(self):
    """Return the Cypher CREATE statement for this place's node."""
    return 'CREATE (:Place{{place_id:{}, place_province:"{}", place_area:"{}", place_address:"{}"}});'.format(
        self.place_id, _cypher_escape(self.province), _cypher_escape(self.area), _cypher_escape(self.address))


# Topic names registered so far; its size drives the next `topic_id`.
topic_names = set()
class Topic:
  """A topic tag.

  `topic_id` is assigned as `len(topic_names) + 1`, so the caller must add
  the name to `topic_names` after constructing the instance.
  """

  def __init__(self, name):
    self.topic_id = len(topic_names) + 1
    self.name = name

  def create_topic(self):
    """Return the Cypher CREATE statement for this topic's node."""
    return 'CREATE (:Topic{{topic_id:{}, topic_name:"{}"}});'.format(
        self.topic_id, _cypher_escape(self.name))

    

In [5]:
# Cypher patterns that connect two existing nodes, matched by their numeric
# ids, with a single directed relationship.
publish_template = (
  'MATCH (u:User),(p:Post) '
  'WHERE u.user_id = {} AND p.post_id = {} '
  'CREATE (u) -[:PUBLISH]-> (p);'
)
like_template = (
  'MATCH (u:User),(p:Post) '
  'WHERE u.user_id = {} AND p.post_id = {} '
  'CREATE (u) -[:LIKE]-> (p);'
)
collect_template = (
  'MATCH (u:User),(p:Post) '
  'WHERE u.user_id = {} AND p.post_id = {} '
  'CREATE (u) -[:COLLECT]-> (p);'
)
follow_template = (
  'MATCH (u1:User),(u2:User) '
  'WHERE u1.user_id = {} AND u2.user_id = {} '
  'CREATE (u1) -[:FOLLOW]-> (u2);'
)
store_template = (
  'MATCH (u:User),(p:Place) '
  'WHERE u.user_id = {} AND p.place_id = {} '
  'CREATE (u) -[:STORE]-> (p);'
)
suggest_template = (
  'MATCH (p:Post),(pl:Place) '
  'WHERE p.post_id = {} AND pl.place_id = {} '
  'CREATE (p) -[:SUGGEST]-> (pl);'
)
belong_template = (
  'MATCH (p:Post),(t:Topic) '
  'WHERE p.post_id = {} AND t.topic_id = {} '
  'CREATE (p) -[:BELONG]-> (t);'
)


def create_publish(user_id, post_id):
  """Return the Cypher statement for (user) -[:PUBLISH]-> (post)."""
  return publish_template.format(user_id, post_id)


def create_like(user_id, post_id):
  """Return the Cypher statement for (user) -[:LIKE]-> (post)."""
  return like_template.format(user_id, post_id)


def create_collect(user_id, post_id):
  """Return the Cypher statement for (user) -[:COLLECT]-> (post)."""
  return collect_template.format(user_id, post_id)


def create_follow(id1, id2):
  """Return the Cypher statement for (user id1) -[:FOLLOW]-> (user id2)."""
  return follow_template.format(id1, id2)


def create_store(user_id, place_id):
  """Return the Cypher statement for (user) -[:STORE]-> (place)."""
  return store_template.format(user_id, place_id)


def create_suggest(post_id, place_id):
  """Return the Cypher statement for (post) -[:SUGGEST]-> (place)."""
  return suggest_template.format(post_id, place_id)


def create_belong(post_id, topic_id):
  """Return the Cypher statement for (post) -[:BELONG]-> (topic)."""
  return belong_template.format(post_id, topic_id)


In [6]:
# Build the in-memory users / posts / topics / places from the scraped JSON
# files and record one SUGGEST edge (post -> place) per newly seen post.
#
# NOTE(review): the lists are reset here, but `post_ids`, `user_ids`,
# `topic_names` and `place_address` come from an earlier cell and are not.
# Re-running this cell without re-running that one therefore yields empty
# `posts` / `users`. Restart & Run All is the supported way to execute this.
users = []
posts = []
topics = []
places = []

# The suggest file is opened once for the whole run rather than once per post.
# Append mode is kept from the original; remove a stale file before a fresh run
# to avoid duplicated edges.
with open('cql/suggest.txt', 'a', encoding='utf-8') as suggest_file:
  for i, area in enumerate(areas):
    # `helpers` is paired with `areas` by position.
    place = Place("上海市", helpers[i], area)
    places.append(place)
    place_address.add(area)

    for page in range(1, 5):
      list_name = getListName(area, page)
      if not exist(list_name):
        continue
      with open(list_name, 'r', encoding='utf-8') as list_file:
        cur_list = json.load(list_file)
      # Skip list pages whose API call did not succeed.
      if cur_list['showapi_res_code'] != 0 or cur_list['showapi_res_body']['code'] != 0:
        continue
      listnotes = cur_list['showapi_res_body']['data']['items']

      note_name = getNoteName(area, page)
      if not exist(note_name):
        continue
      with open(note_name, 'r', encoding='utf-8') as note_file:
        note_js = json.load(note_file)

      for note in listnotes:
        if 'note' not in note:
          continue
        keyname = getKeyName(area, note['note']['id'])
        if keyname not in note_js:
          continue
        response = note_js[keyname]
        # Only use detail records that succeeded and carry at least one entry.
        if response['showapi_res_code'] != 0 or len(response['showapi_res_body']['data']) == 0:
          continue
        detail = response['showapi_res_body']['data'][0]

        if note['note']['id'] not in post_ids:
          post = Post(note, detail)
          post_ids.add(post.id)
          for topic in post.topics:
            if topic not in topic_names:
              topics.append(Topic(topic))
              topic_names.add(topic)
          posts.append(post)
          suggest_file.write(create_suggest(post.post_id, place.place_id))
          suggest_file.write('\n')

        if detail['user']['id'] not in user_ids:
          user = User(detail)
          user_ids.add(user.id)
          # Each new user consumes one account number.
          account += 1
          users.append(user)

In [7]:
# Keep at most the first 1500 users; everything generated below only refers
# to this truncated list.
users = users[:1500]

# Posts: one INSERT per post, separated by a blank line.
with open('sql/post.sql', 'w', encoding='utf-8') as sql_file:
  for post in posts:
    sql_file.write(post.sql())
    sql_file.write('\n\n')

# Users: one INSERT per line.
with open('sql/user.sql', 'w', encoding = 'utf-8') as user_sql:
  for user in users:
    user_sql.write(user.sql())
    user_sql.write('\n')

# Topics and places are written as double-quoted SQL string literals, so any
# embedded double quote is doubled to keep the statement well-formed.
with open('sql/topic.sql', 'w', encoding='utf-8') as topic_sql:
  for topic in topics:
    topic_sql.write('INSERT INTO topic VALUES({}, "{}");\n'.format(
        topic.topic_id, topic.name.replace('"', '""')))

with open('sql/place.sql', 'w', encoding='utf-8') as place_sql:
  for place in places:
    place_sql.write('INSERT INTO place VALUES({}, "{}", "{}", "{}");\n'.format(
        place.place_id,
        place.province.replace('"', '""'),
        place.area.replace('"', '""'),
        place.address.replace('"', '""')))


In [None]:
# NOTE(review): this unexecuted cell repeats the topic/place export that the
# preceding cell already performs and overwrites the same two files; consider
# deleting it.
# Values go into double-quoted SQL string literals, so any embedded double
# quote is doubled to keep the statement well-formed.
with open('sql/topic.sql', 'w', encoding='utf-8') as topic_sql:
  for topic in topics:
    topic_sql.write('INSERT INTO topic VALUES({}, "{}");\n'.format(
        topic.topic_id, topic.name.replace('"', '""')))

with open('sql/place.sql', 'w', encoding='utf-8') as place_sql:
  for place in places:
    place_sql.write('INSERT INTO place VALUES({}, "{}", "{}", "{}");\n'.format(
        place.place_id,
        place.province.replace('"', '""'),
        place.area.replace('"', '""'),
        place.address.replace('"', '""')))


In [8]:
# SQL patterns that bump one denormalised counter column by 1 for a given row.
user_fan_template = (
  'UPDATE user SET user_fan_count = user_fan_count + 1 '
  'WHERE user_id = {};'
)
user_follow_template = (
  'UPDATE user SET user_follow_count = user_follow_count + 1 '
  'WHERE user_id = {};'
)
user_post_template = (
  'UPDATE user SET user_post_count = user_post_count + 1 '
  'WHERE user_id = {};'
)
user_cpost_template = (
  'UPDATE user SET user_collect_post_count = user_collect_post_count + 1 '
  'WHERE user_id = {};'
)
user_clocation_template = (
  'UPDATE user SET user_collect_location_count = user_collect_location_count + 1 '
  'WHERE user_id = {};'
)
post_collect_template = (
  'UPDATE post SET post_collect_count = post_collect_count + 1 '
  'WHERE post_id = {};'
)
post_like_template = (
  'UPDATE post SET post_like_count = post_like_count + 1 '
  'WHERE post_id = {};'
)


def incr_user_fan(id):
  """Return the UPDATE that increments `user_fan_count` for user `id`."""
  statement = user_fan_template.format(id)
  return statement


def incr_user_follow(id):
  """Return the UPDATE that increments `user_follow_count` for user `id`."""
  statement = user_follow_template.format(id)
  return statement


def incr_user_post(id):
  """Return the UPDATE that increments `user_post_count` for user `id`."""
  statement = user_post_template.format(id)
  return statement


def incr_user_cpost(id):
  """Return the UPDATE that increments `user_collect_post_count` for user `id`."""
  statement = user_cpost_template.format(id)
  return statement


def incr_user_clocation(id):
  """Return the UPDATE that increments `user_collect_location_count` for user `id`."""
  statement = user_clocation_template.format(id)
  return statement


def incr_post_collect(id):
  """Return the UPDATE that increments `post_collect_count` for post `id`."""
  statement = post_collect_template.format(id)
  return statement


def incr_post_like(id):
  """Return the UPDATE that increments `post_like_count` for post `id`."""
  statement = post_like_template.format(id)
  return statement

In [9]:
# Generate random PUBLISH / BELONG / LIKE / COLLECT / FOLLOW / STORE edges as
# Cypher, plus the matching counter UPDATEs as SQL.
#
# NOTE(review): every output is opened in append mode (kept from the original),
# so running this cell twice adds a second random batch to the same files.
# Remove the old files first when a fresh data set is wanted.

# Resolve topic names to ids once instead of scanning `topics` for every tag.
# `setdefault` keeps the first id if a name were ever to appear twice.
topic_id_by_name = {}
for topic in topics:
  topic_id_by_name.setdefault(topic.name, topic.topic_id)

with open('sql/update.sql', 'a', encoding='utf-8') as uf:

  # Every post is published by one randomly chosen user.
  with open('cql/publish.txt', 'a', encoding='utf-8') as pf:
    for post in posts:
      user = random.choice(users)
      pf.write(create_publish(user.user_id, post.post_id) + '\n')
      uf.write(incr_user_post(user.user_id) + '\n')

  # Every post belongs to each of its topics.
  with open('cql/belong.txt', 'a', encoding='utf-8') as bf:
    for post in posts:
      for pt in post.topics:
        topic_id = topic_id_by_name.get(pt)
        if topic_id is None:
          print('Error')
          continue
        bf.write(create_belong(post.post_id, topic_id) + '\n')

  # The per-user outputs are opened once for the whole loop instead of being
  # reopened for every user.
  with open('cql/like.txt', 'a', encoding='utf-8') as lf, \
       open('cql/collect.txt', 'a', encoding='utf-8') as cf, \
       open('cql/follow.txt', 'a', encoding='utf-8') as ff, \
       open('cql/store.txt', 'a', encoding='utf-8') as sf:
    for user in users:
      # Targets are sampled WITHOUT replacement so a user never likes,
      # collects, follows or stores the same thing twice (which would create
      # duplicate edges and inflate the counters). Sample sizes are capped by
      # the population size.
      for liked in random.sample(posts, min(rint(20, 30), len(posts))):
        lf.write(create_like(user.user_id, liked.post_id) + '\n')
        uf.write(incr_post_like(liked.post_id) + '\n')

      for collected in random.sample(posts, min(rint(5, 30), len(posts))):
        cf.write(create_collect(user.user_id, collected.post_id) + '\n')
        uf.write(incr_post_collect(collected.post_id) + '\n')
        uf.write(incr_user_cpost(user.user_id) + '\n')

      # Draw one extra candidate so the user can be dropped from the sample.
      # With a single user this yields no follows instead of looping forever.
      wanted = rint(5, 10)
      candidates = random.sample(users, min(wanted + 1, len(users)))
      followees = [other for other in candidates if other.user_id != user.user_id][:wanted]
      for other in followees:
        ff.write(create_follow(user.user_id, other.user_id) + '\n')
        uf.write(incr_user_fan(other.user_id) + '\n')
        uf.write(incr_user_follow(user.user_id) + '\n')

      for stored in random.sample(places, min(rint(10, 20), len(places))):
        sf.write(create_store(user.user_id, stored.place_id) + '\n')
        uf.write(incr_user_clocation(user.user_id) + '\n')

In [10]:
# Write the node-creation Cypher files: one CREATE statement per entity,
# each followed by a newline. Existing files are overwritten.
with open('cql/user.txt', 'w', encoding='utf-8') as user_out:
  user_out.writelines(user.create_user() + '\n' for user in users)

with open('cql/post.txt', 'w', encoding='utf-8') as post_out:
  post_out.writelines(post.create_post() + '\n' for post in posts)

with open('cql/topic.txt', 'w', encoding='utf-8') as topic_out:
  topic_out.writelines(topic.create_topic() + '\n' for topic in topics)

with open('cql/place.txt', 'w', encoding='utf-8') as place_out:
  place_out.writelines(place.create_place() + '\n' for place in places)

In [11]:
# Neo4j connection. The URI and password can be overridden through the
# NEO4J_URI / NEO4J_PASSWORD environment variables so that credentials do not
# have to live in the notebook; the defaults are the previous hard-coded values.
graph = Graph(
  os.environ.get('NEO4J_URI', 'http://127.0.0.1:7474'),
  name='neo4j',
  password=os.environ.get('NEO4J_PASSWORD', '123456'),
)

In [12]:
# Load the post CREATE statements. A statement may span several physical lines
# in the file, so lines are concatenated (newlines dropped, nothing inserted
# between them) until the accumulated text ends with ';'. An unterminated
# trailing fragment is discarded.
post_cypher = []
with open('cql/post.txt', 'r', encoding='utf-8') as f:
  pending = ''
  for raw in f:
    pending += raw.strip('\n')
    if pending.endswith(';'):
      post_cypher.append(pending)
      pending = ''

# Run the statements in order and stop at the first failure, showing both the
# offending statement and the error. `Exception` (not `BaseException`) is
# caught so that Ctrl-C / kernel interrupts still propagate.
for statement in post_cypher:
  try:
    graph.run(statement)
  except Exception as exc:
    print(statement)
    print('failed: {!r}'.format(exc))
    break

In [13]:
# Execute the remaining Cypher files one statement per line. Node files come
# first (user, topic, place) so that the relationship statements that follow
# can MATCH their endpoints. Each file is closed as soon as it has been
# processed instead of being left open.
cql_names = (
  'user', 'topic', 'place',
  'suggest', 'store', 'publish', 'like', 'follow', 'collect', 'belong',
)
for cql_name in cql_names:
  with open('cql/{}.txt'.format(cql_name), 'r', encoding='utf-8') as cql_file:
    for line in cql_file:
      # Skip blank lines rather than sending an empty statement.
      if not line.strip():
        continue
      graph.run(line)