Skip to content

Commit

Permalink
Parse free company members
Browse files Browse the repository at this point in the history
  • Loading branch information
Sami committed Oct 23, 2015
1 parent 9aefc94 commit d2d522a
Showing 1 changed file with 34 additions and 39 deletions.
73 changes: 34 additions & 39 deletions api/scrapers/free_company.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from api import db
from api.models.free_company import FreeCompany
from api.scrapers.context_managers import HTMLFromLoadstone
from api.scrapers.character import scrape_character

import datetime
import re
Expand All @@ -15,8 +16,9 @@ def scrape_free_company(lodestone_id, basics_only=False):
fc = FreeCompany(id=lodestone_id)

scrape_free_company_basics(fc, html)
# if not basics_only:
# scrape_free_company_members(fc, html)
if not basics_only:
total_members = int(html.xpath('//tr[3]/td/text()')[0])
scrape_free_company_members(fc, total_members)

return fc

Expand All @@ -42,40 +44,33 @@ def scrape_free_company_basics(fc, html):
db.session.commit()


# def scrape_free_company_members(fc, html):
# total_members = int(tree.xpath('//tr[3]/td/text()')[0])
# page_num = 1
#
# while total_members > 0:
# total_members -= 50
# page_num += 1
#
# headers = {'User-Agent': USER_AGENT}
# uri = 'http://na.finalfantasyxiv.com/lodestone/freecompany/{lodestone_id}/member/?page={page_num}'.format(
# lodestone_id=lodestone_id,
# page_num=page_num)
# page = requests.get(uri, headers=headers)
# assert page.status_code == 200
#
# tree = html.fromstring(page.text)
#
# character_ids = map(
# lambda x: x.attrib['href'].split('/')[3],
# tree.xpath('//div[@class="name_box"]/a'))
#
# for character_id in character_ids:
# if character_id == '8774791':
# fc.members.add(scrape_character(character_id))
#
# # Grab items from database / grab in parallel
# # character_threads = []
# # for character_id in character_ids:
# # try:
# # fc.members.add(Character.objects.get(lodestone_id=lodestone_id))
# # except ObjectDoesNotExist:
# # thread = CharacterThread(character_id)
# # thread.start()
# # character_threads.append(thread)
# # for thread in character_threads:
# # fc.members.add(thread.join())
# pass
def scrape_free_company_members(fc, total_members):
    """Scrape the members listed on a free company's Lodestone member pages.

    Requests each paginated member-list page for ``fc.id``, extracts the
    character id from every member link, scrapes that character and appends
    the result of ``scrape_character`` to ``fc.members_id``.

    Pages are numbered from 1. The previous implementation incremented the
    page counter before the first request, so page 1 was skipped and one
    page past the end was requested; this version visits exactly pages
    ``1..ceil(total_members / 50)``.

    :param fc: free company object; ``fc.id`` is used in the URL and
        ``fc.members_id`` must support ``append``.
    :param total_members: number of members in the free company. A value
        of zero or less results in no requests.
    """
    # The member list is assumed to show 50 entries per page (the step the
    # original loop used) -- TODO confirm against the live Lodestone markup.
    members_per_page = 50

    # Integer ceiling division: a partially filled last page still counts.
    page_count = (total_members + members_per_page - 1) // members_per_page

    for page_num in range(1, page_count + 1):
        url = 'http://na.finalfantasyxiv.com/lodestone/freecompany/{lodestone_id}/member/?page={page_num}'.format(
            lodestone_id=fc.id,
            page_num=page_num)
        with HTMLFromLoadstone(url) as html:
            # The id is the 4th '/'-separated piece of the link's href
            # (presumably of the form '/lodestone/character/<id>/').
            character_ids = [
                link.attrib['href'].split('/')[3]
                for link in html.xpath('//div[@class="name_box"]/a')
            ]

            for character_id in character_ids:
                # skip_free_company_parse=True is passed through unchanged;
                # presumably it stops the character scraper from scraping
                # this free company again -- verify in scrape_character.
                fc.members_id.append(
                    scrape_character(character_id, skip_free_company_parse=True))

# Grab items from database / grab in parallel
# character_threads = []
# for character_id in character_ids:
# try:
# fc.members.add(Character.objects.get(lodestone_id=lodestone_id))
# except ObjectDoesNotExist:
# thread = CharacterThread(character_id)
# thread.start()
# character_threads.append(thread)
# for thread in character_threads:
# fc.members.add(thread.join())

0 comments on commit d2d522a

Please sign in to comment.