Skip to content

Commit

Permalink
Add phantomjs container
Browse files Browse the repository at this point in the history
  • Loading branch information
defaultcf committed Dec 18, 2016
1 parent d687a14 commit 16ac32e
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 6 deletions.
3 changes: 3 additions & 0 deletions app/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM python:3.5.2-alpine

RUN pip install Flask beautifulsoup4 selenium
50 changes: 46 additions & 4 deletions app/board.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
from urllib.request import urlopen
from urllib.parse import urljoin
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec

# Shared Selenium session for this module: a remote WebDriver connection to
# the PhantomJS endpoint at host `phantomjs`, port 8910.
# NOTE(review): this is created at import time, so presumably importing the
# module requires the remote endpoint to be reachable — confirm.
driver = webdriver.Remote(
command_executor='http://phantomjs:8910',
desired_capabilities=DesiredCapabilities.PHANTOMJS
)
# Explicit-wait helper bound to the shared driver; the second argument is the
# timeout (5, in seconds per Selenium's WebDriverWait API).
wait = WebDriverWait(driver, 5)

class Board:
def getList(self, mode:str) -> list:
Expand Down Expand Up @@ -36,17 +46,49 @@ def getThread(self, url:str) -> list:
@return list
"""
if not url: return []
url += "subback.html"

#html = urlopen(url)
html = requests.get(url, allow_redirects=True)
driver.get(url)
wait.until(ec.presence_of_all_elements_located)

url = driver.current_url + "subback.html"

html = urlopen(url)
soup = BeautifulSoup(html, "html.parser")
json = []

# baseタグがあった時の対策
if soup.find('base'):
url = soup.find('base').get("href")

if self.is2chNet(url):
return self.boardNet(soup, url)
else:
return self.boardSc(soup, url)


def is2chNet(self, url:str) -> bool:
    """
    Decide whether a URL belongs to 2ch.net.

    The check is a plain substring match against the known domain names,
    so bbspink.com URLs are treated as 2ch.net as well.

    @param url URL to classify
    @return True if the URL contains '2ch.net' or 'bbspink.com', else False
    """
    return any(domain in url for domain in ('2ch.net', 'bbspink.com'))


def boardNet(self, soup, url:str):
    """
    Collect thread links from a parsed thread-list page.

    Every <a> found inside every <small> element of the page becomes one
    entry; its href is resolved against `url` with urljoin.

    @param soup parsed page exposing find_all (e.g. a BeautifulSoup object)
    @param url base URL used to resolve the link hrefs
    @return list of {"thread": link text, "url": absolute link URL} dicts
    """
    return [
        {"thread": anchor.text, "url": urljoin(url, anchor.get("href"))}
        for small in soup.find_all('small')
        for anchor in small.find_all('a')
    ]


def boardSc(self, soup, url:str):
json = []
lists = soup.find('small')
for a in lists.find_all('a'):
href = urljoin(url, a.get("href"))
Expand Down
13 changes: 11 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,21 @@ services:

app:
container_name: piccolle_app
image: python:3.5.2-alpine
build: ./app
volumes:
- ./app/:/mnt/
links:
- phantomjs
expose:
- 5000
working_dir: /mnt
environment:
- FLASK_APP=main.py
command: ash -c 'pip install Flask beautifulsoup4 && python -m flask run --host=0.0.0.0'
command: python -m flask run --host=0.0.0.0

phantomjs:
container_name: piccolle_phantomjs
image: wernight/phantomjs:2.1.1
expose:
- 8910
command: phantomjs --webdriver=8910

0 comments on commit 16ac32e

Please sign in to comment.