Skip to content

Commit

Permalink
Adds User profile scraping and room info scraping, with simple tests.
Browse files Browse the repository at this point in the history
Moves some logic from client to room.

Adds simple docstrings with epydoc annotations to public fields and
methods of Client. Also adds epydoc as a dev dependency, and adds
`make epydocs`.

It probably isn't necessary to add this documentation to our internal
methods, and I won't bother with browser for now, but I think it's
useful to have for our user-friendly interface. (Both for users directly
and through IDEs that recognize the type annotations, like PyCharm CE.)

resolves #52 - let's forget about slugs and message sets for now. We can
reply to messages and a weak set of current messages might be pointless.

ref #65, #43 - event/message iterators/contexts are still outstanding.
  • Loading branch information
jeremyBanks committed May 15, 2014
1 parent 3aaf6b0 commit 826cd98
Show file tree
Hide file tree
Showing 14 changed files with 323 additions and 143 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ coverage.xml
# Sphinx documentation
docs/_build/

# epydoc output
html/
epydocs/

# IntelliJ IDEA Project Files
.idea/*
.iml
Expand Down
18 changes: 12 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,33 @@ default: test run-example

WARGS = -W default::Warning

run-example: install-dependencies
run-example: install-dependencies PHONY
python $(WARGS) examples/chat.py

run-web-example: install-dependencies
run-web-example: install-dependencies PHONY
python $(WARGS) examples/web_viewer.py

test: install-dependencies
test: install-dependencies PHONY
python $(WARGS) -m pytest

test-coverage: install-dependencies
test-coverage: install-dependencies PHONY
python -m coverage run --branch -m pytest
python -m coverage report --include 'chatexchange/*'

install-dependencies:
install-dependencies: PHONY
# This also creates a link to `chatexchange/` in the Python
# environment, which is necessary for the other files to be
# able to find it.
rm -rf src/*.egg-info
pip install -e .

clean:
epydocs: PHONY
epydoc chatexchange --html -o epydocs \
--top ChatExchange.chatexchange --no-frames --no-private --verbose

clean: PHONY
rm -rf src/*.egg-info
find . -type f -name '*.pyc' -delete
find . -type d -name '__pycache__' -delete

PHONY:
4 changes: 4 additions & 0 deletions chatexchange/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@
# Package-level aliases, so these can be reached as chatexchange.Browser
# and chatexchange.Client without naming the submodule.
Browser = browser.Browser

Client = client.Client

# Names exported by `from chatexchange import *`.
# NOTE(review): the lowercase entries are presumably submodules imported
# earlier in this file (outside this view) -- confirm they all exist.
__all__ = [
'browser', 'users', 'messages', 'rooms', 'events', 'client',
'Browser', 'Client']
71 changes: 71 additions & 0 deletions chatexchange/browser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# encoding: utf-8
import json
import logging
import threading
Expand All @@ -13,6 +14,9 @@


class Browser(object):
"""
An interface for scraping and making requests to Stack Exchange chat.
"""
user_agent = ('ChatExchange/0.dev '
'(+https://github.com/Manishearth/ChatExchange)')

Expand Down Expand Up @@ -55,6 +59,9 @@ def _request(

response.raise_for_status()

# XXX: until throttling is implemented everywhere in Client, at least add some delay here.
time.sleep(0.75)

return response

def get(self, url, data=None, headers=None, with_chat_root=True):
Expand Down Expand Up @@ -485,6 +492,70 @@ def _get_star_data(self, root_soup, include_starred_by_you):

return data

def get_profile(self, user_id):
    """
    Scrapes the chat profile page of the user with the given user_id.

    The page 'users/<user_id>' is fetched through self.get_soup() and
    reduced to a dict with the keys:

      - 'name': text of the first <h1> on the page.
      - 'is_moderator': True if the '.user-status' text contains a
        diamond (u'♦'), False otherwise.
      - 'message_count': int parsed from '.user-message-count-xxl'.
      - 'room_count': int parsed from '.user-room-count-xxl'.

    Only the first match of each CSS selector is used; indexing fails
    if a selector matches nothing.
    """
    soup = self.get_soup('users/%s' % (user_id,))

    def first_text(css):
        # Text of the first element matching the CSS selector.
        return soup.select(css)[0].text

    profile = {'name': soup.find('h1').text}
    # `in` already yields a bool, so no extra conversion is needed.
    profile['is_moderator'] = u'♦' in first_text('.user-status')
    profile['message_count'] = int(first_text('.user-message-count-xxl'))
    profile['room_count'] = int(first_text('.user-room-count-xxl'))
    return profile

def get_room_info(self, room_id):
    """
    Returns the data from the room info page for room_id.

    The page 'rooms/info/<room_id>' is fetched through self.get_soup()
    and scraped into a dict with the keys:

      - 'name': text of the first <h1> on the page.
      - 'description': inner markup (not just the text) of the first
        '.roomcard-xxl p' element, with surrounding whitespace
        stripped. Always a text (unicode) string.
      - 'message_count': int parsed from '.room-message-count-xxl'.
      - 'user_count': int parsed from '.room-user-count-xxl'.
      - 'parent_site_name': 'title' attribute of the first
        '.roomcard-xxl img', or None if there is no such image.
      - 'owner_user_ids' / 'owner_user_names': parallel lists with one
        entry per '#room-ownercards .usercard', as returned by
        self.user_id_and_name_from_link() for the card's first <a>.
      - 'tags': list with the text of every '.roomcard-xxl .tag'.

    The description and the two counts are required: indexing fails if
    their selectors match nothing, and int() raises ValueError if a
    count is not a plain integer.
    """
    info_soup = self.get_soup('rooms/info/%s' % (room_id,))

    name = info_soup.find('h1').text

    # decode_contents() renders the tag's children as a unicode string.
    # Slicing str(tag) gives the same markup, but under Python 2 str()
    # on a tag returns UTF-8 encoded bytes, which made this the only
    # non-unicode text field in the result.
    description = info_soup.select(
        '.roomcard-xxl p')[0].decode_contents().strip()

    message_count = int(info_soup.select('.room-message-count-xxl')[0].text)
    user_count = int(info_soup.select('.room-user-count-xxl')[0].text)

    # The parent site is only known through its icon; rooms without one
    # report None.
    parent_image_soups = info_soup.select('.roomcard-xxl img')
    parent_site_name = (
        parent_image_soups[0]['title'] if parent_image_soups else None)

    owners = [
        self.user_id_and_name_from_link(card_soup.find('a'))
        for card_soup in info_soup.select('#room-ownercards .usercard')
    ]
    owner_user_ids = [owner_id for owner_id, _ in owners]
    owner_user_names = [owner_name for _, owner_name in owners]

    tags = [
        tag_soup.text
        for tag_soup in info_soup.select('.roomcard-xxl .tag')
    ]

    return {
        'name': name,
        'description': description,
        'message_count': message_count,
        'user_count': user_count,
        'parent_site_name': parent_site_name,
        'owner_user_ids': owner_user_ids,
        'owner_user_names': owner_user_names,
        'tags': tags
    }


class RoomSocketWatcher(object):
def __init__(self, browser, room_id, on_activity):
Expand Down
Loading

0 comments on commit 826cd98

Please sign in to comment.