Issue 45 #82
Merged: 12 commits, Oct 6, 2017
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -1,4 +1,4 @@
# Contributing Guidelines
1. Please maintain proper commenting etiqiutte. Pretend that this is a work environment and that the comments are being reviewed by coworkers.
1. Please maintain proper commenting etiquette. Pretend that this is a work environment and that the comments are being reviewed by coworkers.
1. Ensure that your changes compile/work before opening a PR.
1. Follow style guidelines where applicable.
19 changes: 9 additions & 10 deletions PittAPI/course.py
@@ -20,8 +20,8 @@

import requests
import re
from typing import List, Dict, Tuple, Any
from bs4 import BeautifulSoup, SoupStrainer
from typing import Any, Dict, List, Tuple
from bs4 import BeautifulSoup, SoupStrainer, Tag, ResultSet
Contributor Author:

Rebase conflicts... I just tend to put things in alphabetical order.


URL = 'http://www.courses.as.pitt.edu/'

@@ -39,8 +39,8 @@
PROGRAMS = ['CLST', 'ENV', 'FILMST', 'MRST', 'URBNST', 'SELF', 'GSWS']
DAY_PROGRAM, SAT_PROGRAM = 'CGSDAY', 'CGSSAT'

def get_courses(term, code):

def get_courses(term: str, code: str) -> List[Dict[str,str]]:
"""Returns a list of dictionaries containing all courses queried from code."""
col_headers, course_data = _retrieve_courses_from_url(
url=URL + _get_subject_query(code, term)
@@ -74,16 +73,15 @@ def _validate_term(term: str) -> str:
raise ValueError("Invalid term")


# TODO: Write type definition for bs objects (annotated as Any for now)
def _retrieve_courses_from_url(url: str) -> Tuple[List[str], Any]:
def _retrieve_courses_from_url(url: str) -> Tuple[List[str],ResultSet]:
"""Returns a tuple of column header keys and list of course data."""
page = requests.get(url)
soup = BeautifulSoup(page.text, 'lxml', parse_only=SoupStrainer(['table', 'tr', 'th']))
courses = _extract_header(soup.findAll('th')), soup.findAll("tr", {"class": ["odd", "even"]})
return courses


def _extract_header(data: Any) -> List[str]:
def _extract_header(data: List[Any]) -> List[str]:
"""Extracts column headers and converts it into keys for a future dictionary."""
header = []
for tag in data:
@@ -95,7 +93,7 @@ def _extract_header(data: Any) -> List[str]:
return header


def _extract_course_data(header: List[str], course: Any) -> Dict[str,str]:
def _extract_course_data(header: List[str], course: Tag) -> Dict[str,str]:
"""Constructs a dictionary from column header labels(subject, class number, etc.) and course data."""
data = {}
for item, value in zip(header, course.findAll('td')):
@@ -124,7 +122,7 @@ def get_class(term: str, class_number: str) -> Dict[str,Any]:
return dict(class_details, **{'class_number': class_number, 'term': term})


def _extract_description(text: str) -> Dict[str,Any]:
def _extract_description(text: str) -> Dict[str,str]:
"""Extracts class description from web page"""
soup = BeautifulSoup(text, 'lxml', parse_only=SoupStrainer(['td']))
description = {
@@ -133,7 +131,8 @@ def _extract_description(text: str) -> Dict[str,Any]:

return description

def _extract_details(text: str) -> Dict[str,str]:

def _extract_details(text: str) -> Dict[str,Any]:
"""Extracts class number, classroom, section, date, and time from web page"""
soup = BeautifulSoup(text, 'lxml', parse_only=SoupStrainer(['td']))
row = soup.findAll('td', {'class': 'style1'})
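As context for the annotations introduced above, here is a minimal sketch (not part of this PR) showing how bs4's Tag and ResultSet can be used in signatures; it assumes the lxml parser is installed, and the function names fetch_rows and row_to_dict are illustrative only.

import requests
from bs4 import BeautifulSoup, ResultSet, SoupStrainer, Tag
from typing import Dict, List, Tuple

def fetch_rows(url: str) -> Tuple[List[str], ResultSet]:
    # Parse only the table markup and return (header labels, row tags).
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'lxml', parse_only=SoupStrainer(['table', 'tr', 'th']))
    headers = [th.text.strip() for th in soup.findAll('th')]
    rows = soup.findAll('tr', {'class': ['odd', 'even']})  # a ResultSet of Tag objects
    return headers, rows

def row_to_dict(header: List[str], row: Tag) -> Dict[str, str]:
    # Zip header labels with the text of each td cell in a single row.
    return {key: td.text.strip() for key, td in zip(header, row.findAll('td'))}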
1 change: 0 additions & 1 deletion PittAPI/lab.py
@@ -19,7 +19,6 @@

import requests
from bs4 import BeautifulSoup

Contributor Author:

Normally I wouldn't impose my style on the project, but I already had this committed and this stuck around after rebasing...

from typing import List, Dict

session = requests.session()
24 changes: 12 additions & 12 deletions PittAPI/laundry.py
@@ -22,7 +22,7 @@
import requests
from bs4 import BeautifulSoup

from typing import Dict, List
from typing import Dict, List, Union

session = requests.session()

@@ -77,7 +77,7 @@ def get_status_simple(building_name: str) -> Dict[str,str]:
return di


def get_status_detailed(building_name: str) -> List[Dict[str,str]]:
def get_status_detailed(building_name: str) -> List[Dict[str,Union[str,int]]]:
building_name = building_name.upper()

# Get a cookie
@@ -88,34 +88,34 @@ def get_status_detailed(building_name: str) -> List[Dict[str,str]]:
# Get the weird laundry data
headers = {"Cookie": "PHPSESSID={}".format(cookie)}
response = requests.get("http://www.laundryview.com/dynamicRoomData.php?location={}".format(
location_dict[building_name]), headers=headers).text
resp_split = response.split('&')[3:]
location_dict[building_name]), headers=headers)
resp_split = response.text.split('&')[3:]

cleaned_resp = []
cleaned_resp = [] # type: List[List[str]]
for status_string in resp_split:
if "machine" not in status_string:
continue
machine_name = status_string[:status_string.index('=')].replace('Status', '') #type: str
status_string = status_string[status_string.index('=') + 1:].strip()

old_machine_split = status_string.split("\n") #type: List[str]
old_machine_split[0] += machine_name
status_lines = status_string.split("\n") #type: List[str]
status_lines[0] += machine_name
Contributor Author:

This is another instance where I normally wouldn't have renamed this variable, but my changes conflicted with someone else who renamed it, and I think the name status_lines is clearer.


try:
old_machine_split[1] += machine_name
status_lines[1] += machine_name
except IndexError:
pass

machine_split = [x.split(':') for x in old_machine_split]
cleaned_resp.append(machine_split[0])
split_status_lines = [x.split(':') for x in status_lines]
cleaned_resp.append(split_status_lines[0])
try:
cleaned_resp.append(machine_split[1])
cleaned_resp.append(split_status_lines[1])
except IndexError:
pass

cleaned_resp = [x for x in cleaned_resp if len(x) == 10]

di = [] # type: List[Dict[str,str]]
di = [] # type: List[Dict[str,Union[str,int]]]
for machine in cleaned_resp:
time_left = -1
machine_name = "{}_{}".format(machine[9], machine[3])
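The hunk above mixes PEP 484 type comments on locals (the "# type:" annotations) with Union in the return type, since the per-machine dictionaries hold both strings and ints. A small hypothetical sketch of that combination, checkable with mypy; the field layout here is assumed, not taken from LaundryView:

from typing import Dict, List, Union

def machine_status(fields: List[str]) -> Dict[str, Union[str, int]]:
    # Mix string fields with a numeric time estimate in one dict,
    # which is why the value type is Union[str, int].
    time_left = -1  # type: int
    if fields and fields[-1].isdigit():
        time_left = int(fields[-1])
    status = {'machine_name': fields[0] if fields else 'unknown',
              'time_left': time_left}  # type: Dict[str, Union[str, int]]
    return status

print(machine_status(['WASHER_01', 'available', '32']))  # {'machine_name': 'WASHER_01', 'time_left': 32}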
47 changes: 24 additions & 23 deletions PittAPI/library.py
@@ -19,7 +19,7 @@

import requests
from html.parser import HTMLParser
from typing import List, Dict
from typing import Any, Dict, List

LIBRARY_URL = "http://pitt.summon.serialssolutions.com/api/search"
sess = requests.session()
@@ -28,79 +28,80 @@ class HTMLStrip(HTMLParser):
def __init__(self):
super().__init__()
self.reset()
self.data = [] #type: List[str]
def handle_data(self, d: str):
self.data = []
def handle_data(self, d: str) -> None:
self.data.append(d)
def get_data(self) -> str:
return ''.join(self.data)
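A quick usage note on the class above (illustrative; it assumes the package imports as PittAPI): HTMLParser.feed() routes every text node to handle_data, so joining self.data yields the text with markup removed.

from PittAPI.library import HTMLStrip

stripper = HTMLStrip()
stripper.feed('<em>Annals</em> of <b>Improbable</b> Research')
print(stripper.get_data())  # Annals of Improbable Research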

def get_documents(query: str, page: int=1) -> Dict[str,str]:

def get_documents(query: str, page: int=1) -> Dict[str,Any]:
"""Return ten resource results from the specified page"""
if page > 50:
# Max supported page number is 50
page = 50

payload = {'q': query, 'pn': page}
resp = sess.get(LIBRARY_URL, params=payload)
resp = resp.json()
resp_json = resp.json()

results = _extract_results(resp)
results = _extract_results(resp_json)
return results


def get_document_by_bookmark(bookmark):
def get_document_by_bookmark(bookmark: str) -> Dict[str,Any]:
"""Return resource referenced by bookmark"""
payload = {'bookMark': bookmark}
resp = sess.get(LIBRARY_URL, params=payload)
resp = resp.json()
resp_json = resp.json()

if resp.get("errors"):
for error in resp.get("errors"):
if resp_json.get("errors"):
for error in resp_json.get("errors"):
if error['code'] == 'invalid.bookmark.format':
raise ValueError("Invalid bookmark")

results = _extract_results(resp)
results = _extract_results(resp_json)
return results

def _strip_html(html):
def _strip_html(html: str) -> str:
strip = HTMLStrip()
strip.feed(html)
return strip.get_data()

def _extract_results(json):
def _extract_results(json: Dict[str,Any]) -> Dict[str,Any]:
results = {
'page_count': json['page_count'],
'record_count': json['record_count'],
'page_number': json['query']['page_number'],
'facet_fields': _extract_facets(json['facet_fields']),
'documents': _extract_documents(json['documents'])
}
}

return results

def _extract_documents(documents):
def _extract_documents(documents: List[Dict[str,Any]]) -> List[Dict[str,Any]]:
new_docs = []

keep_keys = ['bookmarks', 'content_types', 'subject_terms', 'languages', \
'isbns', 'full_title', 'publishers', 'publication_years', 'discipline', \
'authors', 'abstracts', 'link', 'lc_call_numbers', 'has_fulltext', \
'fulltext_link']
keep_keys = {'bookmarks', 'content_types', 'subject_terms', 'languages',
'isbns', 'full_title', 'publishers', 'publication_years', 'discipline',
'authors', 'abstracts', 'link', 'lc_call_numbers', 'has_fulltext',
'fulltext_link'}

for doc in documents:
new_doc = {}
for key in doc.keys() & keep_keys:
for key in set(doc.keys()) & keep_keys:
new_doc[key] = doc[key]
new_doc['full_title'] = _strip_html(new_doc['full_title'])
new_docs.append(new_doc)

return new_docs

def _extract_facets(facet_fields):
facets = {}
def _extract_facets(facet_fields: List[Dict[str,Any]]) -> Dict[str,List[Dict[str,Any]]]:
facets = {} # type: Dict[str,List[Dict[str,Any]]]
for facet in facet_fields:
facets[facet['display_name']] = []
for count in facet['counts']:
facets[facet['display_name']].append({'value': count['value'], \
facets[facet['display_name']].append({'value': count['value'],
'count': count['count']})

return facets
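Why keep_keys became a set: dict.keys() returns a view that supports set operations, so intersecting it with a set keeps only the whitelisted fields in one expression. A hypothetical sketch, not the PR's code; the key names are made up:

from typing import Any, Dict, Set

keep_keys = {'full_title', 'authors', 'isbns'}  # type: Set[str]

def filter_document(doc: Dict[str, Any]) -> Dict[str, Any]:
    # doc.keys() & keep_keys works directly in Python 3; the diff's
    # set(doc.keys()) & keep_keys form is equivalent.
    return {key: doc[key] for key in doc.keys() & keep_keys}

print(filter_document({'full_title': 'Example', 'noise': 1, 'authors': ['A.']}))
# {'full_title': 'Example', 'authors': ['A.']}  (key order may vary)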
4 changes: 2 additions & 2 deletions tests/lab_test.py
@@ -107,8 +107,8 @@ def test_make_status(self):
self.assertEqual(open[keys[0]], 'open')

for key in keys[1:]:
self.assertEqual(closed[key], 0)
self.assertEqual(open[key], 1)
self.assertEqual(closed[key], '0')
self.assertEqual(open[key], '1')
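For context on this assertion change: the lab status values now come back as strings rather than ints, so the comparisons use '0' and '1'. Illustrative only; the key names below are assumptions:

status = {'status': 'open', 'mac': '1', 'windows': '1', 'linux': '1'}
assert status['mac'] == '1'   # string comparison matches
assert status['mac'] != 1     # comparing against the int 1 no longer would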
def test_extract_machines(self):
data = '123 hello_world, 456 macOS, 789 cool, 3 nice'