In [138]:
import requests
from lxml import html
from os import environ
from collections import OrderedDict
from datetime import datetime
from decimal import Decimal

# Credentials are read from the environment; a missing variable raises KeyError.
config = dict.fromkeys(('TFL_USERNAME', 'TFL_PASSWORD'))
for key in config:
    config[key] = environ[key]

# Login lives on the account site; everything else hangs off the contactless site.
login_endpoint = "https://account.tfl.gov.uk/Login"
base_url = "https://contactless.tfl.gov.uk"

# Pages requested later in the script.
mycards_endpoint = base_url + "/MyCards"
view_card_endpoint = base_url + "/Card/View"
statements_endpoint = base_url + "/Statements/TravelStatement"
statements_filter_endpoint = base_url + "/Statements/Refresh"

# Name of the query-string parameter that carries the card id.
card_id_param = "pi"
# Nickname used to pick the card out of the My Cards page.
mondo_card_nickname = "Mondo"
 
def element_at(l, index, default=None):
    """Return ``l[index]``, or ``default`` when that lookup is not possible.

    Args:
        l: Any subscriptable container (the script passes XPath result lists).
        index: Index or key to look up.
        default: Value returned when the lookup fails.

    Returns:
        The looked-up item, or ``default`` if the index/key is missing or
        ``l`` cannot be subscripted at all.
    """
    try:
        return l[index]
    # Only lookup failures fall back to the default; anything else
    # (KeyboardInterrupt, SystemExit, unrelated errors) must propagate.
    except (LookupError, TypeError):
        return default
    
def write_out(text):
    """Write ``text`` to a file named ``out`` in the current directory.

    Any existing file is overwritten. The encoding is pinned to UTF-8 so the
    result does not depend on the platform's default encoding and non-ASCII
    characters (such as the pound sign) can always be written.

    Args:
        text: The string to write.
    """
    with open('out', 'w', encoding='utf-8') as textfile:
        textfile.write(text)

# A single session is reused for every request so cookies carry over.
session_requests = requests.session()

# The page served at base_url contains an AppId input; its value has to be
# posted to the login endpoint together with the credentials.
result = session_requests.get(base_url)
tree = html.fromstring(result.text)
auth_token = element_at(tree.xpath("//input[@name='AppId']/@value"), 0)
if auth_token is None:
    # Fail with a readable message instead of a bare IndexError.
    raise ValueError('Cannot find AppId input on ' + base_url)

payload = {
    "UserName" : config['TFL_USERNAME'],
    "Password" : config['TFL_PASSWORD'],
    "AppId" : auth_token
}

result = session_requests.post(login_endpoint, data = payload, headers = dict(referer = base_url))
tree = html.fromstring(result.content)

# bad password: the response carries a validation-error div whose text is
# surfaced as the exception message.
error = element_at(tree.xpath('//div[@class="field-validation-error"]'), 0)
if error is not None:
    raise ValueError(error.text_content().strip())

# find mondo card
result = session_requests.get(mycards_endpoint, headers = dict(referer = base_url))
tree = html.fromstring(result.content)
# The nickname is bound as an XPath variable ($nickname) instead of being
# spliced into the expression text, so a nickname containing a quote
# character cannot break the query.
mondo_card_xpath = '//a[@data-pageobject="mycards-card-cardlink" '\
                   'and .//span[@class="view-card-nickname" and '\
                   'contains(., $nickname)]]/@href'
mondo_card = element_at(tree.xpath(mondo_card_xpath, nickname=mondo_card_nickname), 0)
if mondo_card is None:
    raise ValueError('Cannot find card with nickname ' + mondo_card_nickname)

# The card id is read from the link's href, right after "<card_id_param>=".
href_parts = mondo_card.split(card_id_param + "=")
if len(href_parts) < 2:
    # Fail with a readable message instead of a bare IndexError.
    raise ValueError('Cannot find card id in link ' + mondo_card)
mondo_card_id = href_parts[1]

# get incomplete journey warning
# The card page shows a warning heading when the card has incomplete journeys.
result = session_requests.get(
    view_card_endpoint,
    params={card_id_param: mondo_card_id},
    headers={'referer': mycards_endpoint},
)
tree = html.fromstring(result.content)
incomplete_journey_on_card_xpath = (
    '//div[@data-pageobject="card-notification"]'
    '/h5[@class="text-warning" and contains(., "Incomplete Journey")]'
)
incomplete_journey = element_at(tree.xpath(incomplete_journey_on_card_xpath), 0)
if not (incomplete_journey is None):
    print("Incomplete Journey(s) on card")

# get statements
result = session_requests.get(
    statements_endpoint,
    params={card_id_param: mondo_card_id},
    headers={'referer': mycards_endpoint},
)
tree = html.fromstring(result.content)
# Verification token taken from the "filters" form on the statements page
# (None when the input is not found).
refresh_token_xpath = '//form[@id="filters"]/input[@name="__RequestVerificationToken"]/@value'
filter_token = element_at(tree.xpath(refresh_token_xpath), 0)
# Values of every option in the period selector, except the option whose value is "7".
statements = tree.xpath('//select[@id="SelectedStatementPeriod"]/option/@value[not(.="7")]')
def create_payload(period):
    """Build the form data for a statement refresh request.

    Args:
        period: Value for the ``SelectedStatementPeriod`` field.

    Returns:
        A dict with the verification token, the fixed ``"Payments"``
        statement type, the given period and the card id. The token and
        card id are read from the module-level ``filter_token`` and
        ``mondo_card_id``.
    """
    form_fields = dict(
        __RequestVerificationToken=filter_token,
        SelectedStatementType="Payments",
        SelectedStatementPeriod=period,
        PaymentCardId=mondo_card_id,
    )
    return form_fields

def payments_from_statements(statements_payload):
    """Request a filtered statement and yield its payments keyed by date.

    The POST is sent as soon as this function is called; each payment div is
    only parsed (via ``div_to_payment``) when the returned generator is
    consumed.

    Args:
        statements_payload: Form data for the refresh endpoint, as built by
            ``create_payload``.

    Returns:
        A generator of ``(payment.date, payment)`` pairs, one per
        payment-summary div in the response.
    """
    response = session_requests.post(
        statements_filter_endpoint,
        data=statements_payload,
        headers={'referer': statements_endpoint},
    )
    page = html.fromstring(response.content)
    summary_divs = page.xpath('//div[@data-pageobject="travelstatement-paymentsummary"]')
    # map() is lazy, so parsing is deferred exactly like a generator expression.
    return ((payment.date, payment) for payment in map(div_to_payment, summary_divs))

# Collect the payments of every statement period, keyed by payment date.
# A later payment with the same date replaces the earlier entry.
# NOTE(review): consuming the generator calls div_to_payment, which this file
# defines further down; on a fresh top-to-bottom run that raises NameError
# unless the name is already bound (e.g. from an earlier notebook execution).
# Confirm, and consider moving this loop below the definitions.
payments_dict = OrderedDict()
for period in statements:
    payments = payments_from_statements(create_payload(period))
    for payment_date, payment in payments:
        payments_dict[payment_date] = payment
    
    

class Payment:
    """One payment entry from a travel statement.

    All constructor arguments are stored unchanged as attributes of the same
    name; only ``cost`` and ``date`` are required.
    """

    def __init__(self, cost, date, journeys = None, warning = None, autocompleted = None, capped = None):
        self.cost, self.date = cost, date
        self.journeys = journeys
        self.warning, self.autocompleted, self.capped = warning, autocompleted, capped

    def __repr__(self):
        # Journeys go on their own line because the list can be long.
        template = "Payment(cost: {0}, date: {1}, warning: {2}, autocompleted: {3}, "\
                   "capped: {4},\njourneys: {5})"
        shown = (self.cost, self.date, self.warning,
                 self.autocompleted, self.capped, self.journeys)
        return template.format(*shown)

class Journey:
    """One journey listed under a payment.

    ``cost`` is stored as given and ``fare`` is the same amount with the
    sign flipped.
    """

    def __init__(self, station_from, station_to, time, cost, notes=None):
        self.station_from, self.station_to = station_from, station_to
        self.time = time
        self.notes = notes
        self.cost = cost
        # fare mirrors cost with the opposite sign.
        self.fare = cost * -1

    def __repr__(self):
        template = "Journey(station_from: {0}, station_to: {1}, time: {2}, "\
                   "fare: {3}, cost: {4}, notes: {5})"
        shown = (self.station_from, self.station_to, self.time,
                 self.fare, self.cost, self.notes)
        return template.format(*shown)

def text_at_xpath(d, xpath_str):
    """Return the stripped text of the first node matching ``xpath_str``.

    Args:
        d: Object exposing an ``xpath`` method (callers in this file pass
            lxml elements).
        xpath_str: XPath expression evaluated against ``d``.

    Returns:
        ``None`` when nothing matches. Otherwise the matched element's
        ``text`` with surrounding whitespace removed; an element that
        matches but has no text yields ``""``.
    """
    element = element_at(d.xpath(xpath_str), 0)
    if element is None:
        return None
    # ``text`` is None for an element without leading text; treat that as
    # empty instead of failing on ``None.strip()``.
    return (element.text or "").strip()

def div_to_journeys(div):
    """Return a list of Journey objects, one per detail link found under ``div``."""
    detail_links = div.xpath('.//a[@data-pageobject="statement-detaillink"]')
    return list(map(div_to_journey, detail_links))

def text_to_cost(text):
    """Convert a displayed amount into a signed integer count of hundredths.

    Leading/trailing ``+``, ``-``, ``£`` and ``*`` characters are removed
    before parsing. The result is positive only when ``text`` starts with
    ``+``; every other amount is returned negated. Fractions of a hundredth
    are truncated by ``int()``.

    Args:
        text: Amount string such as ``"+£1.50"`` or ``"£2.40"``.

    Returns:
        The amount multiplied by 100 as an ``int``, signed as described.
    """
    sign = 1 if text.startswith('+') else -1
    digits = text.strip("+-£*")
    hundredths = int(Decimal(digits) * 100)
    return sign * hundredths
    

def div_to_journey(j):
    """Build a Journey from one journey detail element.

    Each field is read from a span identified by its ``data-pageobject``
    attribute. The fare text is converted with ``text_to_cost``; the time is
    kept as the raw string.
    """
    return Journey(
        station_from=text_at_xpath(j, './/span[@data-pageobject="journey-from"]'),
        station_to=text_at_xpath(j, './/span[@data-pageobject="journey-to"]'),
        time=text_at_xpath(j, './/span[@data-pageobject="journey-time"]'),
        cost=text_to_cost(text_at_xpath(j, './/span[@data-pageobject="journey-fare"]')),
        notes=text_at_xpath(j, './/span[@data-pageobject="journey-to-from"]'),
    )


def div_to_payment(div):
    """Build a Payment from one payment-summary div.

    The cost comes from the span whose ``data-pageobject`` contains
    ``"price"``, the date from the ``statement-date`` span (parsed as
    day/month/year). The warning, autocompleted and capped flags record
    whether an ``img`` with the matching class is present, and the journeys
    are parsed from the same div.
    """
    def is_present(xpath_str):
        # True when the XPath matches at least one node under this div.
        return element_at(div.xpath(xpath_str), 0) is not None

    cost = text_to_cost(text_at_xpath(div, './/span[contains(@data-pageobject,"price")]'))

    date_text = text_at_xpath(div, './/span[@data-pageobject="statement-date"]')
    date = datetime.strptime(date_text, "%d/%m/%Y").date()

    return Payment(
        cost,
        date,
        warning=is_present('.//img[contains(@class, "warning-icon")]'),
        autocompleted=is_present('.//img[contains(@class, "autocompleted")]'),
        capped=is_present('.//img[contains(@class, "capped")]'),
        journeys=div_to_journeys(div),
    )

