In [None]:
import parse_tfl_json
import google

In [None]:
import pandas as pd 
from mylibrary.connections import cursor, conn, engine, Automapped_Base, session

# Load the stored journey-planner rows whose tfl_message is 'ok', restricted to
# stations that are not flagged in_sprawl and that have
# lat < 53.39926695813 and lng > -2.54 in tt_h.all_stations.
sql = """
select * from tt_h.stations_journeytime as sjt
where tfl_message = 'ok' and
 sjt.icscode not in (select icscode from tt_h.all_stations where in_sprawl = true) and 
 sjt.icscode in (select icscode from tt_h.all_stations where lat < 53.39926695813 and lng > -2.54)
"""

# One row per stored journey-planner request; tfl_response is parsed further down.
df = pd.read_sql(sql=sql, con=conn)

In [None]:
from process_journeys import get_station_lat_lng_from_icscode, get_london_icscodes, add_cycle_and_total_time, remove_journeys_not_arriving_clondon
from google import get_cycle_info
import parse_tfl_json
import json

In [None]:
# Parse every stored TfL response and write the best national-rail journey
# (lowest "total_time" after the cycle leg is added) back onto df.
# Rows that are skipped simply keep missing values in the derived columns.
for index, row in df.iterrows():
    lat, lng = get_station_lat_lng_from_icscode(row["icscode"])

    df.loc[index, "depart_lat"] = lat
    df.loc[index, "depart_lng"] = lng

    # json.loads raises TypeError for non-string input and ValueError
    # (JSONDecodeError) for malformed text. Catch only those so that
    # KeyboardInterrupt and genuine programming errors are not swallowed.
    try:
        response = json.loads(row["tfl_response"])
    except (TypeError, ValueError):
        print("parse problem")
        continue

    # KeyError: no "journeys" key; TypeError: payload is not a mapping (e.g. null).
    try:
        journeys = response["journeys"]
    except (KeyError, TypeError):
        print("no journeys in json")
        continue

    # An empty list would otherwise crash on journeys[0] below.
    if not journeys:
        print("no journeys in json")
        continue

    # Fill in journey summary so it exists even for journeys that don't end in central london
    first_legs = journeys[0]["legs"]
    df.loc[index, "natrail_journey_summary"] = parse_tfl_json.summarise_legs(first_legs)

    # Strip every leg that is not national rail (mutates the journey dicts).
    for journey in journeys:
        journey["legs"] = parse_tfl_json.remove_non_national_rail(journey["legs"])

    # Keep only journeys with a national rail component
    journeys = [j for j in journeys if len(j["legs"]) > 0]

    # Keep only journeys that end in central London.
    # The helper's failure modes are not visible here, so stay broad but use
    # `Exception` rather than a bare except (which also traps Ctrl-C).
    try:
        journeys = remove_journeys_not_arriving_clondon(journeys)
    except Exception:
        print("Problem with journey starting at {}".format(row["station_name"]))
        continue

    if len(journeys) == 0:
        continue

    journeys = add_cycle_and_total_time(journeys, row["tfl_dest"])

    # Best journey = smallest total_time.
    journeys = sorted(journeys, key=lambda x: x["total_time"])
    journey = journeys[0]
    legs = journey["legs"]

    # Overwrite the provisional summary with the one for the chosen journey.
    df.loc[index, "natrail_journey_summary"] = parse_tfl_json.summarise_legs(legs)

    df.loc[index, "natrail_train_changes"] = parse_tfl_json.num_changes(legs)
    df.loc[index, "final_arrival"] = parse_tfl_json.final_arrival(legs)

    travel_time_minutes = parse_tfl_json.get_total_travel_time(legs)
    df.loc[index, "natrail_journey_minutes"] = travel_time_minutes

    lat_lng = parse_tfl_json.lat_lng_from_legs(legs)
    df.loc[index, "arrive_lat"] = lat_lng["arrive"]["lat"]
    df.loc[index, "arrive_lng"] = lat_lng["arrive"]["lng"]

    df.loc[index, "cycle_minutes"] = journey["cycle_minutes"]
    df.loc[index, "cycle_miles"] = journey["cycle_miles"]
    df.loc[index, "total_journeytime"] = journey["cycle_minutes"] + travel_time_minutes


# Drop the row id and the raw request/response columns before de-duplicating/export.
cols = [c for c in df.columns if c not in ["id", "tfl_request", "tfl_response", "querydict"]]
df2 = df[cols]


In [None]:
# Keep one row per (station_name, tfl_dest): the first one after sorting by
# total_journeytime in ascending order.
df3 = (
    df2
    .sort_values(by=["station_name", "tfl_dest", "total_journeytime"])
    .drop_duplicates(subset=["station_name", "tfl_dest"], keep="first")
)

In [None]:
# Persist the de-duplicated journeys to tt_h.parsed_rail_journeys, replacing
# any existing table of that name; the DataFrame index is not written.
df3.to_sql(
    name="parsed_rail_journeys",
    con=engine,
    schema="tt_h",
    if_exists="replace",
    index=False,
)

In [None]:
# Add an auto-incrementing primary key to the freshly written table.
sql = """
ALTER TABLE tt_h.parsed_rail_journeys ADD COLUMN id SERIAL PRIMARY KEY;
"""
try:
    cursor.execute(sql)
except Exception:
    # Roll back so the shared connection is not left inside a failed
    # transaction (which would break later statements on conn), then
    # surface the original error.
    conn.rollback()
    raise
else:
    conn.commit()

In [None]:
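# NOTE: this whole cell is disabled code, kept for reference only.
# It re-queried the journey planner for rows of df3 that still had no
# natrail_journey_minutes and filled in the same derived columns as the main loop.
# If it is ever re-enabled, be aware that it rebinds the name `df`.
# import utils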
# import json

# counter = 0
# for r in df3.iterrows():
    
#     index = r[0]
#     row = r[1]

#     if pd.notnull(row.natrail_journey_minutes):
#         continue
    
#     counter += 1
#     print(counter)
#     depart_icscode = row["icscode"]

#     sql = """
#     select * from tt_h.anomalies_central_london_station
#     where icscode_depart = '{}'
#     """

#     df = pd.read_sql(sql.format(row["icscode"]), conn)
#     arrival_icscode = df.loc[0, "arrival_icscode"]

#     attempts = utils.get_attempt(arrival_icscode, ["0800","0820", "0840"], ["20170817","20170906","20170908"])
#     for a in attempts:
#         a["from"] = depart_icscode

#     from mylibrary.tfl_helpers import get_journeyplanner_results
#     jny = get_journeyplanner_results(attempts)
#     journeys = json.loads(jny["tfl_response"])["journeys"]
    
#     for journey in journeys:
#         legs = journey["legs"]
#         journey["legs"] = parse_tfl_json.remove_non_national_rail(legs)
        
#     journeys = remove_journeys_not_arriving_clondon(journeys)
#     journeys = add_cycle_and_total_time(journeys, row["tfl_dest"])
#     journeys = sorted(journeys, key = lambda x: x["total_time"])
    
#     journey = journeys[0]
#     legs = journey["legs"]

#     summary = parse_tfl_json.summarise_legs(legs)
#     df3.loc[index, "natrail_journey_summary"] = summary

#     df3.loc[index, "natrail_train_changes"] = parse_tfl_json.num_changes(legs)
#     df3.loc[index, "final_arrival"] = parse_tfl_json.final_arrival(legs)

#     travel_time_minutes = parse_tfl_json.get_total_travel_time(legs) 
#     df3.loc[index, "natrail_journey_minutes"] = travel_time_minutes

#     lat_lng = parse_tfl_json.lat_lng_from_legs(legs)
    
    
#     df3.loc[index, "arrive_lat"] = lat_lng["arrive"]["lat"]
#     df3.loc[index, "arrive_lng"] = lat_lng["arrive"]["lng"]

#     df3.loc[index, "cycle_minutes"] = journey["cycle_minutes"]
#     df3.loc[index, "cycle_miles"] = journey["cycle_miles"]
#     df3.loc[index, "total_journeytime"] = journey["cycle_minutes"] + travel_time_minutes
    
    

In [None]:
# What follows should go in a data manipulation script

In [None]:
# Split the de-duplicated journeys into one frame per destination postcode.
df_pf = df3.loc[df3["tfl_dest"].eq("SW1H9AJ")]
df_cw = df3.loc[df3["tfl_dest"].eq("E145HP")]

In [None]:
# One row per station in df_pf, with the matching df_cw journey columns joined
# on the station-level fields; overlapping column names get _pf / _cw suffixes.
all_stations_data = pd.merge(
    df_pf,
    df_cw,
    how="left",
    on=["nlc", "icscode", "station_name", "tfl_message", "depart_lat", "depart_lng"],
    suffixes=("_pf", "_cw"),
)

In [None]:
# Write the merged per-station table to tt_h.stations_maps_data, replacing any
# existing table of that name; the DataFrame index is not written.
all_stations_data.to_sql(
    name="stations_maps_data",
    con=engine,
    schema="tt_h",
    if_exists="replace",
    index=False,
)

In [None]:
# CSV copy of the merged table in the working directory (UTF-8, no index column).
all_stations_data.to_csv(
    "stations_maps_data.csv",
    index=False,
    encoding="utf-8",
)

In [None]:
# Rename the departure coordinates to lat / lng, then export the CSV under
# interactive_maps_template/data.
# NOTE: the rename is in place, so all_stations_data keeps the new column names
# for any cell executed afterwards.
all_stations_data.rename(
    columns={"depart_lat": "lat", "depart_lng": "lng"},
    inplace=True,
)
all_stations_data.to_csv(
    "interactive_maps_template/data/stations_maps_data.csv",
    index=False,
    encoding="utf-8",
)

In [None]:
# What's going on with e.g. newport?
# df3[df3["icscode"] == "1000647"]


In [None]:
# Ad-hoc check: show the df_pf rows whose station name contains "udley".
df_pf.loc[df_pf["station_name"].str.contains("udley")]