# Moving data from the Strava API to a PostgreSQL database

Before you do anything here, you need to do a few initial steps:

1. Install the flask, psycopg2, requests, pandas, sqlalchemy, and stravaio Python libraries
2. Have access to a PostgreSQL database (e.g. `brew install postgres` on a Mac)
3. Have access to a Strava API account: https://www.strava.com/settings/api
4. Set up the proper values in `app-config.json` (from steps 2 and 3)
5. Run `python strava.py` at the command line to set up the API OAuth token


In [None]:
import re
import time
import os
import json
import requests
import pandas as pd
import sqlalchemy
import psycopg2
import psycopg2.extras

from sqlalchemy import create_engine
from stravaio import StravaIO

In [None]:
# Refresh the Strava API token. This is needed whenever the token has not
# been refreshed in the last 6 hours.
# NOTE(review): `strava` is not imported in the import cell above — presumably
# the local strava.py helper mentioned in the setup steps; confirm it is in scope.
strava.refresh_token()

### Get all activities and load into the database

In [None]:
# Download every activity for the authenticated athlete, 100 per page, until
# the API returns an empty page.
page = 1
activities = []
while True:
    url = f'https://www.strava.com/api/v3/athlete/activities?page={page}&per_page=100'
    r = requests.get(url, headers=strava.get_auth_headers())
    # Fail loudly on HTTP errors: an error body is a non-empty JSON object,
    # which would otherwise be treated as a page of activities and keep the
    # loop running indefinitely.
    r.raise_for_status()
    res = json.loads(r.text)
    if not res:
        break
    print("activity count: %s" % len(res))
    activities += res
    page += 1

# Flatten the nested JSON into columns, then split the start/end latlng pairs
# into separate lat/lon columns.
df = pd.json_normalize(activities)
df[['start_lat', 'start_lon']] = pd.DataFrame(df.start_latlng.tolist(), index=df.index)
df[['end_lat', 'end_lon']] = pd.DataFrame(df.end_latlng.tolist(), index=df.index)

# Keep a local snapshot of the activity table.
df.to_pickle('activities.pickle')

# os.environ is a mapping, not a callable: index it to read the connection URL.
db = create_engine(os.environ["PGDB"], echo=False)

# Replace strava.activity with the fresh download; the listed columns are
# stored as JSON.
df.to_sql(schema='strava', name='activity', con=db, index=False, if_exists='replace',
          method='multi', dtype={"athlete": sqlalchemy.types.JSON,
                                 "map": sqlalchemy.types.JSON,
                                 "start_latlng": sqlalchemy.types.JSON,
                                 "end_latlng": sqlalchemy.types.JSON
                                 })


### Get activity details (streams in Strava) for previously downloaded activities and load them into the database

In [None]:
# Stream payloads keyed by activity id; filled in by the download loop in the
# next cell. Presumably kept in its own cell so that re-running the download
# does not discard streams already fetched — confirm.
routes = {}

In [None]:
# Download the stream data for every activity and store the decoded payload in
# `routes`, keyed by activity id.
headers = strava.get_auth_headers()
stream_keys = "time,latlng,distance,altitude,velocity_smooth,heartrate,temp,moving,grade_smooth,watts"

for num, activity in enumerate(activities):
    # Progress indicator: show the name of every 20th activity.
    if num % 20 == 0:
        print("|%s|" % activity['name'])
    r = requests.get('https://www.strava.com/api/v3/activities/%s/streams?keys=%s&key_by_type=%s' % (
        activity['id'],
        stream_keys,
        "true"
        ), headers=headers)
    try:
        data = json.loads(r.text)
    except ValueError as err:
        # The body was not JSON at all; stop rather than hammer the API.
        print("Something failed terribly! activity %s: %s" % (activity['id'], err))
        break
    if not isinstance(data, dict):
        # With key_by_type=true the payload should be an object keyed by stream name.
        print("Something failed terribly! activity %s returned: %r" % (activity['id'], data))
        break
    # A payload without a 'time' stream is still stored (as before), but it is
    # echoed so the problem is visible.
    if 'time' not in data:
        print(data)
    routes[activity["id"]] = data
    # Pause between requests (presumably to stay under the API rate limit).
    time.sleep(10)


In [None]:
# Build one DataFrame per downloaded activity from its stream payload, combine
# them, split latlng into lat/lon columns, and export the result to a
# tab-separated file plus a small sample table in the database.
print("put routes together")
dfs = []
for activity_id, route in routes.items():
    col_data = {}
    col = None  # last stream name reached; reported if the payload is malformed
    try:
        for col, route_data in route.items():
            col_data[col] = route_data['data']
    except (AttributeError, KeyError, TypeError):
        # The payload is not a {stream_name: {"data": [...]}} mapping.
        print("%s: %s" % (activity_id, col))
    col_data["activity_id"] = activity_id
    try:
        dfs.append(pd.DataFrame.from_dict(col_data))
    except (ValueError, TypeError):
        # e.g. no usable stream columns, or columns of different lengths.
        # Report the activity id: stream payloads carry no 'id' key of their own.
        print(activity_id)
print("concat routes together")
df = pd.concat(dfs)
# Rows without a [lat, lon] pair get -1 for both coordinates.
df['lat'] = df.latlng.apply(lambda x: x[0] if isinstance(x, list) else -1)
df['lon'] = df.latlng.apply(lambda x: x[1] if isinstance(x, list) else -1)
df.drop(['latlng'], axis=1, inplace=True)
print("write to db")
# os.environ is a mapping, not a callable: index it to read the connection URL.
db = create_engine(os.environ["PGDB"], echo=False)
# Full data set goes to a headerless TSV file.
df.to_csv('activity_detail.txt', sep="\t", header=False, index=False)
# Only the first 10 rows are written through pandas.
# NOTE: to_sql defaults to if_exists='fail', so this raises if the table already exists.
df.head(10).to_sql(schema='strava', name='activity_detail', con=db, index=False, chunksize=100, method='multi')
print("done")

Next, remove all records from `strava.activity_detail`, then bulk-import the full data set from `activity_detail.txt`:

```
psql "$PGDB" -c "truncate table strava.activity_detail;"
cat activity_detail.txt | psql "$PGDB" -c "copy strava.activity_detail from stdin null ''"
```

### Export letters to JSON so that they can be consumed by a website.

In [None]:
# Export every activity whose name is at most two characters long, together
# with its [lat, lon] points, to letters.json as one JSON object keyed by the
# trimmed activity name.
sql = """
with points as(
select activity_id, json_agg(array[lat, lon]) as ll
from strava.activity_detail
group by 1
)

, d as(
select id, distance, moving_time, elapsed_time, total_elevation_gain
, type, sport_type, start_date_local, achievement_count, average_speed
, max_speed, average_temp, average_heartrate, elev_high, elev_low
, trim(a.name) as letter, ll
from strava.activity a
join points p on a.id = p.activity_id
where length(a.name) <= 2
)

select *
from d
order by "letter"
"""

# os.environ is a mapping, not a callable: index it to read the connection string.
conn = psycopg2.connect(os.environ["PGDB"])
try:
    # RealDictCursor yields each row as a dict, so it can be dumped as JSON directly.
    with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
        cur.execute(sql)
        # Rows are streamed to the file one at a time instead of building the
        # whole object in memory; the context manager closes the file even if
        # a row fails to serialize.
        with open("letters.json", "w") as fw:
            fw.write("{")
            for position, row in enumerate(cur):
                if position:
                    fw.write(",\n")
                # NOTE(review): two activities with the same name produce
                # duplicate keys in the output object — confirm that is acceptable.
                fw.write("%s:%s" % (json.dumps(row["letter"]), json.dumps(row)))
            fw.write("}")
finally:
    conn.close()
