# Data ETL

## Import Modules

In [1]:
import os
import sys
sys.path.insert(0,os.path.join("..","..","Resources","AccessInformation"))

from accessinformation import access_token
from stravalib import Client , unithelper

import pandas as pd
import numpy as np

# Authenticated Strava API client used by the cells below.
client = Client(access_token=access_token)

## Structuring Data for JSON Export

In [2]:
# Fetch the athlete once; calling get_athlete() per field would issue a
# separate API request for each name.
athlete = client.get_athlete()
print(f'Runner information: {athlete.firstname} {athlete.lastname}')

Runner information: Raul Maldonado


In [3]:
def AthleteActivities(client):
    """Return whatever ``client.get_activities()`` yields for this client.

    Args:
        client: Object exposing a ``get_activities()`` method (the notebook
            passes its stravalib ``Client``).

    Returns:
        The unmodified result of ``client.get_activities()``.
    """
    activities = client.get_activities()
    return activities

# def Streams(client_id, activity, types):
#     streams = client_id.get_activity_streams(activity, types=types, series_type='time')
#     return(streams)
# activities = AthleteActivities(client)

In [4]:
def dictionaryOfDF_toDF(diction):
    """Stack every DataFrame stored in ``diction`` into a single DataFrame.

    Args:
        diction: Mapping whose values are pandas DataFrames. The keys are
            ignored; frames are stacked in the mapping's iteration order.

    Returns:
        The row-wise concatenation of all values. Each frame keeps its own
        index, so index labels repeat across frames. An empty mapping yields
        an empty DataFrame.
    """
    listOfDataFrames = list(diction.values())
    # pd.concat raises ValueError when given nothing to concatenate, so an
    # empty dictionary is handled explicitly.
    if not listOfDataFrames:
        return pd.DataFrame()
    return pd.concat(listOfDataFrames)

In [5]:
# Stream types requested from the API for every activity. 'temp' is requested
# but is not copied into the exported frames.
types = ['time', 'heartrate','distance', 'latlng', 'altitude', 'velocity_smooth', 'moving', 'grade_smooth', 'temp']

# (output column, stream key) pairs, in the column order of the exported frames.
_STREAM_COLUMNS = (
    ("Time", "time"),
    ("HeartRate", "heartrate"),
    ("Distance", "distance"),
    ("Altitude", "altitude"),
    ("Grade_Smooth", "grade_smooth"),
    ("Moving", "moving"),
    ("Velocity_Smooth", "velocity_smooth"),
)


def getActivityStream(activitiesObject, stravaClient=None):
    """Build one DataFrame of stream samples per activity.

    Args:
        activitiesObject: Iterable of activities exposing ``id``, ``name`` and
            ``start_date`` (an object with a ``date()`` method).
        stravaClient: Object exposing ``get_activity_streams``. Defaults to
            the notebook-level ``client``.

    Returns:
        dict mapping ``"<name without spaces>-<id>"`` to a DataFrame with the
        columns Time, HeartRate, Distance, Altitude, Grade_Smooth, Moving,
        Velocity_Smooth, Latitude, Longitude and Date. Activities that lack
        any of the required streams are skipped.
    """
    # Fall back to the notebook-level client when none is supplied.
    api = client if stravaClient is None else stravaClient
    dataFrameDictionary = {}
    for activity in activitiesObject:
        actID = activity.id
        actName = activity.name
        streamObject = api.get_activity_streams(actID, types=types, series_type='time')
        # NOTE(review): some stravalib versions appear to return None when an
        # activity has no streams at all; confirm against the installed version.
        if streamObject is None:
            continue
        try:
            latlng = streamObject['latlng'].data
            columns = {column: streamObject[key].data for column, key in _STREAM_COLUMNS}
        except KeyError:
            # A required stream (e.g. heart rate or GPS) is missing.
            continue
        # Split the [lat, lng] pairs into two scalar columns.
        columns['Latitude'] = [point[0] for point in latlng]
        columns['Longitude'] = [point[1] for point in latlng]
        df = pd.DataFrame(columns)
        df["Date"] = activity.start_date.date()
        dataFrameDictionary[f'{actName.replace(" ","")}-{actID}'] = df
    return dataFrameDictionary

In [6]:
# List the athlete's activities, then build one stream DataFrame per activity.
activities = AthleteActivities(client)
dataframe_dictionary = getActivityStream(activities)

## Export Dictionary of DataFrames to JSON Files

In [7]:
# Write each activity's DataFrame to its own JSON file, named after its
# dictionary key, as a list of row records.
for key, frame in dataframe_dictionary.items():
    destination = f"../../Data/JSONData/{key}.json"
    frame.to_json(destination, orient='records')

In [8]:
# Combine every per-activity frame into one dataset and export it as a single
# JSON file of row records.
main_df = dictionaryOfDF_toDF(dataframe_dictionary)
main_df.to_json("../../Data/JSONData/MainDataset.json", orient='records')

## Data to SQL database

In [9]:
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, ForeignKey
from sqlalchemy import Column, Date, Integer, String
from sqlalchemy.ext.declarative import declarative_base

import pandas as pd

# SQLite database file that receives the combined run data.
DATABASE_URL = 'sqlite:///../../Analysis/db/RunningData.db'
engine = create_engine(DATABASE_URL)

In [17]:
from sqlalchemy import Float

Base = declarative_base()


class Segments(Base):
    """One stream sample of an activity, mapped to the ``Segments`` table.

    Fractional measurements (distance, altitude, grade, velocity and the
    coordinates) are declared as ``Float``. They were previously declared
    ``Integer`` although the data holds decimals; SQLite's type affinity
    still stored them as reals, but the declared schema was misleading.
    NOTE(review): stricter backends may round or reject such values; confirm
    before pointing this model at another database.

    The new types only apply to a newly created table; an existing
    ``Segments`` table is not altered by ``create_all``.
    """

    __tablename__ = "Segments"

    ids = Column(Integer, primary_key=True)  # surrogate key
    time = Column(Integer)
    distance = Column(Float)
    heartrate = Column(Integer)
    altitude = Column(Float)
    grade_smooth = Column(Float)
    moving = Column(Integer)  # the moving flag is stored as 0/1
    velocity_smooth = Column(Float)
    latitude = Column(Float)
    longitude = Column(Float)

    def __init__(self, time, heartrate, distance, altitude, grade_smooth, moving,
                 velocity_smooth, latitude, longitude):
        """Store one sample; ``ids`` is left for the database to assign."""
        self.time = time
        self.heartrate = heartrate
        self.distance = distance
        self.altitude = altitude
        self.grade_smooth = grade_smooth
        self.moving = moving
        self.velocity_smooth = velocity_smooth
        self.latitude = latitude
        self.longitude = longitude
        
# Create the Segments table if it does not exist yet.
Base.metadata.create_all(engine)

# Insert every row of main_df as a Segments record in a single transaction.
# NOTE: re-running this cell appends the same rows again.
session = Session(bind=engine)
try:
    session.add_all(
        Segments(time=row['Time'], heartrate=row['HeartRate'], distance=row['Distance'],
                 altitude=row['Altitude'], grade_smooth=row['Grade_Smooth'], moving=row['Moving'],
                 velocity_smooth=row['Velocity_Smooth'], latitude=row['Latitude'],
                 longitude=row['Longitude'])
        for _, row in main_df.iterrows()
    )
    session.commit()
except Exception:
    # Leave the database untouched if any insert fails, then surface the error.
    session.rollback()
    raise
finally:
    # Release the connection held by the session.
    session.close()

In [24]:
from sqlalchemy import text

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; run the
# query on an explicit connection that is closed when the block exits.
with engine.connect() as connection:
    preview = connection.execute(text('SELECT * FROM Segments LIMIT 10;')).fetchall()
preview

[(1, 0, 1.4, 146, 25.1, 0, 0, 0, 37.653178, -121.012971),
 (2, 5, 12.7, 142, 25, 0, 1, 2.3, 37.653181, -121.013099),
 (3, 8, 21.5, 145, 25.1, 0, 1, 2.5, 37.653181, -121.013199),
 (4, 15, 42.5, 147, 25.1, 0.2, 1, 3, 37.65315, -121.013433),
 (5, 18, 50.6, 150, 25.1, 0, 1, 2.9, 37.653147, -121.013524),
 (6, 20, 56.3, 146, 25.1, 0, 1, 2.8, 37.653143, -121.013589),
 (7, 22, 61.8, 149, 25.1, 0, 1, 2.8, 37.653142, -121.013651),
 (8, 26, 73.3, 146, 25.1, 0.2, 1, 2.8, 37.653139, -121.013782),
 (9, 29, 82.3, 149, 25.1, -0.2, 1, 2.9, 37.653135, -121.013883),
 (10, 36, 102.9, 149, 25.2, -0.4, 1, 3, 37.653111, -121.014115)]