# In [None]:
#imports

import os
import requests
import urllib3
import json 
import pandas as pd
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
import matplotlib.pyplot as plt
import polyline
import folium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from openai import OpenAI
from geopy.geocoders import GoogleV3
import seaborn as sns

##################################

#setting up Strava API

access_token = 'your_access_token_placeholder'
auth_url = "https://www.strava.com/oauth/token"
activities_url = "https://www.strava.com/api/v3/athlete/activities"
# Template URL: the statistics request further down fills the id in with
# athlete_stats_url.format(athlete_id=athlete_id).
athlete_stats_url = "https://www.strava.com/api/v3/athletes/{athlete_id}/stats"
athlete_id = 'your_athlete_id_placeholder'

# Form body of the OAuth refresh-token request.
payload = {
    'client_id': "your_client_id_placeholder",
    'client_secret': 'your_client_secret_placeholder',
    'refresh_token': 'your_refresh_token_placeholder',
    'grant_type': "refresh_token",
    'f': 'json',
}

print("Requesting Token...\n")
# Certificate verification stays at the requests default (enabled): this
# request carries the client secret and the refresh token.
res = requests.post(auth_url, data=payload, timeout=30)
# Fail here with the HTTP error instead of a KeyError on 'access_token' below.
res.raise_for_status()
access_token = res.json()['access_token']

# The token itself is not printed so it cannot end up in saved notebook
# output or logs.
print("Access token received.\n")
header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}

###################################

                               
###################################
#Functions

def calculate_pace_minutes_seconds_per_km(distance_meters, time_seconds):
    """Return the pace per kilometre as a ``(minutes, seconds)`` tuple of ints.

    The pace is computed in whole seconds per kilometre and then split with
    ``divmod``. Going through fractional minutes first loses a second to
    floating-point truncation (61 s over 1000 m came out as 1:00).

    Args:
        distance_meters: Distance covered, in metres. Must be non-zero; a
            zero distance is not handled here (plain Python numbers raise
            ``ZeroDivisionError``).
        time_seconds: Time taken, in seconds.

    Returns:
        ``(pace_minutes, pace_seconds)``; fractions of a second are dropped.
    """
    seconds_per_km = time_seconds * 1000 / distance_meters
    pace_minutes, pace_seconds = divmod(int(seconds_per_km), 60)
    return pace_minutes, pace_seconds

def laps_from_activity(activity_id, access_token):
    """Fetch the laps of one Strava activity.

    Args:
        activity_id: Id of the activity; it is converted to text and placed
            in the request URL.
        access_token: OAuth access token sent as a ``Bearer`` authorization
            header.

    Returns:
        The decoded JSON body of the ``/laps`` response.

    Raises:
        requests.HTTPError: If the API answers with an error status, so an
            error payload is never handed back as if it were lap data.
        requests.Timeout: If the API does not answer within 30 seconds.
    """
    laps_url = f"https://www.strava.com/api/v3/activities/{activity_id}/laps"
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f"Bearer {access_token}",
    }
    response = requests.get(laps_url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()

def format_time(seconds):
    """Format a whole number of seconds as a zero-padded ``MM:SS`` string.

    Minutes are not wrapped into hours, so 3600 seconds gives ``"60:00"``.
    """
    whole_minutes, leftover_seconds = divmod(seconds, 60)
    return "{:02d}:{:02d}".format(whole_minutes, leftover_seconds)
    
###################################

###################################

#Data requests
#*activities
activities_response = requests.get(activities_url, headers=header, params=param, timeout=30)
# Stop on an HTTP error instead of building a dataframe from an error payload.
activities_response.raise_for_status()
dataset = activities_response.json()
# Same endpoint and parameters as `dataset`, so the response is reused
# rather than requested a second time.
dataset_stats = dataset
#*statistics
athlete_stats = requests.get(athlete_stats_url.format(athlete_id=athlete_id), headers=header, timeout=30).json()
stats_df = pd.DataFrame(athlete_stats)

# Data filtering
columns_to_remove = [
    'resource_state', 'sport_type', 'workout_type', 'utc_offset',
    'location_city', 'location_state', 'location_country', 'kudos_count',
    'comment_count', 'photo_count', 'trainer', 'commute', 'manual', 'private',
    'visibility', 'flagged', 'gear_id', 'heartrate_opt_out', 'display_hide_heartrate_option',
    'upload_id', 'upload_id_str', 'external_id', 'from_accepted_tag', 'total_photo_count',
    'has_kudoed', 'average_watts', 'kilojoules', 'device_watts', 'elapsed_time',
]


df = pd.DataFrame(dataset)
# errors='ignore': a listed column that is absent from the response is
# skipped instead of raising KeyError (presumably the power fields are not
# always returned -- verify against real responses).
df = df.drop(columns=columns_to_remove, errors='ignore')
df = df[~df['type'].isin(['Ride', 'Workout', 'Walk'])]
if df.empty:
    raise ValueError("No activities left after filtering out rides, workouts and walks.")
# Renumber the rows before any `[0]` lookup: after the filter above, label 0
# only exists if the first activity in the response was kept.
df = df.reset_index()
df['start_date'] = pd.to_datetime(df['start_date']).dt.date
df['start_latlng'] = df['start_latlng'].apply(str)
# regex=False: '[' and ']' are literal characters here, whatever the pandas
# default for `regex` is.
df['start_latlng'] = df['start_latlng'].str.replace('[', '', regex=False).str.replace(']', '', regex=False)
df['start_date_local'] = df['start_date_local'].str.replace('Z', '', regex=False)
df['start_date_local'] = df['start_date_local'].apply(str)
date_for_weather = df['start_date_local'][0]

#############################################
# Build the laps dataframe for the first activity in df via laps_from_activity()

laps_df = pd.DataFrame(laps_from_activity(df['id'][0], access_token))

# One text summary per lap; the list is interpolated into the prompt later on.
lap_lst = [
    f"""For {laps_df['name'][i]}: distance covered = {laps_df['distance'][i] / 1000}km,
    move time = {laps_df['moving_time'][i]} seconds, 
    elevation gain = {laps_df['total_elevation_gain'][i]},
    average speed = {laps_df['average_speed'][i]},
    max speed = {laps_df['max_speed'][i]},
    average cadence = {laps_df['average_cadence'][i]}
    average heartrate = {laps_df['average_heartrate'][i]}
    max heartrate = {laps_df['max_heartrate'][i]}
    """
    for i in range(len(laps_df))
]

laps_df['formatted_time'] = laps_df['moving_time'].apply(format_time)

# The last lap's label is replaced by its moving time scaled to one kilometre
# (moving_time / distance * 1000) instead of its raw moving time.
last_lap = len(laps_df) - 1
last_lap_seconds_per_km = int(
    laps_df.loc[last_lap, "moving_time"] / laps_df.loc[last_lap, "distance"] * 1000
)
laps_df.loc[last_lap, "formatted_time"] = format_time(last_lap_seconds_per_km)

#############################################

#Polyline

# Decode the summary polyline of the first activity into (lat, lon) pairs.
strava_polyline = df['map'][0]['summary_polyline']
decoded_coords = polyline.decode(strava_polyline)
latitudes = [lat for lat, _ in decoded_coords]
longitudes = [lon for _, lon in decoded_coords]
# The map is centred on the mean of the route points.
avg_lat = sum(latitudes) / len(latitudes)
avg_lon = sum(longitudes) / len(longitudes)

#creating map

m = folium.Map(location=[avg_lat, avg_lon], zoom_start=14)
route_line = folium.PolyLine(locations=decoded_coords, color='green', weight=8, opacity=0.7)
route_line.add_to(m)

# Saved as HTML so that it can be opened in a browser for the screenshot.
file_path = "map.html"
m.save(file_path)

# Open the saved map in headless Chrome and capture it as a PNG.
options = webdriver.ChromeOptions()
options.add_argument("headless")

screenshot_path = os.path.join(os.getcwd(), "map_screenshot.png")

driver = webdriver.Chrome(options=options)
try:
    driver.get("file://" + os.path.abspath(file_path))
    driver.save_screenshot(screenshot_path)
finally:
    # Always shut the browser down; without this a failed page load or
    # screenshot leaves the Chrome process running.
    driver.quit()

#Data analysis

# Mean moving time over all rows of df, shown as M:SS.
mean_move_time_seconds = df['moving_time'].mean()
whole_minutes, leftover_seconds = divmod(mean_move_time_seconds, 60)
mean_move_time_minutes = int(whole_minutes)
mean_move_time_seconds_remainder = int(leftover_seconds)
mean_move_time_formatted = "{}:{:02d}".format(mean_move_time_minutes, mean_move_time_seconds_remainder)

# Heart rate: mean over all rows, and the value of row 0.
heart_rate_column = df["average_heartrate"]
average_heart_rate_total = heart_rate_column.mean()
most_recent_average_heartrate = heart_rate_column[0]

# Totals over all rows, used for the overall pace.
distance_total = df['distance'].sum()
time_total = df['moving_time'].sum()

# Figures of row 0, used for the pace of that single activity.
most_recent_distance = df['distance'][0]
most_recent_move_time = df['moving_time'][0]


def _minutes_and_seconds(total_seconds):
    """Render a number of seconds as ``M:SS``."""
    return f"{total_seconds // 60}:{total_seconds % 60:02d}"


df['movetime_formatted'] = df['moving_time'].apply(_minutes_and_seconds)

pace_minutes_total, pace_seconds_total = calculate_pace_minutes_seconds_per_km(distance_total, time_total)
pace_minutes, pace_seconds = calculate_pace_minutes_seconds_per_km(most_recent_distance, most_recent_move_time)

#plots

# Average heart rate per lap; sns.lineplot returns the Axes it drew on.
axes = sns.lineplot(data=laps_df, x='name', y='average_heartrate', color = 'orange')
axes.spines['top'].set_visible(True)
axes.spines['right'].set_visible(False)
axes.set_facecolor('#f5f5f5')

# Label every point with the lap's formatted_time value.
for position, heart_rate, time_label in zip(
    laps_df.index, laps_df['average_heartrate'], laps_df['formatted_time']
):
    axes.text(position, heart_rate, f"{time_label}", ha='center', va='top')

# Red reference line at the average heart rate of row 0 of df.
axes.axhline(y=df['average_heartrate'][0], color = 'r')

axes.set_xlabel('Michael Larter')
axes.set_ylabel('Average Heart Rate')
axes.set_title(f'Average Heart Rate + Pace for {df["name"][0]} on {df["start_date"][0]}')
axes.grid(True)
axes.grid(axis='y', linestyle='--', linewidth=0.5)
# NOTE: plt.figure(figsize=(10, 6)) is not called here on purpose. At this
# point it would open a new, empty figure and plt.savefig() would write that
# blank figure; to change the size it has to run before sns.lineplot().
sns.set_palette("pastel")

# plt.show() is left out; the figure is only written to disk.
plt.savefig('running_data_plot.png')

#############################################
#Google
from urllib.parse import quote  # only this section needs it

# Reverse-geocode the start coordinates of row 0 of df into an address.
geolocator = GoogleV3(api_key='Your API Key')
location = geolocator.reverse(df['start_latlng'][0])
if location is None:
    raise ValueError("Could not reverse-geocode the start location of the run.")

#Weather
# The key is taken from the environment so that it is not stored in the
# source; the fallback is a placeholder like the other credentials in this file.
weather_api_key = os.environ.get('VISUAL_CROSSING_API_KEY', 'Your Visual Crossing API Key')
# The address is percent-encoded because it becomes a URL path segment.
weather_url = (
    'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/'
    f'{quote(str(location[0]), safe="")}/{date_for_weather}'
)
# Only the weather query parameters are sent. The Strava authorization
# header and paging parameters must not be passed to a third-party service.
weather_response = requests.get(
    weather_url,
    params={'unitGroup': 'metric', 'key': weather_api_key},
    timeout=30,
)
weather_response.raise_for_status()
WS = weather_response.json()

# Pick the hourly record whose 'datetime' equals the start hour of the run
# (characters 11-12 of the local start timestamp, e.g. "07" -> "07:00:00").
Hour = df['start_date_local'][0]
hour_part = Hour[11:13]
formatted_hour = f"{hour_part}:00:00"
selected_dict = next((hour_dict for hour_dict in WS['days'][0]['hours'] if hour_dict['datetime'] == formatted_hour), None)

if selected_dict is None:
    # No hourly record matched; say so instead of failing on None['conditions'].
    weather_conditions = "Weather data for the time of the run was not available."
else:
    # Wind speed is labelled km/h: that is the unit the weather service uses
    # for unitGroup=metric.
    weather_conditions = f"""The conditions during the run were {selected_dict['conditions']}. The temperature in celcius = {selected_dict['temp']}, the humidity = {selected_dict['humidity']},
the wind speed = {selected_dict['windspeed']}km/h, the precipitation = {selected_dict['precip']}"""

#############################################

#Open AI setup

# The secret key is read from the OPENAI_API_KEY environment variable so that
# it is never stored in the source file or in a shared notebook.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
# System message that sets the persona of the model.
role = "You are a performance analyst for a professional runner."

# Prompt text passed to chat_gpt_bot() as the user message. It combines the
# overall averages, the figures of the most recent run, the per-lap summaries
# (lap_lst) and the weather description (weather_conditions).
instructions = f"""Looking at the following data on their past performance and their most recent run. \n 
After the first paragraph provide their running statistics in an easy to read format and then provide tips for their future runs to help them improve their performance. \n
Provide the message as if you are talking to them directly. \n
Their average distance they run is {(df['distance'].mean() / 1000).round(1)}km, with an average moving time of {mean_move_time_formatted}, their average pace is {pace_minutes_total}:{pace_seconds_total:02d}/km and they average at a heart rate of {average_heart_rate_total.round()}BPM.\n
On this run they ran {(most_recent_distance / 1000).round(1)}km with a moving time of {df['movetime_formatted'][0]}., their pace was {pace_minutes}:{pace_seconds:02d} /km and their heart rate averaged at {most_recent_average_heartrate.round()}BPM.
Also do an analysis on all the laps in one small paragraph(do not list each lap, instead give a breakdown on differences between the laps and correlations that could be seen between them), look for trends in the data and areas that need attention: {lap_lst}
Also take into account the weather conditions and how they could affect the run: {weather_conditions}
Finally, end with Best regards, Your Performance Analyst AI
"""

def chat_gpt_bot(client, role, prompt_instructions, model="gpt-3.5-turbo"):
    """Request one chat completion and return the raw completion object.

    Args:
        client: Object exposing ``chat.completions.create(...)``, such as the
            ``OpenAI`` client created in this file.
        role: Text sent as the ``system`` message.
        prompt_instructions: Text sent as the ``user`` message.
        model: Name of the chat model to use. Defaults to the model this
            script has always used, so existing calls behave as before.

    Returns:
        Whatever ``client.chat.completions.create`` returns; the caller reads
        the reply text from ``.choices[0].message.content``.
    """
    conversation = [
        {"role": "system", "content": role},
        {"role": "user", "content": prompt_instructions},
    ]
    return client.chat.completions.create(model=model, messages=conversation)

# Send the system role and the prompt to the model; the completion object is
# kept so the e-mail section can read the reply text from it.
performance_analysis = chat_gpt_bot(client, role, instructions)

#############################################

#emailer setup
import html  # only needed to escape the generated text below

sender_email = "Your email address"
receiver_email = "Recipients Email address"
password = "Your email password"

message = MIMEMultipart()
message['From'] = sender_email
message['To'] = receiver_email
message['Subject'] = "Running Data Analysis"

#the message

# The model output is free text: escape it before it is placed inside the
# HTML, then turn its line breaks into <br> tags.
analysis_text = performance_analysis.choices[0].message.content or ""
body = html.escape(analysis_text, quote=False).replace('\n', '<br>')
# The cid: references must match the Content-ID headers of the attached
# images. The map <img> tag closes the src attribute before width/height.
full_body = f"""
<html>
  <body>
    <img src="cid:map_screenshot.png" width="400" height="300">
    <img src="cid:running_data_plot.png">
    <p>{body}</p>
  </body>
</html>
"""

message.attach(MIMEText(full_body, 'html'))

def _inline_image(image_path, content_id):
    """Read ``image_path`` and return it as a MIMEImage part with the given Content-ID."""
    with open(image_path, 'rb') as image_file:
        image_part = MIMEImage(image_file.read())
    image_part.add_header('Content-ID', content_id)
    return image_part


# Attached in the same order as before: the plot first, then the map.
img_plot = _inline_image('running_data_plot.png', '<running_data_plot.png>')
message.attach(img_plot)

img_map = _inline_image("map_screenshot.png", '<map_screenshot.png>')
message.attach(img_map)

#connecting to the server
# The context manager sends QUIT and closes the connection even when
# starttls(), login() or sendmail() raises.
with smtplib.SMTP('smtp.gmail.com', 587) as smtp_server:
    smtp_server.starttls()
    smtp_server.login(sender_email, password)

    #sending the message
    smtp_server.sendmail(sender_email, receiver_email, message.as_string())

# Bare expression: in a notebook this displays the activities dataframe as
# the cell output; it has no effect when run as a plain script.
df