In [3]:
import requests
from importlib import reload
from secretslocal import COOKIE, VIRTUAGYM_API_KEY, VIRTUAGYM_CLUB_SECRET, CLUB_ID
import datetime
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

In [4]:
def extract_div_dates(html):
    """Index the ``<div class="day">`` elements of a page by their ``id``.

    Args:
        html: Markup handed to BeautifulSoup's built-in ``'html.parser'``.

    Returns:
        dict: ``id`` attribute -> that div serialized back to a string.
        Divs without a truthy ``id`` are left out; when several divs share
        an id, the last one in document order is kept.
    """
    day_divs = BeautifulSoup(html, 'html.parser').find_all(name='div', class_='day')
    # Re-assigning an existing key keeps its position but replaces the markup.
    return {
        day_div.get('id'): str(day_div)
        for day_div in day_divs
        if day_div.get('id')
    }

In [5]:
def get_all_club_members() -> dict:
    """Fetch every member of the club from the Virtuagym REST API.

    The club id and both credentials are read from ``secretslocal``.

    Returns:
        dict: ``member_id`` -> plain-dict form of the member, as produced by
        ``User.to_dict()``. Empty when the response has no ``result`` entries.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
        KeyError: A member record lacks one of the mandatory fields.
    """
    from secretslocal import CLUB_ID, VIRTUAGYM_API_KEY, VIRTUAGYM_CLUB_SECRET
    import requests
    from data_structures.user import User

    request_timeout_seconds = 60

    url = f"https://api.virtuagym.com/api/v1/club/{CLUB_ID}/member"
    # Passing the query as `params` lets requests URL-encode the credentials.
    query = {
        "api_key": VIRTUAGYM_API_KEY,
        "club_secret": VIRTUAGYM_CLUB_SECRET,
        "sync_from": 0,
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    ret = requests.get(url, params=query, timeout=request_timeout_seconds)
    # Fail loudly on an error status instead of returning an empty member list.
    ret.raise_for_status()

    data = ret.json()
    # `or []` also covers an explicit JSON null under "result".
    user_data = data.get('result') or []

    users = {}

    for item in user_data:
        # Positional order must match the User constructor (defined in data_structures.user).
        user = User(
            item["member_id"],
            item["firstname"],
            item["lastname"],
            item["active"],
            item["is_pro"],
            item["gender"],
            item["email"],
            item["member_since"],
            item["timestamp_edit"],
            item["country"],
            item["club_id"],
            item.get("registration_date") or None,
            item.get("lang") or None,
            item["original_member_id"],
            item.get("birthday") or None,  # optional field; falsy values become None
        )
        users[item["member_id"]] = user.to_dict()  # store a plain dict, not the User object

    return users

In [6]:
def get_profile_link(userid: int) -> str:
    """Look up a member's profile path via the club's member-management endpoint.

    Args:
        userid: Member id inserted into the request URL.

    Returns:
        The value found at ``data.profile.profile_link`` in the JSON response
        (e.g. ``'/user/nnizel-b4263e14'``), or ``None`` when the status is not
        200, the body is not JSON, or that path is absent from the payload.

    Raises:
        requests.RequestException: Connection failure or timeout.
    """
    from importlib import reload
    import requests
    import secretslocal
    # Re-import on every call; presumably so an updated COOKIE is picked up
    # without restarting the kernel.
    reload(secretslocal)

    request_timeout_seconds = 30

    # NOTE(review): the double slash before "member" is kept from the original URL — confirm it is intentional.
    url = f'https://enphysionhealthllc.virtuagym.com/member-management/member/{userid}//member'

    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': f'{secretslocal.COOKIE}',
        'Referer': 'https://enphysionhealthllc.virtuagym.com/web-app/member/43950940?is_redirect=1',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"'
    }

    # Without a timeout a single stalled connection would block the whole member loop.
    response = requests.get(url, headers=headers, timeout=request_timeout_seconds)
    if response.status_code != 200:
        return None

    try:
        parsed_data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated like any other miss.
        return None

    try:
        return parsed_data['data']['profile']['profile_link']
    except (KeyError, IndexError, TypeError):
        # Payload is null or lacks the expected nesting. Only lookup errors are
        # caught, so KeyboardInterrupt and genuine bugs are not swallowed.
        return None

In [7]:
def get_recent_activity_data(userid: int, user_profile_link: str, date: datetime.date) -> requests.Response:
    """Request the exercise-calendar month fragment for one member.

    Args:
        userid: Member id, sent as the ``u`` query parameter.
        user_profile_link: Profile path (e.g. ``'/user/<slug>'``) placed
            between the host and ``/exercise/ajax``.
        date: Sent as ``date_to_get_month`` after conversion with ``str()``.

    Returns:
        The raw ``requests.Response``; the status code is not checked here.

    Raises:
        requests.RequestException: Connection failure or timeout.
    """
    from importlib import reload
    import requests
    import secretslocal
    # Re-import on every call; presumably so an updated COOKIE is picked up
    # without restarting the kernel.
    reload(secretslocal)

    request_timeout_seconds = 30

    # Browser-like headers; the session cookie comes from secretslocal.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': f'{secretslocal.COOKIE}',
        'Host': 'enphysionhealthllc.virtuagym.com',
        'Referer': 'https://enphysionhealthllc.virtuagym.com/user/cfcsandeep-1b4bd803/exercise',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
        'sec-ch-ua': '"Not/A)Brand";v="99", "Google Chrome";v="115", "Chromium";v="115"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"'
    }

    url = f'https://enphysionhealthllc.virtuagym.com{user_profile_link}/exercise/ajax?action=generate_month&date_to_get_month={str(date)}&u={userid}'

    # Without a timeout a single stalled connection would block the whole member loop.
    return requests.get(url, headers=headers, timeout=request_timeout_seconds)


In [8]:
def get_trimmed_dict_with_target_dates(dict_dates_to_imgs: dict, date: datetime.date = None, lookback_days: int = 12) -> dict:
    """Keep only the entries whose date key falls in a window ending at ``date``.

    Args:
        dict_dates_to_imgs: Mapping whose keys are ``'%Y-%m-%d'`` strings.
        date: Last day of the window (inclusive). ``None`` means today,
            resolved on each call rather than once at definition time.
        lookback_days: Number of days before ``date`` that are still kept, so
            the window spans ``lookback_days + 1`` calendar days.

    Returns:
        dict: The entries with ``date - lookback_days <= key <= date``, in
        their original order.

    Raises:
        ValueError: A key is not a ``'%Y-%m-%d'`` date string.
    """
    if date is None:
        # A default written in the signature would be frozen at the moment the
        # cell defining this function ran, which goes stale in a long-lived kernel.
        date = datetime.datetime.now().date()

    earliest = date - datetime.timedelta(days=lookback_days)

    return {
        key: value
        for key, value in dict_dates_to_imgs.items()
        if earliest <= datetime.datetime.strptime(key, '%Y-%m-%d').date() <= date
    }

In [9]:
def get_activity_per_day_truth_dict(activity: dict) -> dict:
    """Reduce each day's calendar markup to one "something was completed" flag.

    An ``<img class="icon_in_day">`` whose inline style dims it to half
    opacity counts as an exercise that was not done; any other icon counts
    as done.

    Args:
        activity: Day key -> HTML fragment for that day.

    Returns:
        dict: Day key -> ``True`` when at least one icon in the fragment is
        not dimmed, otherwise ``False`` (including days without icons).
    """
    import re

    # Matches the half-opacity declaration regardless of spacing or a leading
    # zero ("opacity:.5;", "opacity: 0.5"); "(?!\d)" rejects values like ".55".
    dimmed_style = re.compile(r'opacity\s*:\s*0?\.5(?!\d)')

    activity_per_day = {}
    for day, html_content in activity.items():
        soup = BeautifulSoup(html_content, 'html.parser')
        icons = soup.find_all('img', class_='icon_in_day')

        # Test for the marker's presence rather than comparing str.find()'s
        # index to 1: find() returns a position (or -1), not a boolean.
        # Icons with no style attribute are treated as undimmed.
        activity_per_day[day] = any(
            dimmed_style.search(icon.get('style') or '') is None
            for icon in icons
        )

    return activity_per_day

In [10]:
def get_account_manager(userid):
    """Look up the name of a member's account manager.

    Args:
        userid: Member id inserted into the request URL.

    Returns:
        The ``name`` of the first entry under ``data`` in the JSON response,
        or ``None`` when the status is not 200 (the status and body are
        printed in that case), the body is not JSON, or no such entry exists.

    Raises:
        requests.RequestException: Connection failure or timeout.
    """
    from importlib import reload
    import requests
    import secretslocal
    # Re-import on every call; presumably so an updated COOKIE is picked up
    # without restarting the kernel.
    reload(secretslocal)

    request_timeout_seconds = 30

    # NOTE(review): the double slash before "account-manager" is kept from the original URL — confirm it is intentional.
    url = f'https://enphysionhealthllc.virtuagym.com/member-management/member/{userid}//account-manager'

    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': f'{secretslocal.COOKIE}',
        'Referer': 'https://enphysionhealthllc.virtuagym.com/web-app/member/43950940?is_redirect=1',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="118", "Google Chrome";v="118", "Not=A?Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"macOS"'
    }

    # Without a timeout a single stalled connection would block the whole member loop.
    response = requests.get(url, headers=headers, timeout=request_timeout_seconds)
    if response.status_code != 200:
        # Surface auth/permission problems (e.g. an expired cookie) to the operator.
        print(f'{response.status_code} {response.text}')
        return None

    try:
        parsed_data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated like any other miss.
        return None

    try:
        return parsed_data['data'][0]['name']
    except (KeyError, IndexError, TypeError):
        # Payload is null, has no managers listed, or lacks the expected keys.
        # Only lookup errors are caught, so KeyboardInterrupt and genuine bugs
        # are not swallowed.
        return None
    

In [11]:
def update_dict_with_profile_link_and_acct_mgr(users: dict) -> dict:
    """Attach ``profile_link`` and ``acct_mgr`` to every eligible member.

    A member is skipped (left untouched) when they are named Joshua Kaplan or
    when ``is_pro`` is falsy. For everyone else both lookups are performed,
    the account manager falls back to ``'Joshua Kaplan'`` when none is found,
    and the stored record is replaced by a copy without ``None`` values.

    Args:
        users: Member id -> member dict; updated in place.

    Returns:
        dict: The same ``users`` mapping.
    """
    for userid, member in users.items():
        is_excluded_person = member['firstname'] == 'Joshua' and member['lastname'] == 'Kaplan'
        if is_excluded_person or not member['is_pro']:
            continue

        # Both lookups finish before the record is touched. The original dict
        # object is updated as well, so other references to it see the new keys.
        member.update(
            profile_link=get_profile_link(userid),
            acct_mgr=get_account_manager(userid) or 'Joshua Kaplan',
        )

        # Store a copy without None values; a missing profile link therefore
        # shows up as an absent key rather than a None entry.
        users[userid] = {field: value for field, value in member.items() if value is not None}

    return users

In [12]:
def split_users_into_active_and_inactive(users: dict) -> (dict, dict):
    """Partition members by whether their record has a ``profile_link`` key.

    Only the presence of the key is tested, not its value.

    Args:
        users: Member id -> member dict.

    Returns:
        tuple: ``(with_link, without_link)``, two new dicts that reference the
        original member dicts and keep their relative order.
    """
    with_link = {
        userid: record for userid, record in users.items() if 'profile_link' in record
    }
    without_link = {
        userid: record for userid, record in users.items() if 'profile_link' not in record
    }
    return with_link, without_link

In [13]:
def get_calendar_content_per_user(users: dict, date: datetime.date = None) -> dict:
    """Download each member's exercise calendar and store per-day activity flags.

    Args:
        users: Member id -> member dict; every dict must have a
            ``profile_link``. Updated in place.
        date: Reference day for the month request and for the end of the
            trimmed window. ``None`` means today, resolved on each call
            rather than once at definition time.

    Returns:
        dict: The same ``users`` mapping, each member dict now carrying an
        ``activity_calendar`` entry (day key -> bool).
    """
    if date is None:
        # A default written in the signature would be frozen at the moment the
        # cell defining this function ran, which goes stale in a long-lived kernel.
        date = datetime.datetime.now().date()

    for userid, userdict in users.items():
        proflink = userdict['profile_link']
        ret = get_recent_activity_data(userid, proflink, date)
        # The body is parsed regardless of the HTTP status code.
        div_dates = extract_div_dates(ret.content)
        trimmed_dict = get_trimmed_dict_with_target_dates(div_dates, date)
        userdict['activity_calendar'] = get_activity_per_day_truth_dict(trimmed_dict)
        users[userid] = userdict

    return users

In [14]:
def compute_days_since_last_activity(users_with_activity_calendar: dict) -> dict:
    """Record how many whole days have passed since each member was last active.

    Args:
        users_with_activity_calendar: Member id -> member dict holding an
            ``activity_calendar`` mapping of ``'%Y-%m-%d'`` keys to flags.
            Updated in place.

    Returns:
        dict: The same mapping, each member dict now carrying
        ``days_since_last_active``: the day count between now and midnight of
        the latest truthy calendar day, or ``None`` if no day is truthy.
    """
    for member in users_with_activity_calendar.values():
        calendar = member['activity_calendar']
        active_days = [day for day in calendar if calendar[day]]

        if active_days:
            # ISO-formatted keys sort chronologically, so max() is the latest day.
            last_active = datetime.datetime.strptime(max(active_days), '%Y-%m-%d')
            elapsed_days = (datetime.datetime.now() - last_active).days
        else:
            elapsed_days = None

        member['days_since_last_active'] = elapsed_days

    return users_with_activity_calendar

In [15]:
def create_and_output_dataframe(data: dict, output_dir: str = './final_outputs') -> None:
    """Write one CSV of member activity per account manager.

    Each file is named ``<account manager>_<YYYYmmddHHMMSS>.csv`` and holds
    the columns ``member_id``, ``firstname``, ``lastname``,
    ``days_since_last_active`` and ``account_manager``. The path of every
    written file is printed.

    Args:
        data: Member id -> member dict with ``firstname``, ``lastname`` and
            ``days_since_last_active``; ``acct_mgr`` is optional.
        output_dir: Directory receiving the files; created if missing.

    Raises:
        KeyError: A member dict lacks one of the mandatory fields.
    """
    import os

    columns = ['member_id', 'firstname', 'lastname', 'days_since_last_active', 'account_manager']
    records = [
        {
            'member_id': member_id,
            'firstname': member['firstname'],
            'lastname': member['lastname'],
            'days_since_last_active': member['days_since_last_active'],
            # groupby() drops rows with a null key, so members without a
            # manager get an explicit bucket instead of vanishing from the export.
            'account_manager': member.get('acct_mgr') or 'Unassigned',
        }
        for member_id, member in data.items()
    ]
    df = pd.DataFrame(records, columns=columns)

    # to_csv() does not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)

    # One timestamp for the whole export so every file of a run shares it.
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

    for account_manager, group_df in df.groupby('account_manager'):
        # Path separators in a name would otherwise point into another directory.
        safe_name = str(account_manager).replace('/', '_').replace('\\', '_')
        filename = f'{output_dir}/{safe_name}_{timestamp}.csv'
        group_df.to_csv(filename, index=False)
        print(f'Exported file to {filename}.')


In [16]:
def run():
    """Run the whole pipeline, printing the duration of every stage.

    Stages, in order: fetch members, attach profile links and account
    managers, keep members that have a profile link, download activity
    calendars, compute days since last activity, export the CSV files.
    """
    started_at = datetime.datetime.now()
    checkpoints = [started_at]

    def _report_step():
        # Print the time elapsed since the previous checkpoint and record a new one.
        checkpoints.append(datetime.datetime.now())
        print(f'Completed step in: {checkpoints[-1]-checkpoints[-2]}')

    print('Getting all members.')
    members = get_all_club_members()
    _report_step()

    print('Getting profile links for members.')
    members = update_dict_with_profile_link_and_acct_mgr(members)
    _report_step()

    print('Removing users without profile links.')
    linked_members, _ = split_users_into_active_and_inactive(members)
    _report_step()

    print('Get calendar content per user.')
    members_with_calendar = get_calendar_content_per_user(linked_members, datetime.datetime.now().date())
    _report_step()

    print('Compute days since last activity.')
    members_with_calendar = compute_days_since_last_activity(members_with_calendar)
    _report_step()

    print('Creating and outputting dataframe.')
    create_and_output_dataframe(members_with_calendar)
    _report_step()

    print(f'Finished process in: {datetime.datetime.now()-started_at}')

DO THE ACTUAL DATA PULLING

In [15]:
# Pull the full member list once; the following cells index into this dict by member id.
users = get_all_club_members()

In [16]:
# Spot-check one member record (the dict is keyed by member_id).
# NOTE(review): the rendered output includes a real name and email — clear outputs before sharing.
users[41468290]

{'member_id': 41468290,
 'firstname': 'Nancy',
 'lastname': 'Nizel',
 'active': True,
 'is_pro': True,
 'gender': 'u',
 'email': 'nnizel@gmail.com',
 'member_since': 1683676800000,
 'timestamp_edit': 1683726281242,
 'country': 'US',
 'club_id': 56930,
 'registration_date': 1683725238,
 'lang': 'en',
 'original_member_id': 0}

In [17]:
# Resolve the same member's profile path; the request uses the session cookie from secretslocal.
get_profile_link(41468290)

'/user/nnizel-b4263e14'

In [18]:
# One-member subset for exercising the remaining steps on a single record.
# The inner dict is the same object as users[41468290], so in-place changes
# made by later steps are visible through both names.
tmp = {41468290: users[41468290]}
tmp

{41468290: {'member_id': 41468290,
  'firstname': 'Nancy',
  'lastname': 'Nizel',
  'active': True,
  'is_pro': True,
  'gender': 'u',
  'email': 'nnizel@gmail.com',
  'member_since': 1683676800000,
  'timestamp_edit': 1683726281242,
  'country': 'US',
  'club_id': 56930,
  'registration_date': 1683725238,
  'lang': 'en',
  'original_member_id': 0}}

In [19]:
# Attach profile_link and acct_mgr to the subset (two HTTP lookups per eligible member).
tmp = update_dict_with_profile_link_and_acct_mgr(tmp)

In [20]:
# Partition the subset on whether a profile_link key is present.
users_with_profile_links, users_without_profile_links = split_users_into_active_and_inactive(tmp)

In [21]:
# Sanity check: sizes of the two partitions (with / without a profile link).
print(len(users_with_profile_links))
print(len(users_without_profile_links))

1
0


In [22]:
# Inspect the enriched record: profile_link and acct_mgr should now be present.
users_with_profile_links[41468290]

{'member_id': 41468290,
 'firstname': 'Nancy',
 'lastname': 'Nizel',
 'active': True,
 'is_pro': True,
 'gender': 'u',
 'email': 'nnizel@gmail.com',
 'member_since': 1683676800000,
 'timestamp_edit': 1683726281242,
 'country': 'US',
 'club_id': 56930,
 'registration_date': 1683725238,
 'lang': 'en',
 'original_member_id': 0,
 'profile_link': '/user/nnizel-b4263e14',
 'acct_mgr': 'Joshua Kaplan'}

In [23]:
# Download and parse the exercise calendar for each member; no date is passed,
# so the function's default reference date applies.
users_with_activity_calendar = get_calendar_content_per_user(users_with_profile_links)

In [24]:
# Same call restricted to one member; the result is displayed rather than stored.
get_calendar_content_per_user({k: v for k, v in users_with_profile_links.items() if k == 41468290})

{41468290: {'member_id': 41468290,
  'firstname': 'Nancy',
  'lastname': 'Nizel',
  'active': True,
  'is_pro': True,
  'gender': 'u',
  'email': 'nnizel@gmail.com',
  'member_since': 1683676800000,
  'timestamp_edit': 1683726281242,
  'country': 'US',
  'club_id': 56930,
  'registration_date': 1683725238,
  'lang': 'en',
  'original_member_id': 0,
  'profile_link': '/user/nnizel-b4263e14',
  'acct_mgr': 'Joshua Kaplan',
  'activity_calendar': {'2024-04-30': False,
   '2024-05-01': True,
   '2024-05-02': True,
   '2024-05-03': True,
   '2024-05-04': False,
   '2024-05-05': True,
   '2024-05-06': True,
   '2024-05-07': True,
   '2024-05-08': True,
   '2024-05-09': False,
   '2024-05-10': True,
   '2024-05-11': True,
   '2024-05-12': False}}}

In [25]:
# View just the per-day activity flags for the sample member.
users_with_activity_calendar[41468290]['activity_calendar']

{'2024-04-30': False,
 '2024-05-01': True,
 '2024-05-02': True,
 '2024-05-03': True,
 '2024-05-04': False,
 '2024-05-05': True,
 '2024-05-06': True,
 '2024-05-07': True,
 '2024-05-08': True,
 '2024-05-09': False,
 '2024-05-10': True,
 '2024-05-11': True,
 '2024-05-12': False}

In [26]:
# Add days_since_last_active to every record; the member dicts are modified
# in place and the same mapping is returned.
users_with_activity_calendar = compute_days_since_last_activity(users_with_activity_calendar)

In [27]:
# Confirm the new days_since_last_active field on the sample member.
users_with_activity_calendar[41468290]

{'member_id': 41468290,
 'firstname': 'Nancy',
 'lastname': 'Nizel',
 'active': True,
 'is_pro': True,
 'gender': 'u',
 'email': 'nnizel@gmail.com',
 'member_since': 1683676800000,
 'timestamp_edit': 1683726281242,
 'country': 'US',
 'club_id': 56930,
 'registration_date': 1683725238,
 'lang': 'en',
 'original_member_id': 0,
 'profile_link': '/user/nnizel-b4263e14',
 'acct_mgr': 'Joshua Kaplan',
 'activity_calendar': {'2024-04-30': False,
  '2024-05-01': True,
  '2024-05-02': True,
  '2024-05-03': True,
  '2024-05-04': False,
  '2024-05-05': True,
  '2024-05-06': True,
  '2024-05-07': True,
  '2024-05-08': True,
  '2024-05-09': False,
  '2024-05-10': True,
  '2024-05-11': True,
  '2024-05-12': False},
 'days_since_last_active': 1}

In [28]:
# Write one CSV per account manager under ./final_outputs/.
create_and_output_dataframe(users_with_activity_calendar)

Exported file to ./final_outputs/Joshua Kaplan_20240512153833.csv.


ACTUAL RUNS

In [17]:
# End-to-end run over all members with per-stage timings. Any "<status> <body>"
# lines in the output are printed by get_account_manager for non-200 responses.
run()

Getting all members.
Completed step in: 0:00:01.347071
Getting profile links for members.
401 {"message":"Unauthorized"}
401 {"message":"Unauthorized"}
401 {"message":"Unauthorized"}
401 {"message":"Unauthorized"}
Completed step in: 0:04:50.739768
Removing users without profile links.
Completed step in: 0:00:00.000397
Get calendar content per user.
Completed step in: 0:02:43.571926
Compute days since last activity.
Completed step in: 0:00:00.001673
Creating and outputting dataframe.
Exported file to ./final_outputs/Egle Staisiunaite_20240602233922.csv.
Exported file to ./final_outputs/Joshua Kaplan_20240602233922.csv.
Exported file to ./final_outputs/Kate O'Sullivan_20240602233922.csv.
Exported file to ./final_outputs/Rachel Barg_20240602233922.csv.
Completed step in: 0:00:00.023007
Finished process in: 0:07:35.683856
