Open this notebook in Google Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Riminder/hrflow-cookbook/blob/main/examples/%5BStoring%5D%20archive_profiles_based_on_creation_date.ipynb)

In [1]:
# Copyright 2023 HrFlow's AI Research Department. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

**Welcome to this Google Colaboratory tutorial!** This notebook guides you through archiving profiles in an HrFlow source based on how long they’ve been stored.

Here’s a quick summary of the workflow:

* 📥 Get profiles: Retrieve all profiles from the source.
* ⏳ Calculate duration: Determine the age of each profile in the source.
* 🗃️ Archive profiles: Archive profiles older than the specified duration.

In [None]:
!pip install --quiet hrflow requests tqdm

In [2]:
import re
from hrflow import Hrflow
from tqdm import tqdm
from hrflow.utils import get_all_profiles
from datetime import datetime, timezone
from getpass import getpass

# Credentials and the source key are read with getpass so they are never echoed
# into the notebook's saved output.
API_SECRET = getpass("YOUR_API_SECRET")
API_USER = getpass("USER@EMAIL.DOMAIN")
SOURCE_KEY = getpass("YOUR_SOURCE_KEY")
# The age threshold is not a secret and drives a bulk archive, so it is read with
# a visible prompt: the user can check what was typed and sees the expected format.
MIN_DURATION = input("YOUR_DURATION (e.g. '2y', '6m', '3w', '10d'): ")

In [None]:
# Build the HrFlow API client from the secret and user e-mail entered in the configuration cell.
hrflow_client = Hrflow(api_secret=API_SECRET, api_user=API_USER)

In [None]:
def calculate_duration_in_years(input_date):
    """
    Calculate the duration in decimal years between now and the input date.

    Args:
        input_date (str): Timestamp in the form "YYYY-MM-DDTHH:MM:SS", with
            optional fractional seconds and an optional UTC offset
            ("+0000", "+00:00" or a trailing "Z"). A timestamp without an
            offset is interpreted as UTC.

    Returns:
        float | None: Absolute duration in decimal years (using 365.25-day
        years), rounded to 6 decimal places. Returns None, after printing an
        error message, when the input is not a string or cannot be parsed.
    """
    # Tried in order; the offset-aware variants come first because they are
    # the documented format.
    accepted_formats = (
        "%Y-%m-%dT%H:%M:%S%z",
        "%Y-%m-%dT%H:%M:%S.%f%z",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%f",
    )

    try:
        if not isinstance(input_date, str):
            raise TypeError(f"expected a date string, got {type(input_date).__name__}")

        normalized = input_date.strip()
        # Only the trailing "Z" designates UTC; rewrite just that character.
        if normalized.endswith("Z"):
            normalized = normalized[:-1] + "+0000"

        input_datetime = None
        for date_format in accepted_formats:
            try:
                input_datetime = datetime.strptime(normalized, date_format)
                break
            except ValueError:
                continue
        if input_datetime is None:
            raise ValueError(f"time data {input_date!r} does not match any supported format")

        # Naive timestamps cannot be subtracted from an aware "now"; treat them as UTC.
        if input_datetime.tzinfo is None:
            input_datetime = input_datetime.replace(tzinfo=timezone.utc)

        now = datetime.now(timezone.utc)
        # abs(): the result is a distance in time, so a date in the future
        # yields a positive duration as well.
        duration_seconds = abs((now - input_datetime).total_seconds())
        decimal_years = duration_seconds / (365.25 * 24 * 3600)
        return round(decimal_years, 6)
    except (TypeError, ValueError, OverflowError) as e:
        print(f"Error: {str(e)}. Expected format: 'YYYY-MM-DDTHH:MM:SS[.ffffff][+0000|Z]'.")
        return None

def convert_to_decimal_years(period):
    """
    Convert a duration string (e.g., '2y', '6m', '3w', '10d') into decimal years.

    Args:
        period (str): Duration string in the format 'Xy' (years), 'Xm' (months),
                      'Xw' (weeks), or 'Xd' (days). X may be an integer or a
                      decimal number (e.g. '1.5y'). Surrounding whitespace and
                      letter case are ignored.

    Returns:
        float: Equivalent duration in decimal years.

    Raises:
        ValueError: If `period` is not a string or does not consist of exactly
            one number followed by one unit letter.
    """
    error_message = (
        f"Invalid period format: {period}. Use 'Xy' for years, 'Xm' for months, "
        "'Xw' for weeks, or 'Xd' for days."
    )
    if not isinstance(period, str):
        raise ValueError(error_message)

    # How many of each unit make up one year. Weeks and days both derive from a
    # 365.25-day year so that '1w' and '7d' convert to the same value.
    units_per_year = {
        'y': 1,
        'm': 12,
        'w': 365.25 / 7,
        'd': 365.25,
    }

    # fullmatch: the whole string must be "<number><unit>"; trailing text such as
    # '2m3d' or '5min' is rejected rather than silently truncated.
    match = re.fullmatch(r'(\d+(?:\.\d+)?)\s*(y|m|w|d)', period.strip().lower())
    if not match:
        raise ValueError(error_message)

    value, unit = match.groups()
    return float(value) / units_per_year[unit]

In [None]:
# Convert the user-supplied threshold (e.g. '2y', '6m') into decimal years so it
# can be compared with the value returned by calculate_duration_in_years.
min_duration_decimal = convert_to_decimal_years(MIN_DURATION)

In [None]:

# Retrieve every profile stored in the source.
profiles = get_all_profiles(hrflow_client, source_key=SOURCE_KEY)

# `profiles_to_archive` is filled below; `failed` is filled by the archiving loop.
profiles_to_archive = []
failed = []

for profile in tqdm(profiles):
    duration = calculate_duration_in_years(profile["created_at"])
    # Skip profiles whose date could not be parsed (no duration) and profiles
    # that have not yet exceeded the minimum age.
    if not duration or duration <= min_duration_decimal:
        continue
    profiles_to_archive.append(profile)

In [None]:
# Start from an empty failure list so that re-running this cell does not pile up
# duplicates from a previous run and distort the final summary.
failed = []

for profile in tqdm(profiles_to_archive):
    response = hrflow_client.profile.storing.archive(source_key=SOURCE_KEY, key=profile["key"])
    # A response that is not a dict, or that lacks "code", is recorded as a
    # failure instead of raising and aborting the remaining archive calls.
    status_code = response.get("code") if isinstance(response, dict) else None
    if status_code != 200:
        print(f"Failed to archive profile with error: {response}")
        failed.append(profile)

In [None]:
# Keys of the profiles whose archive call did not return a 200 code.
failed_keys = [entry["key"] for entry in failed]
# Every selected profile that is not in the failure list was archived.
len_profiles_archived = len(profiles_to_archive) - len(failed_keys)

print(f"Number of profiles archived: {len_profiles_archived}")
print(f"Failed profiles: {failed_keys}")