Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
938b021
chore: add test notebook in gitignore
corentin-hrflow Mar 8, 2024
74481d8
build: add packages like tqdm, openpyxl
corentin-hrflow Mar 8, 2024
e21c348
refactor: split utils init in files
corentin-hrflow Mar 8, 2024
9e6eff9
fix(parsing): use open as binary and fix status code check in add_folder
corentin-hrflow Mar 8, 2024
f52abfa
feat: add limit rate
corentin-hrflow Mar 8, 2024
9bfd7b2
refactor: remove unnecessary hrflow parent dir to avoid import hrflow…
corentin-hrflow Mar 8, 2024
3c54b90
feat: add rate_limiter in init of utils
corentin-hrflow Mar 8, 2024
65982da
feat: add rate_limiter to add_file and add move_failure_to
corentin-hrflow Mar 8, 2024
f2b61e5
feat: add get all profiles and jobs in utils
corentin-hrflow Mar 8, 2024
c35c3ed
fix: cyclic import due to typing
corentin-hrflow Mar 8, 2024
1557932
feat: add generate report for parsing evaluation
corentin-hrflow Mar 8, 2024
7ecf92d
style: format with black and flake8
corentin-hrflow Mar 8, 2024
f8e880f
build: add new preminor version
corentin-hrflow Mar 8, 2024
e364f6f
feat: add progress in add_folder parsing
corentin-hrflow Mar 8, 2024
a78affb
fix: forget to move pydantic from dev to prod dependencies
corentin-hrflow Mar 8, 2024
5107b00
build: upgrade package version
corentin-hrflow Mar 8, 2024
2835644
fix: file name during parsing
corentin-hrflow Mar 8, 2024
e204930
build: upgrade package version
corentin-hrflow Mar 8, 2024
bcf0f18
refactor: move some utils to core like validation or rate limit
corentin-hrflow Mar 8, 2024
9fe05e9
feat: add rate limit to all endpoint
corentin-hrflow Mar 11, 2024
aff200f
test: add rate limit tests
corentin-hrflow Mar 11, 2024
2653c32
test: improve assert message for scoring and ignore meta.count
corentin-hrflow Mar 11, 2024
8863ff7
chore: add in makefile clean_cache in clean command
corentin-hrflow Mar 11, 2024
d4d0749
style: format with black
corentin-hrflow Mar 11, 2024
1486b37
fix: pull from master and fix conflicts
corentin-hrflow Mar 11, 2024
c94e356
docs: fix typo
Thomas65535 Mar 12, 2024
7e972d0
fix: replace file.read by file directly in add_folder
corentin-hrflow Mar 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ test/*
test_assets/*
tests/assets
.htpasswd
test.ipynb

docker/dependencies/libs/*

Expand Down
2 changes: 1 addition & 1 deletion Documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ Here is an example on how to handle webhooks
* Here is an example of how to get help:

```python
>>> from hrflow.hrflow.profile.parsing import ProfileParsing
>>> from hrflow.profile.parsing import ProfileParsing
>>> help(ProfileParsing.get)

#Help on function get in module hrflow.profile.parsing:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Define variables
ARGS :=

clean:
clean: clean_cache
rm -rf build dist *.egg-info

clean_cache:
Expand Down
13 changes: 6 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,11 @@ with open("path_to_file.pdf", "rb") as f:

#Parse it using this method without reference:
response = client.profile.parsing.add_file(
source_key="INSERT_THE_TARGET_SOURCE_KEY",
profile_file=file,
sync_parsing=1, # This is to invoke real time parsing
tags=[{"name": "application_reference", "value": "TS_X12345"}], # Attach an application tag to the profile to be parsed
)

source_key="INSERT_THE_TARGET_SOURCE_KEY",
profile_file=file,
sync_parsing=1, # This is to invoke real time parsing
tags=[{"name": "application_reference", "value": "TS_X12345"}], # Attach an application tag to the profile to be parsed
)
```


Expand All @@ -79,4 +78,4 @@ response = client.profile.parsing.add_file(
- [HrFlow.ai Academy](https://www.youtube.com/@hrflow.aiacademy9534) on Youtube for videos on how to get started with HrFlow.ai
- [Updates page](https://updates.hrflow.ai/) to keep you informed about our product releases
- [Documentation](https://developers.hrflow.ai/reference/authentication) to provide information on HrFlow.ai features
- [Our Roadmap](https://roadmap.hrflow.ai/) to show upcoming features or request new ones
- [Our Roadmap](https://roadmap.hrflow.ai/) to show upcoming features or request new ones
2 changes: 1 addition & 1 deletion hrflow/__init__.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
__url__,
__version__,
)
from .hrflow.hrflow import Hrflow
from .hrflow import Hrflow
4 changes: 3 additions & 1 deletion hrflow/hrflow/auth/__init__.py → hrflow/auth/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import typing as t

from ..utils import validate_key, validate_response
from ..core.rate_limit import rate_limiter
from ..core.validation import validate_key, validate_response

API_SECRET_REGEX = r"^ask[rw]?_[0-9a-f]{32}$"

Expand All @@ -9,6 +10,7 @@ class Auth:
def __init__(self, api):
self.client = api

@rate_limiter
def get(self) -> t.Dict[str, t.Any]:
"""
Try your API Keys. This endpoint allows you to learn how to add the right
Expand Down
5 changes: 4 additions & 1 deletion hrflow/hrflow/board/__init__.py → hrflow/board/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ..utils import (
from ..core.rate_limit import rate_limiter
from ..core.validation import (
ORDER_BY_VALUES,
validate_key,
validate_limit,
Expand All @@ -12,6 +13,7 @@ class Board(object):
def __init__(self, client):
self.client = client

@rate_limiter
def list(self, name=None, page=1, limit=30, sort_by="date", order_by="desc"):
"""
Search boards for given filters.
Expand Down Expand Up @@ -42,6 +44,7 @@ def list(self, name=None, page=1, limit=30, sort_by="date", order_by="desc"):
response = self.client.get("boards", query_params)
return validate_response(response)

@rate_limiter
def get(self, key=None):
"""
Get source given a board key.
Expand Down
37 changes: 37 additions & 0 deletions hrflow/core/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os

from .validation import (
is_valid_extension,
is_valid_filename,
validate_key,
validate_reference,
)


def format_item_payload(item, provider_key, key, reference=None, email=None):
    """
    Build the payload that identifies a profile or a job.

    Args:
        item:           <string> "profile" selects the "source_key" field;
                        any other value selects "board_key".
        provider_key:   <string> key of the source/board, passed through
                        validate_key.
        key:            <string> item key; added (after validate_key) only
                        when truthy.
        reference:      <string> item reference; added (after
                        validate_reference) only when truthy.
        email:          <string> stored as-is under "profile_email" only
                        when truthy.

    Returns:
        <dict> payload holding the provider field plus the optional fields
        that were supplied.
    """
    provider_field = "board_key"
    if item == "profile":
        provider_field = "source_key"

    result = {}
    result[provider_field] = validate_key("provider", provider_key)

    # Optional identifiers are only sent when the caller supplied them.
    if key:
        result.update(key=validate_key("item", key))
    if reference:
        result.update(reference=validate_reference(reference))
    if email:
        result.update(profile_email=email)

    return result


def get_files_from_dir(dir_path, is_recurcive):
    """
    Collect the paths of the files found in a directory.

    Args:
        dir_path:       <string> directory to scan.
        is_recurcive:   <bool> when truthy, sub-directories accepted by
                        is_valid_filename are scanned as well.

    Returns:
        <list> paths (dir_path joined with the entry name) of the entries
        accepted by is_valid_extension. Sub-directories are never part of
        the result.
    """
    collected = []

    for entry_name in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry_name)

        if os.path.isdir(entry_path):
            # A directory is never a file to return: without this guard a
            # non-recursive scan would hand a directory named like
            # "cv.pdf" to the extension check and list it as a file.
            if is_recurcive and is_valid_filename(entry_path):
                collected.extend(get_files_from_dir(entry_path, is_recurcive))
            continue

        if is_valid_extension(entry_path):
            collected.append(entry_path)

    return collected
59 changes: 59 additions & 0 deletions hrflow/core/rate_limit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from functools import wraps
from time import sleep, time

DEFAULT_MAX_REQUESTS_PER_MINUTE = None
DEFAULT_MIN_SLEEP_PER_REQUEST = 0
SECONDS_IN_MINUTE = 60


def rate_limiter(func):
    """
    Decorator that applies rate limiting to a function.

    Calls are counted over a fixed window of SECONDS_IN_MINUTE seconds. The
    counter lives in the closure created when the decorator is applied, so
    every call to the decorated function shares it. When the limit is
    exceeded the call blocks until the window ends, then runs as the first
    call of a new window.

    Parameters in the decorated function (removed from the keyword arguments
    before the wrapped function is called):
        max_requests_per_minute: <int> The maximum number of requests that can be made
                                in a minute. If None, there is no limit.
        min_sleep_per_request: <float> The minimum time to wait between requests.
                                If None or 0, no wait is added.

    Usage:
        >>> @rate_limiter
        ... def my_function(param1, param2):
        ...     pass
        >>> my_function(1, 2, max_requests_per_minute=10, min_sleep_per_request=0.1)
        ... # The function will be called at most 10 times per minute
        ... # with at least 0.1 seconds between each call
    """
    requests_in_window = 0
    window_start = time()

    @wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal requests_in_window, window_start

        max_requests_per_minute = kwargs.pop(
            "max_requests_per_minute", DEFAULT_MAX_REQUESTS_PER_MINUTE
        )
        min_sleep_per_request = kwargs.pop(
            "min_sleep_per_request", DEFAULT_MIN_SLEEP_PER_REQUEST
        )

        now = time()
        elapsed = now - window_start

        if elapsed >= SECONDS_IN_MINUTE:
            # The window expired: this call opens the next one and must be
            # counted in it, otherwise the window admits max + 1 calls.
            requests_in_window = 1
            window_start = now
        else:
            requests_in_window += 1
            if (
                max_requests_per_minute is not None
                and requests_in_window > max_requests_per_minute
            ):
                # Over budget: wait out the rest of the window, then run as
                # the first (counted) call of the new window.
                sleep(SECONDS_IN_MINUTE - elapsed)
                requests_in_window = 1
                window_start = time()

        # None is tolerated as "no minimum wait", mirroring how None
        # disables max_requests_per_minute.
        if min_sleep_per_request:
            sleep(min_sleep_per_request)
        return func(*args, **kwargs)

    return wrapper
32 changes: 0 additions & 32 deletions hrflow/hrflow/utils/__init__.py → hrflow/core/validation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os
import re
import typing as t

KEY_REGEX = r"^[0-9a-f]{40}$"
STAGE_VALUES = [None, "new", "yes", "later", "no"]
Expand Down Expand Up @@ -33,22 +32,6 @@
]
INVALID_FILENAME = [".", ".."]

ITEM_TYPE = ["profile", "job"]


def format_item_payload(item, provider_key, key, reference=None, email=None):
    """
    Build the payload that identifies a profile or a job.

    Args:
        item:           <string> "profile" selects the "source_key" field;
                        any other value selects "board_key".
        provider_key:   <string> key of the source/board, passed through
                        validate_key.
        key:            <string> item key; added (after validate_key) only
                        when truthy.
        reference:      <string> item reference; added (after
                        validate_reference) only when truthy.
        email:          <string> stored as-is under "profile_email" only
                        when truthy.

    Returns:
        <dict> payload holding the provider field plus the optional fields
        that were supplied.
    """
    provider_field = "board_key"
    if item == "profile":
        provider_field = "source_key"

    result = {}
    result[provider_field] = validate_key("provider", provider_key)

    # Optional identifiers are only sent when the caller supplied them.
    if key:
        result.update(key=validate_key("item", key))
    if reference:
        result.update(reference=validate_reference(reference))
    if email:
        result.update(profile_email=email)

    return result


def validate_boolean(name, value):
"""
Expand Down Expand Up @@ -126,21 +109,6 @@ def is_valid_filename(file_path):
return name not in INVALID_FILENAME


def get_files_from_dir(dir_path, is_recurcive):
    """
    Collect the paths of the files found in a directory.

    Args:
        dir_path:       <string> directory to scan.
        is_recurcive:   <bool> when truthy, sub-directories accepted by
                        is_valid_filename are scanned as well.

    Returns:
        <list> paths (dir_path joined with the entry name) of the entries
        accepted by is_valid_extension. Sub-directories are never part of
        the result.
    """
    collected = []

    for entry_name in os.listdir(dir_path):
        entry_path = os.path.join(dir_path, entry_name)

        if os.path.isdir(entry_path):
            # A directory is never a file to return: without this guard a
            # non-recursive scan would hand a directory named like
            # "cv.pdf" to the extension check and list it as a file.
            if is_recurcive and is_valid_filename(entry_path):
                collected.extend(get_files_from_dir(entry_path, is_recurcive))
            continue

        if is_valid_extension(entry_path):
            collected.append(entry_path)

    return collected


def validate_response(response):
if response.headers["Content-Type"] != "application/json":
return {
Expand Down
File renamed without changes.
Empty file removed hrflow/hrflow/__init__.py
Empty file.
File renamed without changes.
9 changes: 8 additions & 1 deletion hrflow/hrflow/job/asking.py → hrflow/job/asking.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
import typing as t

from ..utils import KEY_REGEX, validate_key, validate_reference, validate_response
from ..core.rate_limit import rate_limiter
from ..core.validation import (
KEY_REGEX,
validate_key,
validate_reference,
validate_response,
)


class JobAsking:
def __init__(self, api):
self.client = api

@rate_limiter
def get(
self,
board_key: str,
Expand Down
5 changes: 4 additions & 1 deletion hrflow/hrflow/job/embedding.py → hrflow/job/embedding.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from ..utils import format_item_payload, validate_response
from ..core import format_item_payload
from ..core.rate_limit import rate_limiter
from ..core.validation import validate_response


class JobEmbedding:
Expand All @@ -8,6 +10,7 @@ def __init__(self, api):
"""Init."""
self.client = api

@rate_limiter
def get(self, board_key, key=None, reference=None):
"""
Retrieve the parsing information.
Expand Down
File renamed without changes.
File renamed without changes.
4 changes: 3 additions & 1 deletion hrflow/hrflow/job/scoring.py → hrflow/job/scoring.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json

from ..utils import (
from ..core.rate_limit import rate_limiter
from ..core.validation import (
ORDER_BY_VALUES,
SORT_BY_VALUES,
STAGE_VALUES,
Expand All @@ -20,6 +21,7 @@ def __init__(self, api):
"""Init."""
self.client = api

@rate_limiter
def list(
self,
board_keys=None,
Expand Down
4 changes: 3 additions & 1 deletion hrflow/hrflow/job/searching.py → hrflow/job/searching.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json

from ..utils import (
from ..core.rate_limit import rate_limiter
from ..core.validation import (
ORDER_BY_VALUES,
SORT_BY_VALUES,
STAGE_VALUES,
Expand All @@ -19,6 +20,7 @@ def __init__(self, api):
"""Init."""
self.client = api

@rate_limiter
def list(
self,
board_keys=None,
Expand Down
10 changes: 8 additions & 2 deletions hrflow/hrflow/job/storing.py → hrflow/job/storing.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json

from ..utils import (
from ..core import format_item_payload
from ..core.rate_limit import rate_limiter
from ..core.validation import (
ORDER_BY_VALUES,
SORT_BY_VALUES,
format_item_payload,
validate_boolean,
validate_key,
validate_limit,
Expand All @@ -28,6 +29,7 @@ def __init__(self, api):
"""
self.client = api

@rate_limiter
def add_json(self, board_key, job_json):
"""This endpoint allows you to Index a Job object.
Note: If your Job is an unstructured text, make sure to parse it first before
Expand Down Expand Up @@ -120,6 +122,7 @@ def add_json(self, board_key, job_json):
response = self.client.post("job/indexing", json=job_json)
return validate_response(response)

@rate_limiter
def edit(self, board_key, job_json, key=None):
"""
Edit a job already stored in the given source.
Expand Down Expand Up @@ -148,6 +151,7 @@ def edit(self, board_key, job_json, key=None):
response = self.client.put("job/indexing", json=job_json)
return validate_response(response)

@rate_limiter
def get(self, board_key, key=None, reference=None):
"""
Retrieve the parsing information.
Expand All @@ -168,6 +172,7 @@ def get(self, board_key, key=None, reference=None):
response = self.client.get("job/indexing", query_params)
return validate_response(response)

@rate_limiter
def archive(self, board_key, key=None, reference=None):
"""
This method allows to archive (is_archive=1) or unarchive (is_archive=0) a job
Expand All @@ -192,6 +197,7 @@ def archive(self, board_key, key=None, reference=None):
response = self.client.patch("job/indexing/archive", json=payload)
return validate_response(response)

@rate_limiter
def list(
self,
board_keys,
Expand Down
File renamed without changes.
9 changes: 8 additions & 1 deletion hrflow/hrflow/profile/asking.py → hrflow/profile/asking.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
import typing as t

from ..utils import KEY_REGEX, validate_key, validate_reference, validate_response
from ..core.rate_limit import rate_limiter
from ..core.validation import (
KEY_REGEX,
validate_key,
validate_reference,
validate_response,
)


class ProfileAsking:
def __init__(self, api):
self.client = api

@rate_limiter
def get(
self,
source_key: str,
Expand Down
Loading