Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add shared storage option #412

Merged
merged 5 commits into from
Mar 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ docker-compose -f docker-compose.cuda.yml up -d --build
| --get-api-key-link | Show a link in the UI where to direct users to get an API key | `Don't show a link` | LT_GET_API_KEY_LINK |
| --require-api-key-origin | Require use of an API key for programmatic access to the API, unless the request origin matches this domain | `No restrictions on domain origin` | LT_REQUIRE_API_KEY_ORIGIN |
| --require-api-key-secret | Require use of an API key for programmatic access to the API, unless the client also sends a secret match | `No secrets required` | LT_REQUIRE_API_KEY_SECRET |
| --shared-storage | Shared storage URI to use for multi-process data sharing (e.g. when using gunicorn) | `memory://` | LT_SHARED_STORAGE |
| --load-only | Set available languages | `all from argostranslate` | LT_LOAD_ONLY |
| --threads | Set number of threads | `4` | LT_THREADS |
| --suggestions | Allow user suggestions | `False` | LT_SUGGESTIONS |
Expand Down
20 changes: 13 additions & 7 deletions libretranslate/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from werkzeug.http import http_date
from flask_babel import Babel

from libretranslate import flood, remove_translated_files, security
from libretranslate import scheduler, flood, secret, remove_translated_files, security, storage
from libretranslate.language import detect_languages, improve_translation_formatting
from libretranslate.locales import (_, _lazy, get_available_locales, get_available_locale_codes, gettext_escaped,
gettext_html, lazy_swag, get_alternate_locale_links)
Expand Down Expand Up @@ -127,6 +127,8 @@ def create_app(args):

bp = Blueprint('Main app', __name__)

storage.setup(args.shared_storage)

if not args.disable_files_translation:
remove_translated_files.setup(get_upload_dir())
languages = load_languages()
Expand Down Expand Up @@ -202,8 +204,12 @@ def resolve_language_locale():

limiter = Limiter()

if args.req_flood_threshold > 0:
flood.setup(args.req_flood_threshold)
if not "gunicorn" in os.environ.get("SERVER_SOFTWARE", ""):
# Gunicorn starts the scheduler in the master process
scheduler.setup(args)

flood.setup(args)
secret.setup(args)

measure_request = None
gauge_request = None
Expand Down Expand Up @@ -261,16 +267,16 @@ def func(*a, **kw):

if (args.require_api_key_secret
and key_missing
and not flood.secret_match(get_req_secret())
and not secret.secret_match(get_req_secret())
):
need_key = True

if need_key:
description = _("Please contact the server operator to get an API key")
if args.get_api_key_link:
description = _("Visit %(url)s to get an API key", url=args.get_api_key_link)
abort(
403,
400,
description=description,
)
return f(*a, **kw)
Expand Down Expand Up @@ -347,7 +353,7 @@ def appjs():
response = Response(render_template("app.js.template",
url_prefix=args.url_prefix,
get_api_key_link=args.get_api_key_link,
api_secret=flood.get_current_secret() if args.require_api_key_secret else ""), content_type='application/javascript; charset=utf-8')
api_secret=secret.get_current_secret() if args.require_api_key_secret else ""), content_type='application/javascript; charset=utf-8')

if args.require_api_key_secret:
response.headers['Last-Modified'] = http_date(datetime.now())
Expand Down
5 changes: 5 additions & 0 deletions libretranslate/default_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ def _get_value(name, default_value, value_type):
'default_value': False,
'value_type': 'bool'
},
{
'name': 'SHARED_STORAGE',
'default_value': 'memory://',
'value_type': 'str'
},
{
'name': 'LOAD_ONLY',
'default_value': None,
Expand Down
64 changes: 18 additions & 46 deletions libretranslate/flood.py
Original file line number Diff line number Diff line change
@@ -1,75 +1,47 @@
import atexit
import random
import string
from libretranslate.storage import get_storage

from apscheduler.schedulers.background import BackgroundScheduler

def generate_secret():
return ''.join(random.choices(string.ascii_uppercase + string.digits, k=7))

banned = {}
active = False
threshold = -1
secrets = [generate_secret(), generate_secret()]

def forgive_banned():
global banned
global threshold

clear_list = []
s = get_storage()
banned = s.get_all_hash_int("banned")

for ip in banned:
if banned[ip] <= 0:
clear_list.append(ip)
else:
banned[ip] = min(threshold, banned[ip]) - 1
s.set_hash_int("banned", ip, min(threshold, banned[ip]) - 1)

for ip in clear_list:
del banned[ip]

def rotate_secrets():
global secrets
secrets[0] = secrets[1]
secrets[1] = generate_secret()
s.del_hash("banned", ip)

def secret_match(s):
return s in secrets

def get_current_secret():
return secrets[1]

def setup(violations_threshold=100):
def setup(args):
global active
global threshold

active = True
threshold = violations_threshold

scheduler = BackgroundScheduler()
scheduler.add_job(func=forgive_banned, trigger="interval", minutes=30)
scheduler.add_job(func=rotate_secrets, trigger="interval", minutes=30)

scheduler.start()

# Shut down the scheduler when exiting the app
atexit.register(lambda: scheduler.shutdown())

if args.req_flood_threshold > 0:
active = True
threshold = args.req_flood_threshold

def report(request_ip):
if active:
banned[request_ip] = banned.get(request_ip, 0)
banned[request_ip] += 1

get_storage().inc_hash_int("banned", request_ip)

def decrease(request_ip):
if banned[request_ip] > 0:
banned[request_ip] -= 1

s = get_storage()
if s.get_hash_int("banned", request_ip) > 0:
s.dec_hash_int("banned", request_ip)

def has_violation(request_ip):
return request_ip in banned and banned[request_ip] > 0

s = get_storage()
return s.get_hash_int("banned", request_ip) > 0

def is_banned(request_ip):
s = get_storage()

# More than X offences?
return active and banned.get(request_ip, 0) >= threshold
return active and s.get_hash_int("banned", request_ip) >= threshold
7 changes: 7 additions & 0 deletions libretranslate/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ def get_args():
action="store_true",
help="Require use of an API key for programmatic access to the API, unless the client also sends a secret match",
)
parser.add_argument(
"--shared-storage",
type=str,
default=DEFARGS['SHARED_STORAGE'],
metavar="<Storage URI>",
help="Shared storage URI to use for multi-process data sharing (e.g. via gunicorn)",
)
parser.add_argument(
"--load-only",
type=operator.methodcaller("split", ","),
Expand Down
23 changes: 23 additions & 0 deletions libretranslate/scheduler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import atexit
from apscheduler.schedulers.background import BackgroundScheduler
# Background scheduler shared by this process; stays None until setup() creates it.
scheduler = None

def setup(args):
    """Create and start the module-level background scheduler, at most once.

    Registers the periodic jobs that the given options call for, starts the
    scheduler, and registers an ``atexit`` hook that shuts it down.

    NOTE(review): the indentation of this file was lost in the view this was
    written from; everything after the ``scheduler is None`` check is assumed
    to be nested under it (so a second call is a no-op) -- confirm.

    Args:
        args: Parsed options; reads ``req_flood_threshold``, ``api_keys`` and
            ``require_api_key_secret``.
    """
    # Kept as function-scope imports, exactly as the original had them.
    from libretranslate.flood import forgive_banned
    from libretranslate.secret import rotate_secrets

    global scheduler

    # Already initialised: nothing more to do.
    if scheduler is not None:
        return

    scheduler = BackgroundScheduler()

    flood_control_enabled = args.req_flood_threshold > 0
    if flood_control_enabled:
        scheduler.add_job(func=forgive_banned, trigger="interval", minutes=10)

    if args.api_keys and args.require_api_key_secret:
        scheduler.add_job(func=rotate_secrets, trigger="interval", minutes=30)

    scheduler.start()

    # Shut down the scheduler when exiting the app
    atexit.register(lambda: scheduler.shutdown())
28 changes: 28 additions & 0 deletions libretranslate/secret.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import atexit
import random
import string

from libretranslate.storage import get_storage

def generate_secret():
    """Return a fresh 7-character secret of uppercase ASCII letters and digits.

    The value is what ``secret_match`` compares client-supplied secrets
    against, so it is drawn from the operating system's CSPRNG via the
    ``secrets`` module instead of the predictable generator behind ``random``.

    Returns:
        str: Seven characters, each from ``A-Z`` or ``0-9``.
    """
    # Function-scope import keeps this change self-contained (no new
    # module-level dependency line is required).
    import secrets

    alphabet = string.ascii_uppercase + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(7))

def rotate_secrets():
    """Shift the current secret into the previous slot and mint a new one.

    Copies the stored ``secret_1`` value into ``secret_0``, then overwrites
    ``secret_1`` with a newly generated secret.
    """
    store = get_storage()
    # Read the current secret first, then demote it to the previous slot.
    store.set_str("secret_0", store.get_str("secret_1"))
    store.set_str("secret_1", generate_secret())


def secret_match(secret):
    """Tell whether ``secret`` equals either stored secret.

    Args:
        secret: The value to check.

    Returns:
        The result of comparing ``secret`` with the stored ``secret_0`` value
        or, if that does not match, with the stored ``secret_1`` value.
    """
    store = get_storage()
    matches_previous = secret == store.get_str("secret_0")
    # ``or`` short-circuits: secret_1 is only fetched when secret_0 did not match.
    return matches_previous or secret == store.get_str("secret_1")

def get_current_secret():
    """Return the value stored under ``secret_1`` in the shared storage."""
    store = get_storage()
    return store.get_str("secret_1")

def setup(args):
    """Seed both secret slots when API keys and the secret requirement are on.

    Does nothing unless ``args.api_keys`` and ``args.require_api_key_secret``
    are both truthy; otherwise stores a freshly generated secret under
    ``secret_0`` and another under ``secret_1``.

    Args:
        args: Parsed options; reads ``api_keys`` and ``require_api_key_secret``.
    """
    secrets_required = args.api_keys and args.require_api_key_secret
    if not secrets_required:
        return

    store = get_storage()
    store.set_str("secret_0", generate_secret())
    store.set_str("secret_1", generate_secret())
Loading