Skip to content

Commit

Permalink
[add] v1.3-rc --> A better SID management for Yandex according to #4
Browse files Browse the repository at this point in the history
  • Loading branch information
Animenosekai committed Feb 23, 2021
1 parent b66fb93 commit fe22afe
Show file tree
Hide file tree
Showing 9 changed files with 233 additions and 56 deletions.
130 changes: 128 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,128 @@
.DS_Store
*.pyc
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# macOS
.DS_Store
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
setup(
name = "translatepy",
packages = ["translatepy"],
version = "1.2",
version = "1.3",
license = "GNU General Public License v3 (GPLv3)",
description = "Translate, transliterate, get the language of texts in no time with the help of multiple APIs!",
author = "Anime no Sekai",
author_email = "niichannomail@gmail.com",
url = "https://github.com/Animenosekai/translate",
download_url = "https://github.com/Animenosekai/translate/archive/v1.2.tar.gz",
download_url = "https://github.com/Animenosekai/translate/archive/v1.3.tar.gz",
keywords = ['python', 'translate', 'translation', 'google-translate', 'yandex-translate', 'bing-translate', 'reverso', 'transliteration', 'detect-language'],
install_requires = ['safeIO', 'requests'],
classifiers = ['Development Status :: 4 - Beta', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9'],
Expand Down
2 changes: 1 addition & 1 deletion translatepy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
__copyright__ = 'Copyright 2021, translate'
__credits__ = ['animenosekai']
__license__ = 'GNU General Public License v3 (GPLv3)'
__version__ = 'translatepy v1.2'
__version__ = 'translatepy v1.3'
__maintainer__ = 'Anime no Sekai'
__email__ = 'niichannomail@gmail.com'
__status__ = 'Beta'
1 change: 0 additions & 1 deletion translatepy/models/languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from translatepy.data.data import ALPHA2_TO_ALPHA3
from translatepy.utils.similarity import language_search
from translatepy.models.exceptions import UnknownLanguage
import translatepy

LANGUAGES_CACHES = {}

Expand Down
2 changes: 1 addition & 1 deletion translatepy/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
requests
safeIO
safeIO>=1.2
Empty file.
Empty file.
137 changes: 88 additions & 49 deletions translatepy/translators/yandex.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from time import time
from json import loads
from random import randint
from os.path import dirname, abspath
Expand All @@ -7,6 +8,7 @@

from translatepy.models.languages import Language
from translatepy.models.userAgents import USER_AGENTS
from translatepy.utils.utils import convert_to_float

FILE_LOCATION = dirname(abspath(__file__))

Expand All @@ -29,26 +31,54 @@ class YandexTranslate():
"""
def __init__(self, sid_refresh=False) -> None:
    """
    Sets up the Yandex translator state from the on-disk caches.

    Args:
        sid_refresh: when truthy, `refreshSID()` is called right after
            the cached values have been loaded.
    """
    self._base_url = "https://translate.yandex.net/api/v1/tr.json/"

    # Two small cache files live next to this module: one holds the SID,
    # the other the time of the last attempt to fetch a SID.
    # NOTE(review): `blocking=False` is passed straight to TextFile;
    # confirm its exact semantics against the safeIO documentation.
    self._sid_cache = TextFile(FILE_LOCATION + "/_yandex_sid.translatepy", blocking=False)
    self._last_tried_cache = TextFile(FILE_LOCATION + "/_yandex_last_tried.translatepy", blocking=False)

    # The caches are only read through their context managers.
    with self._sid_cache as cache:
        self._sid = str(cache.read())
    with self._last_tried_cache as cache:
        # presumably convert_to_float copes with an empty/invalid cache
        # file -- TODO confirm in translatepy.utils.utils
        self._last_tried = convert_to_float(cache.read())

    self._headers = self._header()
    self._check_increment = 600  # seconds; defaults to 10 minutes

    if sid_refresh:
        self.refreshSID()

def refreshSID(self):
    """
    Refreshes the SID used for requests to the Yandex Translation API.

    See issue #4 for more information.
    A small random jitter is added to the waiting time between two
    attempts to prevent bot detection.

    Returns:
        bool: True when a new SID was extracted from the Yandex page and
        written to the SID cache. False when the attempt was skipped
        because the previous one is too recent, when no SID could be
        found in the page, or when any error occurred.
    """
    sid_marker = "Ya.reqid = '"
    try:
        # Yandex rate-limits clients that load its website too often, so
        # nothing is done unless more than `_check_increment` seconds have
        # passed since the previous attempt.
        if not (time() - self._last_tried > self._check_increment):
            return False

        data = get("https://translate.yandex.com/", headers=self._headers).text

        # The SID sits between the marker and the closing `';`.
        sid_start = data.find(sid_marker)
        sid_end = -1
        if sid_start != -1:
            sid_start += len(sid_marker)
            sid_end = data.find("';", sid_start)
        # A missing terminator counts as a failure, so a truncated page
        # tail is never stored as the SID.
        found = sid_end != -1

        jitter = randint(0, 1000) / 1000
        if found:
            self._sid = data[sid_start:sid_end]
            self._sid_cache.write(self._sid)
            # decrementing because it might work decremented
            self._check_increment = self._check_increment / 2 + jitter
        else:
            # incrementing the waiting time before the next attempt
            self._check_increment = self._check_increment * 2 + jitter

        # Remember (in memory and on disk) when this attempt happened.
        self._last_tried = time()
        self._last_tried_cache.write(self._last_tried)
        return found
    except Exception:
        # Best effort: network, parsing or cache errors all mean
        # "no new SID", but KeyboardInterrupt/SystemExit still propagate.
        return False

def _header(self):
    """
    Creates a new header

    The result is a copy of the module-level HEADERS with a randomly
    chosen "User-Agent" entry, so HEADERS itself is never mutated.

    _header might not be appropriate if the _sid is linked to the
    User-Agent header
    """
    headers = HEADERS.copy()
    # Index by the actual list length rather than a hard-coded bound, so
    # the choice stays valid if USER_AGENTS grows or shrinks.
    headers["User-Agent"] = USER_AGENTS[randint(0, len(USER_AGENTS) - 1)]
    return headers
Expand All @@ -58,31 +88,31 @@ def translate(self, text, destination_language, source_language="auto"):
Translates the given text to the given language
"""
try:
# preparing the request
if source_language is None or str(source_language) == "auto":
source_language = self.language(text)
if source_language is None:
return None, None
if isinstance(source_language, Language):
source_language = source_language.yandex_translate
if self._sid.replace(" ", "") == "":
self.refreshSID()
url = self._base_url + "translate?id=" + self._sid + "-0-0&srv=tr-text&lang=" + str(source_language) +"-" + str(destination_language) + "&reason=auto&format=text"
request = get(url, headers=self._headers, data={'text': str(text), 'options': '4'})
data = loads(request.text)
if request.status_code < 400 and data["code"] == 200:
data = loads(request.text)
return str(data["lang"]).split("-")[0], data["text"][0]
else:
self.refreshSID()
# redo everything with the new sid
# check if we have an _sid
if self._sid.replace(" ", "") == "" and not self.refreshSID():
return None, None

def _request():
url = self._base_url + "translate?id=" + self._sid + "-0-0&srv=tr-text&lang=" + str(source_language) +"-" + str(destination_language) + "&reason=auto&format=text"
request = get(url, headers=self._headers, data={'text': str(text), 'options': '4'})
data = loads(request.text)
if request.status_code < 400 and data["code"] == 200:
data = loads(request.text)
return str(data["lang"]).split("-")[0], data["text"][0]
else:
return None, None
return None, None

_lang, _text = _request()
if _lang is None or _text is None:
if self.refreshSID():
_lang, _text = _request()
return _lang, _text
except:
return None, None

Expand All @@ -95,18 +125,22 @@ def transliterate(self, text, source_language=None):
source_language = self.language(text)
if source_language is None or source_language not in TRANSLIT_LANGS:
return None, None
if self._sid.replace(" ", "") == "":
self.refreshSID()
request = post("https://translate.yandex.net/translit/translit?sid=" + self._sid + "&srv=tr-text", headers=self._headers, data={'text': str(text), 'lang': source_language})
if request.status_code < 400:
return source_language, request.text[1:-1]
else:
self.refreshSID()

if self._sid.replace(" ", "") == "" and not self.refreshSID():
return None, None

def _request():
request = post("https://translate.yandex.net/translit/translit?sid=" + self._sid + "&srv=tr-text", headers=self._headers, data={'text': str(text), 'lang': source_language})
if request.status_code < 400:
return source_language, request.text[1:-1]
else:
return None, None

_lang, _text = _request()
if _lang is None or _text is None:
if self.refreshSID():
_lang, _text = _request()
return _lang, _text
except:
return None, None

Expand All @@ -118,17 +152,12 @@ def spellcheck(self, text, source_language=None):
if source_language is None:
source_language = self.language(text)
if source_language is None:
return None
if self._sid.replace(" ", "") == "":
self.refreshSID()
request = post("https://speller.yandex.net/services/spellservice.json/checkText?sid=" + self._sid + "&srv=tr-text", headers=self._headers, data={'text': str(text), 'lang': source_language, 'options': 516})
if request.status_code < 400:
data = loads(request.text)
for correction in data:
text = text[:correction.get("pos", 0)] + correction.get("s", [""])[0] + text[correction.get("pos", 0) + correction.get("len", 0):]
return source_language, text
else:
self.refreshSID()
return None, None

if self._sid.replace(" ", "") == "" and not self.refreshSID():
return None, None

def _request():
request = post("https://speller.yandex.net/services/spellservice.json/checkText?sid=" + self._sid + "&srv=tr-text", headers=self._headers, data={'text': str(text), 'lang': source_language, 'options': 516})
if request.status_code < 400:
data = loads(request.text)
Expand All @@ -137,6 +166,12 @@ def spellcheck(self, text, source_language=None):
return source_language, text
else:
return None, None

_lang, _text = _request()
if _lang is None or _text is None:
if self.refreshSID():
_lang, _text = _request()
return _lang, _text
except:
return None, None

Expand All @@ -147,20 +182,24 @@ def language(self, text, hint=None):
try:
if hint is None:
hint = "en,ja"
if self._sid.replace(" ", "") == "":
self.refreshSID()

if self._sid.replace(" ", "") == "" and not self.refreshSID():
return None

url = self._base_url + "detect?sid=" + self._sid + "&srv=tr-text&text=" + str(text) + "&options=1&hint=" + str(hint)
request = get(url, headers=self._headers)
if request.status_code < 400 and request.json()["code"] == 200:
return loads(request.text)["lang"]
else:
self.refreshSID()
url = self._base_url + "detect?sid=" + self._sid + "&srv=tr-text&text=" + str(text) + "&options=1&hint=" + str(hint)

def _request():
request = get(url, headers=self._headers)
if request.status_code < 400 and request.json()["code"] == 200:
return loads(request.text)["lang"]
else:
return None

_lang = _request()
if _lang is None:
if self.refreshSID():
_lang = _request()
return _lang
except:
return None

Expand Down

0 comments on commit fe22afe

Please sign in to comment.