In [1]:
# Step up to the parent directory; later cells read relative "assets/..." paths.
# NOTE(review): `..` is relative to the current directory, so re-running this
# cell moves up one more level each time.
%cd ..

/home/prayson/Code/hadithi/dev.io


In [172]:
from pathlib import Path
from zipfile import ZipFile

from httpx import AsyncClient, Client
from mimesis import Person
from mimesis.locales import Locale
import jmespath
import pandas as pd

In [89]:
# Input locations: an HTML page holding the locale table and a local CSV of
# country / two-letter codes.
MIMESIS_LOCALE_URL = "https://mimesis.name/en/master/locales.html#overriding-locale"
COUNTRY_CODE_CSV = "assets/languages.csv"

# read_html returns one DataFrame per table on the page; only the first is kept.
language_codes = pd.read_html(MIMESIS_LOCALE_URL)[0]
country_codes = pd.read_csv(
    COUNTRY_CODE_CSV,
    usecols=["Country", "Two Letter"],
)

In [110]:
# Tidy the country table: lowercase every value, rename to short column names,
# keep only the part of the code before any "-", and drop rows whose code is "en".
# NOTE: duplicate country rows are kept (a drop_duplicates step was left disabled here).
country_iso = (
    country_codes
    .transform(lambda column: column.str.lower())
    .rename(columns={"Country": "country", "Two Letter": "code"})
    .assign(code=lambda frame: frame["code"].str.split("-").str.get(0))
    .query("code != 'en'")
)

In [111]:
# Build a plain dict of country name -> code. The left merge followed by dropna
# keeps only locale codes that found a matching row in country_iso.
country_language = (
    language_codes[["Code", "Name"]]
    .transform(lambda column: column.str.lower())
    .merge(country_iso, how="left", left_on="Code", right_on="code")
    .dropna(subset=["country", "code"])
    .set_index("country")
    .loc[:, "code"]
    .to_dict()
)

In [5]:
# Path of the zipped dataset (despite the name, this is the archive file itself).
DATA_DIR = Path("assets/books.zip")

# Load every CSV member of the archive into `data`, keyed by the lowercase file stem.
with ZipFile(DATA_DIR) as z:
    print(z.namelist())
    files = (file for file in z.namelist() if Path(file).suffix == ".csv")
    data = {}
    for csv_name in files:
        # Read the member straight from the archive: extracting it first would
        # write a copy of each CSV into the current working directory.
        with z.open(csv_name) as csv_member:
            data[Path(csv_name).stem.lower()] = pd.read_csv(csv_member, low_memory=False)

['Books.csv', 'Ratings.csv', 'Users.csv', 'recsys_taxonomy2.png']


In [140]:

def get_name(language: str) -> str:
    """Return a full name produced by a mimesis ``Person`` for ``language``.

    Args:
        language: Locale value handed unchanged to ``Person``.

    Returns:
        The result of ``Person(language).full_name()``.
    """
    return Person(language).full_name()


In [156]:
# Load the users table with pyarrow-backed dtypes; "age" is read as nullable Int64.
users = pd.read_csv(
    "assets/users.csv",
    dtype={"age": "Int64"},
    dtype_backend="pyarrow",
)

In [157]:
# Display the loaded users frame (the notebook truncates the rendering).
users

Unnamed: 0,user_id,fake_name,location,age
0,1,Catrice Riggs,"nyc, new york, usa",
1,2,Johnson Colon,"stockton, california, usa",18
2,3,Лолита Кузьмин,"moscow, yukon territory, russia",
3,4,Abraim Melo,"porto, v.n.gaia, portugal",17
4,5,Barton King,"farnborough, hants, united kingdom",
...,...,...,...,...
278853,278854,Renaldo Gomez,"portland, oregon, usa",
278854,278855,Elizbeth Bird,"tacoma, washington, united kingdom",50
278855,278856,Arda Guichard,"brampton, ontario, canada",
278856,278857,Clifford Erickson,"knoxville, tennessee, usa",


In [246]:
# Look up a single book by ISBN through the Open Library "/api/books" endpoint.
client = Client(base_url="https://openlibrary.org/api")
ISBN = "0395974682"  # alternative: "0140301690" - Alice in Wonderland
r = client.get(url="/books", params={"bibkeys": f"ISBN:{ISBN}", "format": "json"}, timeout=10)
# Fail here on a non-2xx reply instead of later, when the body is decoded or
# unpacked. The trailing ";" keeps the cell from displaying the return value.
r.raise_for_status();

In [247]:
# Decode the lookup response and display it; entries are keyed by the requested bibkey.
b = r.json()
b

{'ISBN:0395974682': {'bib_key': 'ISBN:0395974682',
  'info_url': 'https://openlibrary.org/books/OL7468970M/The_Lord_of_the_Rings',
  'preview': 'restricted',
  'preview_url': 'https://archive.org/details/lordofringsonevo00jrrt',
  'thumbnail_url': 'https://covers.openlibrary.org/b/id/8314546-S.jpg'}}

In [248]:
# Client rooted at the site itself (no "/api" prefix); the cells below request
# paths such as "/books/<id>.json" through it.
open_client = Client(base_url="https://openlibrary.org")

In [249]:
# Take the first info_url found in the lookup result.
url, *_ = jmespath.search("*.info_url", b)
# The id is the second-to-last path segment, i.e. ".../books/<id>/<title-slug>".
# NOTE(review): this assumes the URL always ends with a title slug - confirm.
work = url.rsplit("/", 2)[-2]

# Fetch the JSON record for that id and show the response status.
r = open_client.get("/books/" + work + ".json", timeout=10)
r

<Response [200 OK]>

In [250]:
# Inspect the decoded body of the "/books/<id>.json" response.
r.json()

{'publishers': ['Houghton Mifflin Company'],
 'number_of_pages': 1137,
 'table_of_contents': [{'level': 0,
   'label': '',
   'title': 'The fellowship of the ring --',
   'pagenum': ''},
  {'level': 0, 'label': '', 'title': 'The two towers --', 'pagenum': ''},
  {'level': 0,
   'label': '',
   'title': 'The return of the king.',
   'pagenum': ''}],
 'description': "One Ring to rule them all, One Ring to find them, One Ring to bring them all and in the darkness bind them\r\n\r\nIn ancient times the Rings of Power were crafted by the Elven-smiths, and Sauron, The Dark Lord, forged the One Ring, filling it with his own power so that he could rule all others. But the One Ring was taken from him, and though he sought it throughout Middle-earth, it remained lost to him. After many ages, it fell by chance into the hands of the hobbit Bilbo Baggins.\r\n\r\nFrom his fastness in the Dark Tower of Mordor, Sauron's power spread far and wide. He gathered all the Great Rings to him, but always he se

In [251]:
# Re-bind `b` to the record fetched from "/books/<id>.json", then request the
# first work it links to and show the response status.
b = r.json()

g = open_client.get(url="{}.json".format(b.get("works")[0].get("key")), timeout=10)
g

<Response [200 OK]>

In [252]:
# Inspect the decoded body of the work response held in `g`.
g.json()

{'description': {'type': '/type/text',
  'value': 'Originally published from 1954 through 1956, J.R.R. Tolkien\'s richly complex series ushered in a new age of epic adventure storytelling. A philologist and illustrator who took inspiration from his work, Tolkien invented the modern heroic quest novel from the ground up, creating not just a world, but a domain, not just a lexicon, but a language, that would spawn countless imitators and lead to the inception of the epic fantasy genre. Today, THE LORD OF THE RINGS is considered "the most influential fantasy novel ever written." (THE ENCYCLOPEDIA OF FANTASY)\r\n\r\nDuring his travels across Middle-earth, the hobbit Bilbo Baggins had found the Ring. But the simple band of gold was far from ordinary; it was in fact the One Ring - the greatest of the ancient Rings of Power. Sauron, the Dark Lord, had infused it with his own evil magic, and when it was lost, he was forced to flee into hiding.\r\n\r\nBut now Sauron\'s exile has ended and his p

In [158]:
# https://openlibrary.org/works/OL18020194W/bookshelves.json
# https://openlibrary.org/works/OL18020194W/ratings.json

In [253]:
# Bookshelf counts for the first work referenced by the record in `b`.
open_client.get(
    url="{}/bookshelves.json".format(jmespath.search("works[0].key", b)),
    timeout=10,
).json()

{'counts': {'want_to_read': 1308,
  'currently_reading': 102,
  'already_read': 119}}

In [254]:
# Ratings data for the first work referenced by the record in `b`.
open_client.get(
    url="{}/ratings.json".format(jmespath.search("works[0].key", b)),
    timeout=10,
).json()

{'summary': {'average': 4.53030303030303,
  'count': 66,
  'sortable': 4.209366243712695},
 'counts': {'1': 2, '2': 2, '3': 6, '4': 5, '5': 51}}

In [255]:
# Key of the first author listed on the work record in `g`.
jmespath.search("authors[0].author.key", g.json())

'/authors/OL26320A'

In [256]:
# Fetch and decode the record of the first author listed on the work in `g`.
open_client.get(
    url="{}.json".format(jmespath.search("authors[0].author.key", g.json())),
    timeout=10,
).json()

{'fuller_name': 'John Ronald Reuel Tolkien',
 'links': [{'title': 'The Tolkien Society',
   'url': 'http://www.tolkiensociety.org/index.html',
   'type': {'key': '/type/link'}},
  {'title': 'The Official Website of the JRR Tolkien Estate',
   'url': 'http://www.tolkienestate.com/',
   'type': {'key': '/type/link'}},
  {'url': 'http://www.theonering.com/',
   'title': 'TheOneRing.com',
   'type': {'key': '/type/link'}},
  {'title': 'Planet Tolkien',
   'url': 'http://www.planet-tolkien.com/',
   'type': {'key': '/type/link'}},
  {'url': 'http://www.tolkienlibrary.com/',
   'title': 'The Tolkien Library: Collecting Tolkien books',
   'type': {'key': '/type/link'}}],
 'key': '/authors/OL26320A',
 'name': 'J.R.R. Tolkien',
 'birth_date': '3 January 1892',
 'death_date': '2 September 1973',
 'photos': [6155606, 6433524, 6433525, 6433526, 5731377, -1],
 'personal_name': 'J. R. R. Tolkien',
 'alternate_names': ['John Ronald Reuel Tolkien',
  'J. R. R. Tolkein',
  'J.R.R. Tolkein',
  'Dzhon R.

In [None]:
# https://openlibrary.org/books/OL3597106M.json

In [220]:
# Order the books table by publication year.
# NOTE(review): the rendered output ends with rows whose year column holds a
# publisher name, so the column is presumably string-typed and this ordering
# lexicographic - confirm before relying on it.
data["books"].sort_values(by="Year-Of-Publication")

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
212233,0947901051,Collect Post Office Cards,A. Roberts,0,Benham (A. Buckingham) Ltd,http://images.amazon.com/images/P/0947901051.0...,http://images.amazon.com/images/P/0947901051.0...,http://images.amazon.com/images/P/0947901051.0...
51621,0006479561,Sacred Clowns,Tony Hillerman,0,Fairmount Books Ltd Remainders,http://images.amazon.com/images/P/0006479561.0...,http://images.amazon.com/images/P/0006479561.0...,http://images.amazon.com/images/P/0006479561.0...
193438,0140382690,The Enemy You Killed,Peter McFarlane,0,Penguin Books Australia Ltd,http://images.amazon.com/images/P/0140382690.0...,http://images.amazon.com/images/P/0140382690.0...,http://images.amazon.com/images/P/0140382690.0...
25016,0879972734,Walkers On the Sky,David J Lake,0,Penguin Putnam~mass,http://images.amazon.com/images/P/0879972734.0...,http://images.amazon.com/images/P/0879972734.0...,http://images.amazon.com/images/P/0879972734.0...
17668,0590023888,Misty of Chincoteague,Marguerite Henry,0,Scholastic Inc,http://images.amazon.com/images/P/0590023888.0...,http://images.amazon.com/images/P/0590023888.0...,http://images.amazon.com/images/P/0590023888.0...
...,...,...,...,...,...,...,...,...
80264,0140301690,Alice's Adventures in Wonderland and Through t...,Lewis Carroll,2050,Puffin Books,http://images.amazon.com/images/P/0140301690.0...,http://images.amazon.com/images/P/0140301690.0...,http://images.amazon.com/images/P/0140301690.0...
97826,0140201092,Outline of European Architecture (Pelican S.),Nikolaus Pevsner,2050,Penguin USA,http://images.amazon.com/images/P/0140201092.0...,http://images.amazon.com/images/P/0140201092.0...,http://images.amazon.com/images/P/0140201092.0...
209538,078946697X,"DK Readers: Creating the X-Men, How It All Beg...",2000,DK Publishing Inc,http://images.amazon.com/images/P/078946697X.0...,http://images.amazon.com/images/P/078946697X.0...,http://images.amazon.com/images/P/078946697X.0...,
221678,0789466953,"DK Readers: Creating the X-Men, How Comic Book...",2000,DK Publishing Inc,http://images.amazon.com/images/P/0789466953.0...,http://images.amazon.com/images/P/0789466953.0...,http://images.amazon.com/images/P/0789466953.0...,
