Skip to content

Commit

Permalink
Some more performance tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
wragge committed Nov 7, 2019
1 parent a848b8e commit 75ebfbf
Showing 1 changed file with 71 additions and 54 deletions.
125 changes: 71 additions & 54 deletions random_work_by_facets.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 135,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -44,7 +44,7 @@
},
{
"cell_type": "code",
"execution_count": 139,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -54,7 +54,7 @@
},
{
"cell_type": "code",
"execution_count": 142,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -105,7 +105,8 @@
" params = {\n",
" 'zone': zone,\n",
" 'encoding': 'json',\n",
" 'n': '100',\n",
" # Keeping this at 0 until we've filtered the results speeds things up\n",
" 'n': '0',\n",
" 'key': API_KEY,\n",
" 'facet': 'all',\n",
" 'include': 'links'\n",
Expand All @@ -116,15 +117,16 @@
" applied_facets.append(key)\n",
" response = s.get(API_URL, params=params)\n",
" data = response.json()\n",
" total = int(data['response']['zone'][0]['records']['total']) \n",
" total = int(data['response']['zone'][0]['records']['total'])\n",
" facets = get_facets(data)\n",
" facets[:] = [f for f in facets if f.get('facet') not in applied_facets]\n",
"    # Keep going until we either have 100 or fewer results or we run out of facets\n",
" while total == 0 or total > 100 or len(facets) == 0:\n",
" while total > 100 and len(facets) > 0:\n",
" # print(f'Facets: {len(facets)}')\n",
" # Select another facet\n",
" new_facet = random.choice(facets)\n",
" # Add it to the applied list\n",
" applied_facets.append(new_facet)\n",
" applied_facets.append(new_facet['facet'])\n",
" # Add the new facet as a parameter\n",
" params[f'l-{new_facet[\"facet\"]}'] = random.choice(new_facet['terms'])\n",
" # Get the new results\n",
Expand All @@ -137,10 +139,20 @@
" total = int(data['response']['zone'][0]['records']['total'])\n",
" # print(total)\n",
" # print(response.url)\n",
" return random.choice(data['response']['zone'][0]['records']['work'])\n",
" if total > 0:\n",
" params['n'] = '100'\n",
"        # Facet data isn't needed in the final request, so drop the parameter\n",
" params.pop('facet', None)\n",
" response = s.get(API_URL, params=params)\n",
" data = response.json()\n",
" work = random.choice(data['response']['zone'][0]['records']['work'])\n",
" return work\n",
"\n",
"\n",
"def get_zones(data):\n",
" '''\n",
" Find which zones have results in them.\n",
" '''\n",
" zones = []\n",
" for zone in data['response']['zone']:\n",
" if int(zone['records']['total']) > 0:\n",
Expand All @@ -161,8 +173,10 @@
" else:\n",
" params['zone'] = 'book,article,picture,map,music,collection'\n",
" params = set_query(params, query, add_word)\n",
" # Add any supplied facets\n",
" for key, value in kwargs.items():\n",
" params[f'l-{key}'] = value\n",
" # Make sure that at least some zones have results\n",
" while len(zones) == 0 and tries <=10:\n",
" params = set_query(params, query, add_word, add_number)\n",
" response = s.get(API_URL, params=params)\n",
Expand All @@ -186,30 +200,30 @@
},
{
"cell_type": "code",
"execution_count": 143,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id': '197897046',\n",
" 'url': '/work/197897046',\n",
" 'troveUrl': 'https://trove.nla.gov.au/work/197897046',\n",
" 'title': 'Whay family cricket match',\n",
" 'issued': 1930,\n",
"{'id': '197896325',\n",
" 'url': '/work/197896325',\n",
" 'troveUrl': 'https://trove.nla.gov.au/work/197896325',\n",
" 'title': \"Quong Tart's funeral cortege leaving Ashfield residence\",\n",
" 'issued': 1903,\n",
" 'type': ['Photograph'],\n",
" 'holdingsCount': 1,\n",
" 'versionCount': 1,\n",
" 'relevance': {'score': '814.3908', 'value': 'very relevant'},\n",
" 'relevance': {'score': '805.48956', 'value': 'very relevant'},\n",
" 'identifier': [{'type': 'url',\n",
" 'linktype': 'fulltext',\n",
" 'value': 'http://www.chia.chinesemuseum.com.au/objects/D002813.htm'},\n",
" 'value': 'http://www.chia.chinesemuseum.com.au/objects/D001092.htm'},\n",
" {'type': 'url',\n",
" 'linktype': 'thumbnail',\n",
" 'value': 'http://www.chia.chinesemuseum.com.au/objects/thumbs/tn_RA097.JPG'}]}"
" 'value': 'http://www.chia.chinesemuseum.com.au/objects/thumbs/tn_P00842_00001.JPG'}]}"
]
},
"execution_count": 143,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -229,33 +243,38 @@
},
{
"cell_type": "code",
"execution_count": 146,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id': '231436550',\n",
" 'url': '/work/231436550',\n",
" 'troveUrl': 'https://trove.nla.gov.au/work/231436550',\n",
" 'title': 'Carinhall Musikzimmer / Farben-Foto-Atelier M. Müller & Sohn, K.G., Zweigniederlessung Berlin SW68',\n",
" 'contributor': ['M. Müller & Sohn'],\n",
" 'issued': '1935-1942',\n",
"{'id': '231674897',\n",
" 'url': '/work/231674897',\n",
" 'troveUrl': 'https://trove.nla.gov.au/work/231674897',\n",
" 'title': \"Boyd's Bay Bridge, Tweed Heads 1930s\",\n",
" 'contributor': ['Aussie~mobs'],\n",
" 'issued': 2012,\n",
" 'type': ['Photograph'],\n",
" 'holdingsCount': 0,\n",
" 'versionCount': 1,\n",
" 'relevance': {'score': '0.02249495', 'value': 'vaguely relevant'},\n",
" 'holdingsCount': 1,\n",
" 'versionCount': 3,\n",
" 'relevance': {'score': '0.020671109', 'value': 'vaguely relevant'},\n",
" 'snippet': ' the same spot…And it just hung on cables! And often when it would come back down it <b>wouldn</b>’t sit on',\n",
" 'identifier': [{'type': 'url',\n",
" 'linktype': 'fulltext',\n",
" 'linktext': 'digital file from original photo, front',\n",
" 'value': 'http://hdl.loc.gov/loc.pnp/ds.12272'},\n",
" 'value': 'https://www.flickr.com/photos/70994841@N07/7990079055'},\n",
" {'type': 'url',\n",
" 'linktype': 'fulltext',\n",
" 'linktext': 'digital file from original photo, back',\n",
" 'value': 'http://hdl.loc.gov/loc.pnp/ds.12273'}]}"
" 'value': 'https://www.flickr.com/photos/70994841@N07/7990080389'},\n",
" {'type': 'url',\n",
" 'linktype': 'fulltext',\n",
" 'value': 'https://www.flickr.com/photos/70994841@N07/7990077627'},\n",
" {'type': 'url',\n",
" 'linktype': 'thumbnail',\n",
" 'value': 'https://live.staticflickr.com/8296/7990077627_f84f8066e0_t.jpg'}]}"
]
},
"execution_count": 146,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -275,36 +294,34 @@
},
{
"cell_type": "code",
"execution_count": 148,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'id': '193448264',\n",
" 'url': '/work/193448264',\n",
" 'troveUrl': 'https://trove.nla.gov.au/work/193448264',\n",
" 'title': 'The Mikasa once more afloat in Sasebo harbour',\n",
" 'type': ['Photograph'],\n",
"{'id': '16575570',\n",
" 'url': '/work/16575570',\n",
" 'troveUrl': 'https://trove.nla.gov.au/work/16575570',\n",
" 'title': 'Gertrude (Jean) Williams interviewed by Jennifer Gall',\n",
" 'contributor': ['Williams, Jean (Gertrude Jean), 1909-1999'],\n",
" 'issued': 1989,\n",
" 'type': ['Sound/Interview, lecture, talk', 'Sound'],\n",
" 'holdingsCount': 1,\n",
" 'versionCount': 1,\n",
" 'relevance': {'score': '0.0061441823', 'value': 'vaguely relevant'},\n",
" 'snippet': ['Newspaper Clipping. <b>This</b> image is for personal use only. To publish or display it, contact the',\n",
" ' State Library of Western Australia. Reproduction of a sketch. <b>This</b> image is part of a collection of 69'],\n",
" 'relevance': {'score': '7.267762E-4', 'value': 'vaguely relevant'},\n",
" 'snippet': ['-200321935 Jean Williams describes <b>her</b> experiences as a foreigner living in Japan over a 50 year period',\n",
" \" to Japanese libraries; husband's research projects and writing; <b>her</b> interest in netsuki; husband's\"],\n",
" 'identifier': [{'type': 'url',\n",
" 'linktype': 'unknown',\n",
" 'linktext': 'view the album',\n",
" 'value': 'http://purl.slwa.wa.gov.au/slwa_b3012552_79'},\n",
" {'type': 'url',\n",
" 'linktype': 'unknown',\n",
" 'linktext': 'The Mikasa once more afloat in Sasebo harbour',\n",
" 'value': 'http://purl.slwa.wa.gov.au/slwa_b4184762_1'},\n",
" 'linktype': 'fulltext',\n",
" 'linktext': 'National Library of Australia digitised item',\n",
" 'value': 'http://nla.gov.au/nla.obj-200321935'},\n",
" {'type': 'url',\n",
" 'linktype': 'thumbnail',\n",
" 'value': 'http://purl.slwa.wa.gov.au/slwa_b4184762_1.png'}]}"
" 'value': 'http://nla.gov.au/nla.obj-200321935-t'}]}"
]
},
"execution_count": 148,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -358,15 +375,15 @@
},
{
"cell_type": "code",
"execution_count": 131,
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The slowest run took 7.29 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"3.08 s ± 1.87 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
"The slowest run took 5.62 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"2.17 s ± 1.26 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
Expand Down

0 comments on commit 75ebfbf

Please sign in to comment.