Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
Add more environment variables to control limit and offset
  • Loading branch information
Feng-Gao committed Mar 12, 2019
1 parent 21f1b7d commit d1803cd
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions scraper.py
Expand Up @@ -10,8 +10,10 @@


# Taipei provides an API to fetch all dataset metadata.
# base_url is a %-format template taking (limit, offset), in that order.
base_url = 'https://data.taipei/opendata/datalist/apiAccess?scope=datasetMetadataSearch&limit=%d&offset=%d'
# Paging controls come from the environment: a missing variable raises
# KeyError, and a non-integer value raises ValueError.
index = int(os.environ['MORPH_INDEX'])
limit = int(os.environ['MORPH_LIMIT'])
index_offset = int(os.environ['MORPH_OFFSET'])
# The template must be filled in with %; appending str(index) would leave the
# literal %d placeholders in the query string. The offset passed here is
# `index`, matching what this initial request used before.
# NOTE(review): confirm whether index_offset should also apply to this request.
taipei_url = base_url % (limit, index)
result = requests.get(taipei_url)
# Total count reported by the API for this search.
package_count = result.json()['result']['count']
Expand All @@ -20,8 +22,8 @@
dataset_count = int(os.environ['MORPH_DCOUNT'])
resource_count = int(os.environ['MORPH_RCOUNT'])
for i in range(index,package_count+1):
index = i*100
taipei_url = base_url + str(index)
index = i*100 + index_offset
taipei_url = base_url % (limit,index)
result = requests.get(taipei_url)
package_list = result.json()['result']['results']

Expand Down

0 comments on commit d1803cd

Please sign in to comment.