### ICTP-IAEA WORKSHOP ON ENVIRONMENTAL MAPPING: Mobilising Trust in Measurements and Engaging Scientific Citizenry, Trieste, March 2017
---


# III. Fetching data from Safecast API with Python

In [7]:
from urllib.parse import urlencode

import pandas as pd

In [96]:
# Spell out the complete request URL by hand: adjacent string literals inside
# the parentheses are joined by Python into one single string
url = (
    "https://api.safecast.org/measurements.json?"
    "distance=100000&"
    "longitude=141.0337&"
    "captured_before=2012-04-10&"
    "latitude=37.4218&"
    "per_page=2000&"
    "captured_after=2011-03-10"
)

In [97]:
# Show the assembled URL string
url

'https://api.safecast.org/measurements.json?distance=100000&longitude=141.0337&captured_before=2012-04-10&latitude=37.4218&per_page=2000&captured_after=2011-03-10'

In [92]:
# More advanced but cleaner way to generate the url: keep the query parameters
# in a dict and let the standard library build the query string
params = {'distance': 100000, 'latitude': 37.4218, 'longitude': 141.0337,
          'captured_after': '2011-03-10', 'captured_before': '2012-04-10',
          'per_page': 20000}
basis_url = 'https://api.safecast.org/measurements.json?'
# urlencode joins the pairs as key=value&key=value and percent-escapes any
# reserved characters (spaces, '&', ':' ...) that manual '%s=%s' formatting
# would pass through unescaped
url = basis_url + urlencode(params)

In [93]:
# Import requests package
import requests

# Package the request, send the request and catch the response: r
# The timeout keeps the cell from hanging forever if the server stops responding
r = requests.get(url, timeout=60)

# Fail here with an HTTPError on a 4xx/5xx reply instead of trying to parse an
# error page as measurement data
r.raise_for_status()

# Decode the JSON body of the response into Python objects
json_data = r.json()

In [94]:
# Build a DataFrame from the decoded JSON response
df = pd.DataFrame(json_data)

In [95]:
# Preview the first rows of the table
df.head()

Unnamed: 0,captured_at,channel_id,device_id,devicetype_id,height,id,latitude,location_name,longitude,original_id,sensor_id,station_id,unit,user_id,value
0,2012-01-18T02:32:50.000Z,,,,,15500999,37.414865,,141.028495,,,,cpm,1,20283.0
1,2012-01-18T02:32:45.000Z,,,,,15500998,37.414992,,141.028215,,,,cpm,1,19713.0
2,2012-01-18T02:32:55.000Z,,,,,15501000,37.414817,,141.028568,,,,cpm,1,20667.0
3,2012-01-18T02:33:05.000Z,,,,,15501002,37.414787,,141.028612,,,,cpm,1,21206.0
4,2012-01-18T02:33:10.000Z,,,,,15501003,37.414777,,141.028622,,,,cpm,1,21220.0


In [24]:
# Dimensions of the table as (number of rows, number of columns)
df.shape

(20000, 15)

In [25]:
# Count how many measurements each user contributed
df.groupby('user_id').size()

user_id
1      17813
3        538
148     1649
dtype: int64

In [23]:
# Save the measurements to disk; index=False leaves out the row index so the
# file does not gain a spurious 'Unnamed: 0' column when it is read back
df.to_csv('data/safecast_from_api.csv', index=False)

## A word on Big data

In [36]:
f = 'data/safecast_big_data.csv'    # Path to file containing the table

# A quick look at first 5 rows: with chunksize set, read_csv returns an
# iterator over 5-row DataFrames instead of loading the whole file at once
data_iter = pd.read_csv(f, chunksize=5)
next(data_iter)

Unnamed: 0.1,Unnamed: 0,captured_at,channel_id,device_id,devicetype_id,height,id,latitude,location_name,longitude,original_id,sensor_id,station_id,unit,user_id,value
0,0,2012-01-18T02:32:50.000Z,,,,,15500999,37.414865,,141.028495,,,,cpm,1,20283.0
1,1,2012-01-18T02:32:45.000Z,,,,,15500998,37.414992,,141.028215,,,,cpm,1,19713.0
2,2,2012-01-18T02:32:55.000Z,,,,,15501000,37.414817,,141.028568,,,,cpm,1,20667.0
3,3,2012-01-18T02:33:05.000Z,,,,,15501002,37.414787,,141.028612,,,,cpm,1,21206.0
4,4,2012-01-18T02:33:10.000Z,,,,,15501003,37.414777,,141.028622,,,,cpm,1,21220.0


In [37]:
# Each further call to next() yields the following chunk of rows
next(data_iter)

Unnamed: 0.1,Unnamed: 0,captured_at,channel_id,device_id,devicetype_id,height,id,latitude,location_name,longitude,original_id,sensor_id,station_id,unit,user_id,value
0,5,2012-01-18T02:33:00.000Z,,,,,15501001,37.414792,,141.028588,,,,cpm,1,20916.0
1,6,2012-01-18T02:33:15.000Z,,,,,15501004,37.41476,,141.028643,,,,cpm,1,20809.0
2,7,2012-01-18T02:38:10.000Z,,,,,15501063,37.414698,,141.02872,,,,cpm,1,20432.0
3,8,2012-01-18T02:38:15.000Z,,,,,15501064,37.414697,,141.02872,,,,cpm,1,20394.0
4,9,2012-01-18T02:38:05.000Z,,,,,15501062,37.414697,,141.02872,,,,cpm,1,20504.0


In [38]:
# Stream through the file in 1000-row chunks and tally:
#   - the total number of measurements
#   - the number of measurements per user_id

# Start over with a fresh iterator
data_iter = pd.read_csv(f, chunksize=1000)

nb_lines = 0
counts_by_user = {}

for chunk in data_iter:
    nb_lines += len(chunk)
    for user in chunk['user_id']:
        # Unseen users start from 0, so one expression covers both cases
        counts_by_user[user] = counts_by_user.get(user, 0) + 1

In [39]:
counts_by_user

{1: 17813, 3: 538, 148: 1649}

### Exercise

Load from Safecast API:

* first `1000` measurements
* taken between `2011-03-10` and `2017-03-08`
* `10km` around `Trieste`

**Tip: Use Google Maps to find the longitude and latitude coordinates of Trieste**