In [1]:
import requests, pprint, json, datetime, time
from pymongo import MongoClient
from secrets import *

### Sign up for free API key at [OpenWeather](https://home.openweathermap.org/users/sign_up)

## Acquring data from API

In [2]:
key = 'put your key here'

In [3]:
city='london'

In [4]:
requestString='https://api.openweathermap.org/data/2.5/weather?q={}&APPID={}'.format(city, key)

In [5]:
res=requests.get(requestString)

In [6]:
res.reason

'OK'

In [7]:
pprint.pprint(res.json())

{'base': 'stations',
 'clouds': {'all': 75},
 'cod': 200,
 'coord': {'lat': 51.51, 'lon': -0.13},
 'dt': 1535323800,
 'id': 2643743,
 'main': {'humidity': 88,
          'pressure': 1005,
          'temp': 290.04,
          'temp_max': 291.15,
          'temp_min': 289.15},
 'name': 'London',
 'sys': {'country': 'GB',
         'id': 5091,
         'message': 0.0036,
         'sunrise': 1535259891,
         'sunset': 1535309891,
         'type': 1},
 'visibility': 10000,
 'weather': [{'description': 'light rain',
              'icon': '10n',
              'id': 500,
              'main': 'Rain'}],
 'wind': {'deg': 250, 'speed': 6.2}}


In [9]:
def getData(city):
    requestString='https://api.openweathermap.org/data/2.5/weather?q={}&APPID={}'.format(city, key)
    res=requests.get(requestString)
    return res

In [10]:
res=getData('Curitiba')

In [11]:
res

<Response [200]>

## Ingesting data into MongoDB

In [12]:
client=MongoClient('172.17.0.2')

In [13]:
db=client.packt

In [14]:
weatherCollection=db.weather

In [15]:
res.json()

{'coord': {'lon': -49.27, 'lat': -25.43},
 'weather': [{'id': 801,
   'main': 'Clouds',
   'description': 'few clouds',
   'icon': '02n'}],
 'base': 'stations',
 'main': {'temp': 281.02,
  'pressure': 1027,
  'humidity': 87,
  'temp_min': 279.15,
  'temp_max': 282.15},
 'visibility': 10000,
 'wind': {'speed': 2.6, 'deg': 110},
 'clouds': {'all': 20},
 'dt': 1535324400,
 'sys': {'type': 1,
  'id': 4481,
  'message': 0.0076,
  'country': 'BR',
  'sunrise': 1535276050,
  'sunset': 1535317399},
 'id': 6322752,
 'name': 'Curitiba',
 'cod': 200}

In [16]:
res=weatherCollection.insert_one(res.json())

## Get list of cities

In [17]:
!wget http://bulk.openweathermap.org/sample/city.list.json.gz

--2018-08-26 20:38:09--  http://bulk.openweathermap.org/sample/city.list.json.gz
Resolving bulk.openweathermap.org (bulk.openweathermap.org)... 78.46.48.103
Connecting to bulk.openweathermap.org (bulk.openweathermap.org)|78.46.48.103|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4292641 (4,1M) [application/octet-stream]
Saving to: ‘city.list.json.gz’


2018-08-26 20:38:43 (126 KB/s) - ‘city.list.json.gz’ saved [4292641/4292641]



In [20]:
!gunzip city.list.json.gz

In [21]:
!head city.list.json

[
  {
    "id": 707860,
    "name": "Hurzuf",
    "country": "UA",
    "coord": {
      "lon": 34.283333,
      "lat": 44.549999
    }
  },


In [22]:
with open('city.list.json','r') as inFile:
    citiesJson=json.loads(inFile.read())

In [23]:
type(citiesJson)

list

## Limit to Chilean cities

In [24]:
citiesJsonCL=list(filter(lambda x:x['country']=='CL', citiesJson))

In [25]:
len(citiesJsonCL)

369

In [26]:
cities=list(map(lambda x:x['name'], citiesJsonCL))
ids=list(map(lambda x:x['id'], citiesJsonCL))

In [27]:
res=getData(cities[0])

In [28]:
res

<Response [200]>

## Cycle through cities

In [29]:
for i,name in zip(ids, cities):
    res=getData(name)
    if not res.status_code==200:
        print('Error grabbing data for {}'.format(name))
        print(res.reason)
    else:
        try:
            weatherCollection.insert_one(res.json())
        except e:
            print('Error inserting into DB {}'.format(e))
            print('City: {}'.format(name))
    
    time.sleep(1)

Error grabbing data for Longavi
Not Found
Error grabbing data for Machali
Not Found
Error grabbing data for Vicuna
Not Found
Error grabbing data for Villa Presidente Frei, Nunoa, Santiago, Chile
Not Found


In [30]:
def getTimestamp(dt):
    return pd.datetime.fromtimestamp(dt)

In [31]:
def getDate(tstamp):
    dt=datetime.datetime.fromtimestamp(tstamp)
    return dt.strftime('%b %d - %H:%m')

In [33]:
weatherCollection.estimated_document_count()

366

## Querying MongoDB for useful information

In [34]:
cur=weatherCollection.find()

In [35]:
sortedCur=cur.sort('main.temp')

In [36]:
for doc in sortedCur.limit(10):
    try:
        print(doc['name'])
        print(doc['main']['temp'])
    except:
        print('Error: missing name/temp')

Región Aisén del General Carlos Ibáñez del Campo
266.923
Putre
269.023
Caletones
269.523
Cochrane
271.923
Tambillo
274.15
Puerto Williams
275.15
Región de Magallanes y de la Antártica Chilena
275.573
Melipeuco
275.973
Región de Arica y Parinacota
276.323
Puerto Natales
277.073


In [43]:
cur.collection.estimated_document_count()

366

## Find the max recorded temperatures over time per station

In [44]:
pipeline=[]

In [45]:
pipeline.append({'$group':{'_id':'$name', 'maxTemp':{'$max': '$main.temp'}}})

In [46]:
pipeline.append({'$limit':10})

In [47]:
cur=weatherCollection.aggregate(pipeline=pipeline)

In [48]:
for d in cur:
    print(d['_id'], d['maxTemp'])

Provincia de Santiago 300.77
Las Gaviotas 299.123
Putre 269.023
Arica 288.03
Santiago 292.15
Futaleufu 284.56
El Arrayán 292.15
Puerto Bories 277.073
Barrio Bellavista 296.073
Castro 282.123


## Get datetime of max temp per station

In [49]:
pipeline=[]

In [50]:
pipeline.append({'$match':{'name':{'$exists':True}}})

In [51]:
pipeline.append({'$sort':{'name':1, 'main.temp':-1}})

In [52]:
pipeline.append({'$group':{'_id':'$name', 'maxTemp':{'$first':'$main.temp'}, 'date':{'$first':'$dt'}}})

In [53]:
pipeline.append({'$limit':10})

In [54]:
cur=weatherCollection.aggregate(pipeline=pipeline)

In [55]:
for d in cur:
    print(getDate(d['date']), d['_id'], d['maxTemp'])

Aug 26 - 20:08 Ñuñoa 292.15
Aug 26 - 19:08 la Reina 298.11
Aug 26 - 20:08 Zapallar 285.15
Aug 26 - 20:08 Yumbel 286.15
Aug 26 - 20:08 Vitacura 292.15
Aug 26 - 20:08 Villa Alhué 284.15
Aug 26 - 21:08 Vallenar 285.123
Aug 26 - 20:08 Valdivia de Paine 292.15
Aug 26 - 20:08 Tongoy 285.15
Aug 26 - 21:08 Tome 297.15
