In [None]:
!pip install boto3
!pip install pickle
!pip install yaml
!pip install sodapy

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import pickle
import boto3

from datetime import date
from sodapy import Socrata

In [None]:
# Read API and AWS credentials from a local YAML file so that no secret is
# written in the notebook itself.
with open('credentials.yaml') as f:
    config = yaml.safe_load(f)

# Credentials used by the Socrata client for the Chicago data API.
token, username, password = (
    config['api_chicago'][field]
    for field in ('app_token', 'username', 'password')
)
# Student id; the S3 bucket name used later in the notebook ends with it.
matricula = config['iexe']['matricula']

In [None]:
# Socrata identifier of the food-inspections dataset; passed to client.get below.
chicago_dataset = '4ijn-s7e5'

### 1. Función get_client

In [None]:
def get_client():
    """Build a Socrata client for ``data.cityofchicago.org``.

    Authenticates with the module-level ``token``, ``username`` and
    ``password`` values read from the credentials file.

    Returns:
        Socrata: the configured client.
    """
    return Socrata(
        "data.cityofchicago.org",
        token,
        username=username,
        password=password,
    )

### 2. Ingesta inicial

Bajar los datos de la API de inspecciones de establecimientos de comida. 

In [None]:
def ingesta_inicial(chicago_dataset, client, limit):
    """Request the first ``limit`` records of a dataset, ordered by inspection date.

    Args:
        chicago_dataset: dataset identifier passed to ``client.get``.
        client: object exposing ``get`` (e.g. the client from ``get_client``).
        limit: maximum number of records requested.

    Returns:
        Whatever ``client.get`` returns for the query.
    """
    query = {'limit': limit, 'offset': 0, 'order': 'inspection_date'}
    return client.get(chicago_dataset, **query)

In [None]:
# Open the API connection and download the inspection history in one request.
# The 300000 cap is above the 249327 rows this run returned.
client = get_client()
datasets = ingesta_inicial(chicago_dataset, client=client, limit=300000)

In [None]:
# Number of records returned by the initial ingestion.
len(datasets)

249327

In [None]:
# Show the first record to see which fields each inspection carries.
datasets[0]

{'inspection_id': '52234',
 'dba_name': 'Cafe 608',
 'aka_name': 'Cafe 608',
 'license_': '2013328',
 'facility_type': 'Restaurant',
 'risk': 'Risk 1 (High)',
 'address': '608 W BARRY AVE ',
 'city': 'CHICAGO',
 'state': 'IL',
 'zip': '60657',
 'inspection_date': '2010-01-04T00:00:00.000',
 'inspection_type': 'License Re-Inspection',
 'results': 'Pass',
 'latitude': '41.938006880423615',
 'longitude': '-87.6447545707008',
 'location': {'latitude': '41.938006880423615',
  'longitude': '-87.6447545707008'}}

### 3. Almacenamiento de ingesta inicial

In [None]:
def guardar_ingesta(bucket, bucket_path, dataset):
    """Upload ``dataset`` to S3 as the object ``bucket_path`` inside ``bucket``.

    The AWS credentials are read from the module-level ``config['s3']`` mapping.

    Args:
        bucket: name of the target S3 bucket.
        bucket_path: key of the object inside the bucket.
        dataset: payload sent as the object's ``Body`` (callers in this
            notebook pass pickled bytes).
    """
    s3_credentials = config['s3']
    session = boto3.Session(
        aws_access_key_id=s3_credentials['aws_access_key_id'],
        aws_secret_access_key=s3_credentials['aws_secret_access_key'],
        aws_session_token=s3_credentials['aws_session_token'],
    )

    target = session.resource('s3').Object(bucket, bucket_path)
    target.put(Body=dataset)

### 3a. Creación de Bucket
##### creamos la variable con la fecha de hoy

In [None]:
# Date of this run; used as the suffix of the stored object keys.
TODAY = date.today()

# Create the bucket once per kernel session. The flag only lives as long as the
# kernel does, so a fresh "Run All" reaches create_bucket again; tolerate the
# bucket already being ours instead of aborting the notebook.
if 'bucket_creado' not in globals():
    print("creando")
    session = boto3.Session(
        aws_access_key_id=config['s3']['aws_access_key_id'],
        aws_secret_access_key=config['s3']['aws_secret_access_key'],
        aws_session_token=config['s3']['aws_session_token'],
    )
    s3 = session.resource('s3')
    nombre_bucket = "aplicaciones-cd-1-" + config['iexe']['matricula']
    try:
        s3.create_bucket(Bucket=nombre_bucket)
    except s3.meta.client.exceptions.BucketAlreadyOwnedByYou:
        # The bucket was created by an earlier session; nothing left to do.
        pass
    bucket_creado = True

creando


In [None]:
# Serialize the downloaded records to bytes; they are uploaded as the S3 object body below.
pickle_data = pickle.dumps(datasets)

In [None]:
# Destination of the historical dump; the object key ends with the ingestion date.
bucket = "".join(("aplicaciones-cd-1-", config['iexe']['matricula']))
key = f"ingesta/inicial/inspecciones-historicas-{TODAY}.pkl"

guardar_ingesta(bucket=bucket, bucket_path=key, dataset=pickle_data)

Guardar el dataset en un pickle en un bucket de S3; el nombre del archivo debe terminar con el día en el que se realizó la ingesta (obtenido de manera dinámica).

### 4. Ingestas consecutivas

La siguiente vez que se ingesten los datos, se tendrán que pedir específicamente a partir de la fecha desde la que no se tienen datos.

In [None]:
def ingesta_consecutiva(chicago_dataset, client, fecha, limit):
    """Request the records whose ``inspection_date`` is on or after ``fecha``.

    Args:
        chicago_dataset: dataset identifier passed to ``client.get``.
        client: object exposing ``get`` (e.g. the client from ``get_client``).
        fecha: lower bound for ``inspection_date``. It is interpolated with
            ``str.format``, so a ``'YYYY-MM-DD'`` string or a ``datetime.date``
            both produce the same filter.
        limit: maximum number of records requested.

    Returns:
        Whatever ``client.get`` returns for the filtered query.
    """
    # Order explicitly, as ingesta_inicial does: without it, the rows kept when
    # the result is capped by ``limit`` are not guaranteed to be the earliest ones.
    return client.get(
        chicago_dataset,
        limit=limit,
        where="inspection_date>='{}'".format(fecha),
        order='inspection_date',
    )

In [None]:
# Build a new client for the incremental ingestion.
client = get_client()

In [None]:
# Request up to 1000 inspections dated on or after 2020-11-03.
new_dataset = ingesta_consecutiva(
    chicago_dataset,
    client,
    fecha='2020-11-03',
    limit=1000,
)

In [None]:
# Show the first record returned by the incremental ingestion.
new_dataset[0]

{'inspection_id': '2454188',
 'dba_name': '63RD AND DREXEL MOBIL',
 'aka_name': '63RD AND DREXEL MOBIL/GREAT STEAK',
 'license_': '1817156',
 'facility_type': 'Restaurant',
 'risk': 'Risk 3 (Low)',
 'address': '850 E 63RD ST ',
 'city': 'CHICAGO',
 'state': 'IL',
 'zip': '60637',
 'inspection_date': '2020-11-03T00:00:00.000',
 'inspection_type': 'Canvass',
 'results': 'Pass',
 'violations': '39. CONTAMINATION PREVENTED DURING FOOD PREPARATION, STORAGE & DISPLAY - Comments: OBSERVED FOOD ITEMS STORED ON FLOOR BEHIND FRONT COUNTER. INSTRUCTED MANAGER TO ELEVATE ALL FOOD ITEMS SIX INCHES OFF FLOOR. | 49. NON-FOOD/FOOD CONTACT SURFACES CLEAN - Comments: OBSERVED RESIDUE ON INTERIOR SURFACES OF ICE MACHINE. INSTRUCTED MANAGER TO CLEAN AND MAINTAIN. | 55. PHYSICAL FACILITIES INSTALLED, MAINTAINED & CLEAN - Comments: OBSERVED DUST, DIRT AND DEBRIS ON FLOORS ALONG WALLS AND CORNERS IN SALES AND REAR STORAGE AREAS. INSTRUCTED MANAGER TO CLEAN AND MAINTAIN.',
 'latitude': '41.780547573927414',
 

In [None]:
# Number of records returned by the incremental ingestion.
# NOTE(review): the recorded count equals the limit of 1000 passed above, so the
# result was probably truncated — confirm and raise the limit or page if needed.
len(new_dataset)

1000

### 5. Almacenamiento de ingestas consecutivas

In [None]:
# Serialize the incremental records to bytes; they are uploaded as the S3 object body below.
pickled_new_data = pickle.dumps(new_dataset)

In [None]:
# Destination of the incremental dump; the object key ends with the ingestion date.
bucket = "".join(("aplicaciones-cd-1-", config['iexe']['matricula']))
key = f"ingesta/consecutiva/inspecciones-consecutivas-{TODAY}.pkl"

guardar_ingesta(bucket=bucket, bucket_path=key, dataset=pickled_new_data)