In [8]:
import logging
import pandas as pd
from sodapy import Socrata
from datetime import datetime
import pymongo
import expiringdict
import utils

# Download the arrest records when this cell/module is executed and
# normalise the dtypes of the columns used downstream.
client0 = Socrata("data.lacity.org", None)
results = client0.get(
    "yru6-6re4",
    where="arst_date > '2019-11-30T00:00:00.000'",  # cut-off date: wait to be confirmed
)
df = pd.DataFrame.from_records(results).assign(
    arst_date=lambda frame: pd.to_datetime(frame['arst_date']),  # text -> datetime64
    age=lambda frame: pd.to_numeric(frame['age']),               # text -> number
)
# Disabled step, kept for reference: parse the `time` column as HHMM.
#df['temp'] = df['time'].apply(lambda x: datetime.strptime(x, '%H%M').time())

# connect with mongodb
# No arguments are passed, so pymongo's default connection settings apply.
client = pymongo.MongoClient()
# NOTE(review): the logger is instantiated directly rather than obtained via
# logging.getLogger() — confirm this is intentional.
logger = logging.Logger(__name__)
# `utils.setup_logger` is defined outside this file; presumably it attaches a
# handler that writes to 'db.log' — verify against utils.
utils.setup_logger(logger, 'db.log')
# Not referenced anywhere in the visible code; unit is presumably seconds — TODO confirm.
RESULT_CACHE_EXPIRATION = 15 

def upsert_crime(df):
    """
    Upsert the rows of the given `DataFrame` into MongoDB database `crime`,
    collection `crime`.

    Every row becomes one document keyed on its `rpt_id` field: a stored
    document with the same `rpt_id` is replaced, otherwise the row is inserted.
    All rows are sent in a single ordered bulk write rather than one round trip
    per row. A summary line (rows / update / insert) is logged at INFO level.

    Parameters
    ----------
    df : pandas.DataFrame
        Records to store. Each record must provide an `rpt_id` value; an empty
        frame is accepted and results in no database write.

    Raises
    ------
    KeyError
        If a record has no `rpt_id` key.
    pymongo.errors.PyMongoError
        If the bulk write fails.
    """
    collection = client.get_database("crime").get_collection("crime")
    requests = [
        pymongo.ReplaceOne(
            {'rpt_id': record['rpt_id']},   # locate the document if it exists
            record,                         # latest version of the document
            upsert=True)
        for record in df.to_dict('records')
    ]
    update_count = 0
    insert_count = 0
    if requests:    # bulk_write() refuses an empty list of operations
        result = collection.bulk_write(requests, ordered=True)
        update_count = result.matched_count     # documents that already existed
        insert_count = result.upserted_count    # documents created by the upsert
    logger.info("rows={}, update={}, ".format(df.shape[0], update_count) +
                "insert={}".format(insert_count))



In [24]:
import time
import sched
import pandas as pd
import logging
import utils
from database import upsert_crime 
from sodapy import Socrata
from datetime import datetime

# Socrata domain queried for the data; default `url` of download_crime().
CRIME_SOURCE = "data.lacity.org"
# Default value of download_crime()'s `retries` parameter.
MAX_DOWNLOAD_ATTEMPT = 5
# Delay between scheduled updates; default `timeout` of main_loop().
DOWNLOAD_PERIOD = 15         # second
# NOTE(review): the logger is instantiated directly rather than obtained via
# logging.getLogger() — confirm this is intentional.
logger = logging.Logger(__name__)
# `utils.setup_logger` is defined outside this file; presumably it attaches a
# handler that writes to 'data.log' — verify against utils.
utils.setup_logger(logger, 'data.log')


def download_crime(url=CRIME_SOURCE, retries=MAX_DOWNLOAD_ATTEMPT):
    """Returns records from `CRIME_SOURCE` that includes crime and arrestee information.

    Queries dataset ``yru6-6re4`` on the Socrata domain `url` for rows whose
    `arst_date` is later than 2019-11-30. A failed request is retried until
    `retries` attempts have been made (at least one attempt is always made);
    the error of the final attempt is re-raised to the caller.

    Parameters
    ----------
    url : str
        Socrata domain to query.
    retries : int
        Maximum number of download attempts.

    Returns
    -------
    The value returned by `Socrata.get` for the query.

    NOTE(review): no `limit` is passed to `get`, so the API's default page size
    applies — confirm the result is not being truncated.
    """
    attempts = max(1, int(retries or 1))    # never fewer than one attempt
    for attempt in range(1, attempts + 1):
        client = Socrata(url, None)         # second argument None: no app token
        try:
            return client.get(
                "yru6-6re4",
                where="arst_date > '2019-11-30T00:00:00.000'")  # wait to be confirmed
        except Exception as e:
            if attempt == attempts:
                raise                       # out of attempts: let the caller decide
            logger.warning("download attempt {}/{} failed, retrying: {}".format(
                attempt, attempts, e))
        finally:
            client.close()                  # release the HTTP session on every path

def convert_crime(results):
    """Builds a `DataFrame` from the downloaded `results`, one row per record.

    No rows are dropped and no column is converted here; the former
    `dropna` clean-up step is disabled.
    """
    return pd.DataFrame.from_records(results)


def update_once():
    """Runs one refresh cycle: download the records, convert them to a
    `DataFrame`, and upsert that frame into the database.
    """
    upsert_crime(convert_crime(download_crime()))
    
def main_loop(timeout=DOWNLOAD_PERIOD):
    """Calls `update_once` over and over, waiting `timeout` seconds after each
    run before starting the next one.

    The call blocks: every run queues its successor, so the scheduler's queue
    is never drained by a normal run. An `Exception` raised by an update is
    logged as a warning and does not stop the loop.
    """
    scheduler = sched.scheduler(timefunc=time.time, delayfunc=time.sleep)

    def _worker():
        # A failing update must not break the chain of scheduled runs.
        try:
            update_once()
        except Exception as exc:
            logger.warning("main loop worker ignores exception and continues: {}".format(exc))
        # Queue the following run, `timeout` seconds from now.
        scheduler.enter(delay=timeout, priority=1, action=_worker)

    # Queue the first run with no delay, then hand control to the scheduler.
    scheduler.enter(delay=0, priority=1, action=_worker)
    scheduler.run()


# Start the periodic update loop only when this code runs as the main program
# (not when it is imported).
if __name__ == '__main__':
    main_loop()



2019-12-13 04:33:56,877 [upsert_crime]: rows=990, update=990, insert=0




2019-12-13 04:34:13,681 [upsert_crime]: rows=990, update=990, insert=0




2019-12-13 04:34:30,459 [upsert_crime]: rows=990, update=990, insert=0




2019-12-13 04:34:47,177 [upsert_crime]: rows=990, update=990, insert=0




2019-12-13 04:35:04,248 [upsert_crime]: rows=990, update=990, insert=0




2019-12-13 04:35:21,001 [upsert_crime]: rows=990, update=990, insert=0




2019-12-13 04:35:37,743 [upsert_crime]: rows=990, update=990, insert=0




2019-12-13 04:35:54,585 [upsert_crime]: rows=990, update=990, insert=0


KeyboardInterrupt: 