In [54]:
import numpy as np
import requests
import pandas as pd

# POINTS OF INTEREST
# Download the Porto Digital POI feed and flatten it into one record per POI.

response = requests.get(
    'https://wdl-data.fra1.digitaloceanspaces.com/porto-digital/POIs.json',
    timeout=30,  # do not let a stalled connection hang the notebook
)
response.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
tmp = response.json()
ret = []

for count, point_of_interest in enumerate(tmp['points_of_interest']):
    category = point_of_interest.get('category')[0]['value']
    # Tolerate a missing 'location' entry: treat it like a POI without a point.
    location = point_of_interest.get('location') or {}
    created = point_of_interest.get('created')

    # Only entries carrying a 'point' geometry expose a 'posList' coordinate string;
    # everything else is recorded as missing.
    if 'point' in location:
        location = location.get('point')[0].get('Point').get('posList')
    else:
        location = np.nan
    description = point_of_interest.get('description')[0]['value']

    ret.append({
        'id': count,  # position in the feed, used as the POI identifier
        'created_at': created,
        'category': category,
        'location': location,
        'description': description
    })

df_pois = pd.DataFrame.from_records(ret)

# Split the coordinate string on its first space into two string columns.
# `n` must be passed by keyword: pandas 2.x no longer accepts it positionally.
df_pois[['latitude', 'longitude']] = df_pois['location'].str.split(' ', n=1, expand=True)

# Remove every POI without coordinates. This replaces the hard-coded
# `drop(1609)`, which only worked while the single missing row sat at that label.
df_pois = df_pois.dropna(subset=['location'])

In [55]:
# SENSORS
# Load the sensor metadata and discard the entity whose id is 'testsixsq'
# (presumably a test device - confirm with the data owner).

data_entities = pd.read_csv(
    '/content/drive/MyDrive/WDL/data_entities.csv',
    sep=',',
    encoding='latin-1',
    # Replaces `error_bad_lines=False` (deprecated in pandas 1.3, removed in 2.0):
    # malformed rows are still skipped and a warning is still emitted for each.
    on_bad_lines='warn',
)
# Boolean filter instead of an index lookup followed by an in-place drop, so
# re-running the cell always starts from the freshly read frame.
data_entities = data_entities[data_entities['entity_id'] != 'testsixsq']

In [56]:
import geopy.distance

def get_lat_lon_dist(row):
    """Return the geodesic distance, in kilometres, between the two points of a row.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must provide 'latitude1'/'longitude1' for the first point and
        'latitude2'/'longitude2' for the second.

    Returns
    -------
    The `.kilometers` value of `geopy.distance.geodesic` for the two points.
    """
    first_point = (row['latitude1'], row['longitude1'])
    second_point = (row['latitude2'], row['longitude2'])
    separation = geopy.distance.geodesic(first_point, second_point)
    return separation.kilometers


# Pair every sensor with every point of interest: both frames receive a
# constant helper key `k`, are joined on it, and the key is dropped afterwards.
# Column names present in both frames get the suffix '1' (sensor side) or
# '2' (POI side).
dist = (
    data_entities.assign(k=1)
    .merge(df_pois.assign(k=1), on='k', suffixes=('1', '2'))
    .drop(columns='k')
)

# Distance in kilometres for each sensor/POI pair, computed row by row.
dist['dist_NOME_DA_ENTIDADE'] = dist.apply(get_lat_lon_dist, axis=1)

In [57]:
# Keep only the columns needed for the proximity counts and give the
# identifiers readable names.
dist_new = (
    dist[['entity_id', 'id', 'category', 'dist_NOME_DA_ENTIDADE']]
    .copy()
    .rename(columns={'entity_id': 'sensor', 'id': 'point of interest'})
)

# 1 when the pair is at most 1.5 km apart, 0 otherwise.
dist_new['is_below_threshold'] = (dist_new['dist_NOME_DA_ENTIDADE'] <= 1.5).astype(int)

# Count the nearby POIs per sensor and category, then spread the categories
# into columns (one row per sensor).
sensor_categ = (
    dist_new.groupby(['sensor', 'category'])['is_below_threshold']
    .sum()
    .reset_index()
    .pivot_table(index="sensor", columns="category", values="is_below_threshold")
)

In [53]:
# Mount Google Drive so files under /content/drive can be read.
# NOTE(review): this cell has execution count 53 but sits after cells 54-57,
# one of which already reads from /content/drive; on a fresh "Run all" the
# mount has to happen before that read.
from google.colab import drive
drive.mount('/content/drive')

# File names of the traffic-flow CSV exports (one per year) handed to load_data.
TRAFFIC_DATA_PATH = [
    'traffic_flow_2018.csv',
    'traffic_flow_2019.csv'
]


def load_data(data_paths, date_col, value_cols, agg_operation = 'sum',
              base_dir='/content/drive/MyDrive/WDL/'):
    """Read several CSV files and aggregate them to hourly values per entity.

    Each file is read with latin-1 encoding, `date_col` is parsed as datetime,
    and the rows are grouped by 'entity_id' and resampled to hourly bins.
    The per-file results are concatenated in the order of `data_paths`.

    Parameters
    ----------
    data_paths : iterable of str
        File names, resolved relative to `base_dir`.
    date_col : str
        Name of the timestamp column used as the resampling index.
    value_cols : list of str
        Columns to aggregate.
    agg_operation : str or callable, default 'sum'
        Aggregation passed to `.agg()` for every hourly bin.
    base_dir : str, default '/content/drive/MyDrive/WDL/'
        Folder containing the files. The default keeps existing calls working.

    Returns
    -------
    pandas.DataFrame
        Columns 'entity_id', `date_col` and `value_cols`. The row index
        restarts at 0 for every input file, exactly as before.

    Raises
    ------
    ValueError
        If `data_paths` is empty.
    """
    import os  # local import so the cell does not rely on a notebook-level import

    concat_data = []

    for file_name in data_paths:
        print("Reading file ", file_name)
        df = pd.read_csv(os.path.join(base_dir, file_name), encoding='latin-1')

        df[date_col] = pd.to_datetime(df[date_col])
        # 'h' is the hourly alias; the upper-case 'H' spelling is deprecated
        # in recent pandas releases.
        df_resampled = (
            df.set_index(date_col)
            .groupby('entity_id')[value_cols]
            .resample('h')
            .agg(agg_operation)
            .reset_index()
        )
        concat_data.append(df_resampled)

    if not concat_data:
        # pd.concat would raise the same exception type with a vaguer message.
        raise ValueError("load_data needs at least one file in data_paths")

    return pd.concat(concat_data)

# Hourly summed intensity per entity for all configured traffic files, with
# the key columns renamed to 'dateobserved' and 'sensor'.
date_col = 'dateobservedfrom'
value_cols = ['intensity']
files_name = TRAFFIC_DATA_PATH
traffic_data = load_data(files_name, date_col, value_cols, agg_operation='sum').rename(
    columns={'dateobservedfrom': 'dateobserved', 'entity_id': 'sensor'}
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Reading file  traffic_flow_2018.csv
Reading file  traffic_flow_2019.csv


  if self.run_code(code, result):


In [58]:
# Attach the per-category nearby-POI counts to every hourly traffic row of the
# same sensor (pandas' default inner join on "sensor").
traffic_categ = sensor_categ.merge(traffic_data, on="sensor")