In [1]:
# !pip install pyshp
import pandas as pd

def read_shapefile(shp_path):
    """
    Read a shapefile into a Pandas dataframe with a 'coords' column holding
    the geometry information. This uses the pyshp package
    """
    import shapefile

    #read file, parse out the records and shapes
    sf = shapefile.Reader(shp_path)
    fields = [x[0] for x in sf.fields][1:]
    records = sf.records()
    shps = [s.points for s in sf.shapes()]

    #write into a dataframe
    df = pd.DataFrame(columns=fields, data=records)
    df = df.assign(coords=shps)

    return df

In [71]:
df = read_shapefile('data/outdoor_inventory/Outdoor_Inventory_AV.shp')

In [72]:
import pickle

reverse_coords_list = pd.read_pickle('data/reverse_loc.pickle')

In [73]:
df['address_road'] = [x.get('address').get('road') for x in reverse_coords_list]
df['concelho'] = [x.get('address').get('town') for x in reverse_coords_list]
df['freguesia'] = [x.get('address').get('village') if 'village' in x.get('address') else x.get('address').get('neighbourhood') for x in reverse_coords_list]
df['distrito'] = [x.get('address').get('county') for x in reverse_coords_list]

In [74]:
df['address_road'] = df['address_road'].fillna('')

df['is_national_road'] = df['address_road'].str.contains('EN')
df['is_highway'] = df['address_road'].str.contains('Auto')
df['is_city_center'] = (~df['is_national_road']) & (~df['is_highway']) & (df['address_road'] != '')

In [103]:
from geopy import distance

distance.distance((40.900768, -8.473975), (40.9142, -8.48878)).km

1.9444630671970706

In [78]:
# https://geoffboeing.com/2014/08/clustering-to-reduce-spatial-data-set-size/
# https://stackoverflow.com/questions/43592094/efficient-way-to-calculate-geographic-density-in-pandas 
# https://stackoverflow.com/questions/34579213/dbscan-for-clustering-of-geographic-location-data 

from scipy.spatial import cKDTree

def find_neighbours_within_radius(xy, radius):
    tree = cKDTree(xy)
    within_radius = tree.query_ball_tree(tree, r=radius)
    return within_radius

def flatten_nested_list(nested_list):
    return [item for sublist in nested_list for item in sublist]

def total_neighbours_within_radius(xy, radius):
    neighbours = find_neighbours_within_radius(xy, radius)
    return len(flatten_nested_list(neighbours))

In [121]:
# Calculate number of billboards in a X km radius for each point

import numpy as np

kms_per_radian = 6371.0088
radius = 1.5
neighbours_within_radius = find_neighbours_within_radius(np.radians(df[['Y', 'X']].values), radius/kms_per_radian)

In [134]:
df['nbr_points_around_billboard'] = [len(x) for x in neighbours_within_radius]

In [132]:
df['average_people_around_billboard'] = [np.mean(df.iloc[x].Average_Da) for x in neighbours_within_radius]
df['std_people_around_billboard'] = [np.std(df.iloc[x].Average_Da) for x in neighbours_within_radius]

In [None]:
# TODO: Ver se faz sentido desvio padrão dentro do cluster

In [141]:
# points where this value is very negative are just visual noise, they make no impact
# retirar N billboards por concelho
df['diff_to_neighbourhood'] = df['Average_Da'] - df['average_people_around_billboard']

In [None]:
# sitios maus: sitios com pouca gente, e que tens muita gente a volta. As pessoas estão a passar pelos outros, e não pelo teu. 
# zonas com muita densidade: estamos a dividir a eficiencia de marketing com outras empresas

# Locais para novas 

In [None]:
# TODO: Swap de linhas, metaheuristica onde re-calculas a densidade e calculas a average_da
# penalizar para minimizar a distancia percorrida para trocar o billboard 
# minimizar nº total de operações - Custo estimado da solução 

# Probabilidade de uma zona sofrer swap é proporcional ao nº de repetições? Ideia a pensar

In [146]:
df[['X', 'Y']].value_counts()

X          Y        
-9.128028  38.620447    11
-9.167858  38.741668     6
-8.669275  41.238443     4
-9.235608  38.760009     4
-8.890420  38.530000     4
                        ..
-8.629724  41.234395     1
-8.629715  41.093727     1
-8.629713  41.234399     1
-8.629710  41.035395     1
-9.475810  38.815709     1
Length: 28638, dtype: int64

In [145]:
df.sort_values(by='Y', ascending=False)

Unnamed: 0,PanelID,X,Y,Max_Visibi,Average_Da,coords,address_road,concelho,freguesia,distrito,is_national_road,is_highway,is_city_center,nbr_points_around_billboard,average_people_around_billboard,std_people_around_billboard,diff_to_neighbourhood
1969,49151,-8.256722,42.106631,145,964.0,"[[-8.256722, 42.106631]]",Rua Santo Cristo,Melgaço,Roussas,Viana do Castelo,False,False,True,1,964.000000,0.000000,0.000000
2032,48884,-8.650600,42.029889,162,998.0,"[[-8.6506, 42.029889]]",Acesso à A 3 - Espanha,Valença,,Viana do Castelo,False,False,True,1,998.000000,0.000000,0.000000
26258,3442,-8.839207,41.878759,69,12791.0,"[[-8.839207, 41.878759]]",Rua Conselheiro Miguel Dantas,,,Viana do Castelo,False,False,True,16,11411.250000,4700.813926,1379.750000
12461,3447,-8.839732,41.877353,69,4936.0,"[[-8.839732, 41.877353]]",Rua Conselheiro Miguel Dantas,,,Viana do Castelo,False,False,True,17,11189.470588,4645.940962,-6253.470588
22363,3446,-8.839731,41.877348,69,9373.0,"[[-8.839731, 41.877348]]",Rua Conselheiro Miguel Dantas,,,Viana do Castelo,False,False,True,17,11189.470588,4645.940962,-1816.470588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22101,30022,-7.917005,37.014409,145,9220.0,"[[-7.917005, 37.014409]]",Ecovia do Litoral,,,Faro,False,False,True,89,8536.059925,5433.475169,683.940075
22922,30021,-7.917086,37.014358,145,9729.0,"[[-7.917086, 37.014358]]",Ecovia do Litoral,,,Faro,False,False,True,89,8536.059925,5433.475169,1192.940075
8268,6152,-7.932639,37.013506,69,3458.0,"[[-7.932639, 37.013506]]",Largo de São Francisco,,,Faro,False,False,True,165,7624.579798,5595.403815,-4166.579798
11821,45958,-7.932727,37.012461,69,4713.0,"[[-7.932727, 37.012461]]",Largo de São Francisco,,,Faro,False,False,True,147,7868.460317,5824.822951,-3155.460317


In [142]:
df[df.nbr_points_around_billboard > 10].sort_values(by='diff_to_neighbourhood').head(5)

Unnamed: 0,PanelID,X,Y,Max_Visibi,Average_Da,coords,address_road,concelho,freguesia,distrito,is_national_road,is_highway,is_city_center,nbr_points_around_billboard,average_people_around_billboard,std_people_around_billboard,diff_to_neighbourhood
1057,14609,-7.839479,37.031645,89,514.0,"[[-7.839479, 37.031645]]",Praceta José Leal Júnior,Olhão,,Faro,False,False,True,28,18629.642857,8628.288877,-18115.642857
8820,14608,-7.839473,37.031647,89,3657.0,"[[-7.839473, 37.031647]]",Praceta José Leal Júnior,Olhão,,Faro,False,False,True,28,18629.642857,8628.288877,-14972.642857
1267,6273,-8.885145,40.169839,69,628.0,"[[-8.885145, 40.169839]]",Rua Dona Maria,Figueira da Foz,,Coimbra,False,False,True,12,15455.833333,13779.42105,-14827.833333
1407,6274,-8.885147,40.16984,69,687.0,"[[-8.885147, 40.16984]]",Rua Dona Maria,Figueira da Foz,,Coimbra,False,False,True,12,15455.833333,13779.42105,-14768.833333
62,6285,-8.848601,40.148077,69,51.0,"[[-8.848601, 40.148077]]",Avenida Saraiva de Carvalho,Figueira da Foz,,Coimbra,False,False,True,45,14669.111111,17086.656032,-14618.111111


In [None]:
# Filtrar lisboa e porto

In [None]:
# Features:
# Para um par (X, Y), calcular número de outdoors vizinhos

# Modelo para prever o número de pessoas que vêem os outdoors num dado dia:
# Freguesia
# Densidade Populacional
# Pontos de interesse em redor dessa região (proxy de desenvolvimento)

In [None]:
df.head(3)

In [None]:
df[df.PanelID == 48084]

In [None]:
df_sampling = df[df.distrito == 'Lisbon']

In [None]:
import folium

m = folium.Map(location=[38.760398,-9.190202], zoom_start=13)

for indice, row in df_sampling.iterrows():
      folium.CircleMarker(location=[row["Y"], row["X"]],
                          color="crimson", fill_color="crimson", tooltip=row["PanelID"], fill=True,
                          radius=3).add_to(m)
        
        
m

In [None]:
df.groupby(['X', 'Y']).PanelID.count().sort_values()

In [None]:
df.X.value_counts()

In [None]:
df[df.X == -9.128028]# 38.620447]]]

In [None]:
import seaborn as sns

sns.distplot(df.Max_Visibi)

In [None]:
df.Max_Visibi.value_counts()