In [1]:
# !pip install pyshp
import numpy as np
import pandas as pd

def read_shapefile(shp_path):
    """
    Load a shapefile as a Pandas dataframe, using the pyshp package.

    Every attribute field of the shapefile becomes a column, and an extra
    'coords' column carries the list of points of the matching shape.
    """
    import shapefile

    reader = shapefile.Reader(shp_path)

    # The first entry of `fields` is skipped (presumably pyshp's deletion-flag
    # pseudo field, which has no counterpart in the record values)
    column_names = [field[0] for field in reader.fields[1:]]
    attribute_rows = reader.records()
    point_lists = [shape.points for shape in reader.shapes()]

    # Build the attribute table first, then attach the geometry column
    table = pd.DataFrame(data=attribute_rows, columns=column_names)
    return table.assign(coords=point_lists)

In [2]:
# Load the outdoor-inventory shapefile: one row per record, shape points in 'coords'
df = read_shapefile('data/outdoor_inventory/Outdoor_Inventory_AV.shp')

In [3]:
import pickle

# Load the cached reverse-geocoding results (later cells read each item's 'address' dict).
# NOTE(review): unpickling can execute arbitrary code — only load this file from a trusted source.
# Presumably the list is in the same row order as `df`; verify, since it is attached positionally.
reverse_coords_list = pd.read_pickle('data/reverse_loc.pickle')

In [4]:
# Pull the administrative fields out of each reverse-geocoding result.
# A result without an 'address' entry (e.g. a failed lookup) yields None in every
# column instead of raising AttributeError on `None.get(...)`.
addresses = [(result or {}).get('address') or {} for result in reverse_coords_list]

df['address_road'] = [address.get('road') for address in addresses]
df['concelho'] = [address.get('town') for address in addresses]
# Use the village when that key is present, otherwise fall back to the neighbourhood
df['freguesia'] = [
    address.get('village') if 'village' in address else address.get('neighbourhood')
    for address in addresses
]
df['distrito'] = [address.get('county') for address in addresses]

In [5]:
# Missing road names become empty strings so the substring tests below yield plain booleans
df['address_road'] = df.address_road.fillna('')

# Flag roads by substring of the road name
df['is_national_road'] = df.address_road.str.contains('EN')
df['is_highway'] = df.address_road.str.contains('Auto')

# A known road name that is neither national road nor highway counts as city center
df['is_city_center'] = ~(df.is_national_road | df.is_highway) & df.address_road.ne('')

In [6]:
from geopy import distance

# Sanity check: distance in km between two hard-coded coordinate pairs
distance.distance((40.900768, -8.473975), (40.9142, -8.48878)).km

1.9444630671970706

In [7]:
# https://geoffboeing.com/2014/08/clustering-to-reduce-spatial-data-set-size/
# https://stackoverflow.com/questions/43592094/efficient-way-to-calculate-geographic-density-in-pandas 
# https://stackoverflow.com/questions/34579213/dbscan-for-clustering-of-geographic-location-data 

from scipy.spatial import cKDTree

def find_neighbours_within_radius(xy, radius):
    """
    For every point in `xy`, list the indices of all points of `xy` lying
    within `radius` of it (radius in the same units as the coordinates).

    Returns a list holding one list of indices per input point.
    """
    kd_tree = cKDTree(xy)
    # Querying the tree against itself pairs each point with its own neighbourhood
    return kd_tree.query_ball_tree(kd_tree, r=radius)

def flatten_nested_list(nested_list):
    """Concatenate the sub-lists of `nested_list` into one flat list, preserving order."""
    flat = []
    for sublist in nested_list:
        flat.extend(sublist)
    return flat

def total_neighbours_within_radius(xy, radius):
    """
    Total number of (point, neighbour) pairs found by
    `find_neighbours_within_radius`, i.e. the summed length of all
    per-point neighbour lists.
    """
    per_point_neighbours = find_neighbours_within_radius(xy, radius)
    flattened = flatten_nested_list(per_point_neighbours)
    return len(flattened)

In [92]:
def get_density_billboards(df, radius_km=1.5, kms_per_radian=6371.0088):
    """
    Return a copy of `df` with a 'nbr_points_around_billboard' column that
    holds, for every row, the number of rows found within `radius_km` of it.

    Parameters
    ----------
    df : DataFrame
        Must have 'Y' and 'X' columns in degrees (presumably latitude and
        longitude — confirm against the shapefile).
    radius_km : float, default 1.5
        Search radius in kilometres (the value that used to be hard-coded).
    kms_per_radian : float, default 6371.0088
        Factor used to convert the radius from kilometres to radians.

    Returns
    -------
    DataFrame
        A copy of `df`; the input frame is not modified.

    NOTE(review): neighbours are found with a Euclidean KD-tree on the raw
    (Y, X) values in radians, so east-west offsets are not scaled by
    cos(latitude) and the radius is only approximate. Confirm that this
    approximation is acceptable before using the counts quantitatively.
    """
    df_copy = df.copy()

    # Coordinates and radius are both expressed in radians before the lookup
    coords_in_radians = np.radians(df_copy[['Y', 'X']].values)
    neighbours_within_radius = find_neighbours_within_radius(
        coords_in_radians, radius_km / kms_per_radian
    )

    # One neighbour list per row; its length is the local billboard count
    df_copy['nbr_points_around_billboard'] = [len(x) for x in neighbours_within_radius]

    return df_copy

In [93]:
# Add the 'nbr_points_around_billboard' density column (get_density_billboards returns a new frame)
df = get_density_billboards(df)

In [10]:
# Rebuild the per-row neighbour index lists in this cell: the variable of the same
# name inside get_density_billboards is local to that function, so on a fresh
# kernel this cell would otherwise fail with a NameError.
# Uses the same 1.5 km radius and km-per-radian factor as get_density_billboards.
neighbours_within_radius = find_neighbours_within_radius(
    np.radians(df[['Y', 'X']].values), 1.5 / 6371.0088
)

# Mean and standard deviation of Average_Da over each row's neighbourhood
df['average_people_around_billboard'] = [np.mean(df.iloc[x].Average_Da) for x in neighbours_within_radius]
df['std_people_around_billboard'] = [np.std(df.iloc[x].Average_Da) for x in neighbours_within_radius]

In [None]:
# TODO: Check whether the standard deviation within the cluster makes sense

In [11]:
# Difference between a billboard's own Average_Da and the mean Average_Da of its neighbourhood.
# Points where this value is very negative are just visual noise; they make no impact.
# Idea: remove N billboards per municipality (concelho)
df['diff_to_neighbourhood'] = df['Average_Da'] - df['average_people_around_billboard']

In [None]:
# Bad spots: places with few people of their own but lots of people around them. People are passing by the other billboards, not yours.
# High-density areas: we are splitting the marketing efficiency with other companies

# Locations for new ones (new billboards?) — TODO

In [25]:
# Focus on Lisbon: keep only the rows whose 'distrito' is 'Lisboa'.
# NOTE(review): this overwrites `df`, so the full data set is no longer available under that name.

df = df[df['distrito'] == 'Lisboa']

In [110]:
def fitness(df):
    """
    Fitness function, which we want to minimize.

    Recomputes the billboard density of `df` and returns the mean of the
    resulting 'nbr_points_around_billboard' column, i.e. the average number
    of billboards found around each billboard.
    """
    # Recompute the density on every call: candidate solutions move billboards around
    density = get_density_billboards(df)['nbr_points_around_billboard']

    # The total number of views (df['Average_Da'].sum()) used to be computed here
    # but never entered the result; add it to the return value if it should
    # become part of the objective.
    return density.mean()

In [111]:
# Baseline fitness of the current layout
fitness(df)

382.6084085051546

In [119]:
# Renumber the rows 0..n-1 so that labels match positions; the previous labels are kept as a column.
# NOTE(review): not idempotent — re-running this cell changes the frame again.
df = df.reset_index()

In [120]:
# The same coordinate can carry several impression values (maybe different directions
# of traffic?), so collapse them into one mean Average_Da per (X, Y) pair

mean_impressions_per_coord = df.groupby(['X', 'Y'], as_index=False)['Average_Da'].mean()

In [121]:
def get_neighbours(df, nbr_neighbours, random_state=None):
    """
    Build `nbr_neighbours` candidate solutions, each differing from `df` in one row.

    For every candidate, one randomly chosen row of a copy of `df` gets its
    'X', 'Y' and 'Average_Da' values replaced by those of a coordinate sampled
    (without replacement) from the module-level `mean_impressions_per_coord`.

    Parameters
    ----------
    df : DataFrame
        Current solution; must have 'X', 'Y' and 'Average_Da' columns.
    nbr_neighbours : int
        Number of candidates to create. Must not exceed the number of rows of
        `mean_impressions_per_coord`, otherwise the sampling raises ValueError.
    random_state : int, optional
        Seed for reproducible candidates. With the default None, NumPy's global
        random state is used, as before.

    Returns
    -------
    list of DataFrame
        `nbr_neighbours` modified copies of `df`; `df` itself is not changed.

    NOTE(review): only 'X', 'Y' and 'Average_Da' are replaced. Other columns of
    the modified row (e.g. 'coords' or previously computed neighbourhood
    statistics) keep their old values.
    """
    if random_state is None:
        rng = np.random
        sample_state = None
    else:
        rng = np.random.RandomState(random_state)
        sample_state = rng

    swapped_cols = ['X', 'Y', 'Average_Da']

    # Pick the new positions that will replace random rows of df
    sampled_coords = mean_impressions_per_coord.sample(
        nbr_neighbours, random_state=sample_state
    ).reset_index()

    # Column positions are the same for every copy, so resolve them once
    col_positions = [df.columns.get_loc(col) for col in swapped_cols]

    neighbors = []
    for i in range(nbr_neighbours):
        df_copy = df.copy()

        # Pick a random row position between 0 and len(df) - 1
        random_position = rng.randint(0, len(df_copy))

        # Write by position rather than by label: with `.loc`, a frame whose index
        # is not 0..n-1 would get the wrong row changed or a new row appended.
        df_copy.iloc[random_position, col_positions] = sampled_coords.loc[i, swapped_cols].to_numpy()

        neighbors.append(df_copy)

    return neighbors

In [114]:
# Try the neighbour generator: 5 candidate layouts built from the re-indexed current one
neighbors_it_1 = get_neighbours(df.reset_index(), 5)

In [116]:
# Fitness of three of the candidates (indices 0, 1 and 3)
fitness(neighbors_it_1[0]), fitness(neighbors_it_1[1]), fitness(neighbors_it_1[3])

(382.64851804123714, 382.67042525773195, 382.6074420103093)

In [130]:
# Simple hill climbing: at every iteration, generate random single-swap neighbours of
# the best layout found so far and move to the best of them if it lowers the fitness.
N_ITERATIONS = 10
N_NEIGHBOURS = 30

best_solution = df
# Start from the fitness of the current layout (rather than infinity) so that a
# neighbour is only accepted when it really improves on the starting point.
best_fitness = fitness(best_solution)

for iteration in range(N_ITERATIONS):

    # Get N_NEIGHBOURS random neighbours of the current best solution
    neighbors = get_neighbours(best_solution, N_NEIGHBOURS)

    fitness_neighbours = [fitness(x) for x in neighbors]

    # Neighbour with the lowest fitness in this batch
    neighbour_lower_fitness_idx = np.argmin(fitness_neighbours)
    fitness_neighbour_lower_fitness = fitness_neighbours[neighbour_lower_fitness_idx]
    neighbour_lower_fitness = neighbors[neighbour_lower_fitness_idx]

    # Accept it only if it beats the best solution seen so far
    if fitness_neighbour_lower_fitness < best_fitness:
        best_fitness = fitness_neighbour_lower_fitness
        best_solution = neighbour_lower_fitness
        print("Found a better solution!")

    print("Epoch %d | Fitness %f" % (iteration, best_fitness))

Found a better solution!
Epoch 0 | Fitness 382.456508
Found a better solution!
Epoch 1 | Fitness 382.347616
Found a better solution!
Epoch 2 | Fitness 382.183634
Found a better solution!
Epoch 3 | Fitness 382.031411
Found a better solution!
Epoch 4 | Fitness 381.836179
Found a better solution!
Epoch 5 | Fitness 381.662371
Found a better solution!
Epoch 6 | Fitness 381.484697
Found a better solution!
Epoch 7 | Fitness 381.343589
Found a better solution!
Epoch 8 | Fitness 381.186050
Found a better solution!
Epoch 9 | Fitness 380.995329


In [140]:
# Columns used to compare the original layout with the best solution
COLS = ['PanelID', 'X', 'Y', 'Max_Visibi']

In [146]:
# Rows that are not identical in both frames: stack the original and the best solution,
# then drop every row that occurs more than once (keep=False removes all copies).
# A moved billboard therefore shows up with both its old and its new coordinates.
diff = pd.concat([df[COLS], best_solution[COLS]]).drop_duplicates(keep=False)
diff.head(3)

Unnamed: 0,PanelID,X,Y,Max_Visibi
1523,25619,-9.140753,38.731875,69
1530,21061,-9.14422,38.738634,69
2162,7824,-9.144519,38.741787,69


In [143]:
# Panel 25619 as it appears in the original layout
df[df.PanelID == 25619]

Unnamed: 0,index,PanelID,X,Y,Max_Visibi,Average_Da,coords,address_road,concelho,freguesia,distrito,is_national_road,is_highway,is_city_center,nbr_points_around_billboard,average_people_around_billboard,std_people_around_billboard,diff_to_neighbourhood
1523,5478,25619,-9.140753,38.731875,69,2428.0,"[[-9.140753, 38.731875]]",Largo Dona Estefânia,,Saldanha,Lisboa,False,False,True,1218,8800.955802,5510.880071,-6372.955802


In [144]:
# Panel 25619 as it appears in the best solution found by the search.
# NOTE(review): in the output below only X, Y and Average_Da differ from the original row;
# 'coords' and the neighbourhood statistics still hold the values of the old position.
best_solution[best_solution.PanelID == 25619]

Unnamed: 0,index,PanelID,X,Y,Max_Visibi,Average_Da,coords,address_road,concelho,freguesia,distrito,is_national_road,is_highway,is_city_center,nbr_points_around_billboard,average_people_around_billboard,std_people_around_billboard,diff_to_neighbourhood
1523,5478,25619,-9.148588,38.779721,69,5438.0,"[[-9.140753, 38.731875]]",Largo Dona Estefânia,,Saldanha,Lisboa,False,False,True,1218,8800.955802,5510.880071,-6372.955802


In [None]:
# TODO: Row swaps — a metaheuristic where the density is re-computed and the average_da is recalculated
# penalise to minimise the distance travelled to move the billboard
# minimise the total number of operations — estimated cost of the solution

# Should the probability of an area being swapped be proportional to the number of repetitions? Idea to think about

In [142]:
# Among billboards with more than 10 points around them, the five whose Average_Da
# falls furthest below the mean of their neighbourhood
df[df.nbr_points_around_billboard > 10].sort_values(by='diff_to_neighbourhood').head(5)

Unnamed: 0,PanelID,X,Y,Max_Visibi,Average_Da,coords,address_road,concelho,freguesia,distrito,is_national_road,is_highway,is_city_center,nbr_points_around_billboard,average_people_around_billboard,std_people_around_billboard,diff_to_neighbourhood
1057,14609,-7.839479,37.031645,89,514.0,"[[-7.839479, 37.031645]]",Praceta José Leal Júnior,Olhão,,Faro,False,False,True,28,18629.642857,8628.288877,-18115.642857
8820,14608,-7.839473,37.031647,89,3657.0,"[[-7.839473, 37.031647]]",Praceta José Leal Júnior,Olhão,,Faro,False,False,True,28,18629.642857,8628.288877,-14972.642857
1267,6273,-8.885145,40.169839,69,628.0,"[[-8.885145, 40.169839]]",Rua Dona Maria,Figueira da Foz,,Coimbra,False,False,True,12,15455.833333,13779.42105,-14827.833333
1407,6274,-8.885147,40.16984,69,687.0,"[[-8.885147, 40.16984]]",Rua Dona Maria,Figueira da Foz,,Coimbra,False,False,True,12,15455.833333,13779.42105,-14768.833333
62,6285,-8.848601,40.148077,69,51.0,"[[-8.848601, 40.148077]]",Avenida Saraiva de Carvalho,Figueira da Foz,,Coimbra,False,False,True,45,14669.111111,17086.656032,-14618.111111


In [12]:
# Preview the first three rows
df.head(3)

Unnamed: 0,PanelID,X,Y,Max_Visibi,Average_Da,coords,address_road,concelho,freguesia,distrito,is_national_road,is_highway,is_city_center,nbr_points_around_billboard,average_people_around_billboard,std_people_around_billboard,diff_to_neighbourhood
0,26144,-8.473975,40.900768,69,31.0,"[[-8.473975, 40.900768]]",Rua Alto das Casas,São João da Madeira,Macieira de Sarnes,Aveiro,False,False,True,25,5148.24,4158.391442,-5117.24
1,11714,-9.315949,38.958125,69,31.0,"[[-9.315949, 38.958125]]",EN 9,Mafra,Barreiralva,Lisboa,True,False,False,9,998.333333,1228.854028,-967.333333
2,26109,-8.510079,40.871821,69,32.0,"[[-8.510079, 40.871821]]",Rua Professor Doutor António Joaquim Ferreira ...,Oliveira de Azeméis,Vila de Cucujães,Aveiro,False,False,True,5,92.0,44.181444,-60.0


# Map

In [13]:
# Inspect a single panel by its PanelID
df[df.PanelID == 48084]

Unnamed: 0,PanelID,X,Y,Max_Visibi,Average_Da,coords,address_road,concelho,freguesia,distrito,is_national_road,is_highway,is_city_center,nbr_points_around_billboard,average_people_around_billboard,std_people_around_billboard,diff_to_neighbourhood
16427,48084,-9.290662,38.75665,145,6422.0,"[[-9.290662, 38.75665]]",IC 19;A 37,,,Lisboa,False,False,True,84,5694.178571,2809.481859,727.821429


In [16]:
# Rows of the Lisboa district; this subset is what gets drawn on the map
df_sampling = df[df.distrito == 'Lisboa']

In [18]:
import folium

# Interactive map with one small crimson circle per row of df_sampling;
# each marker's tooltip is the row's PanelID.
m = folium.Map(location=[38.760398,-9.190202], zoom_start=13)

for panel_idx, billboard in df_sampling.iterrows():
    marker = folium.CircleMarker(
        location=[billboard["Y"], billboard["X"]],
        radius=3,
        color="crimson",
        fill=True,
        fill_color="crimson",
        tooltip=billboard["PanelID"],
    )
    marker.add_to(m)
        
        
# m

In [None]:
# Number of panels sharing each exact (X, Y) coordinate, in ascending order
df.groupby(['X', 'Y']).PanelID.count().sort_values()

In [None]:
# How often each X value occurs
df.X.value_counts()

In [None]:
# Rows at one specific X value
# NOTE(review): exact float comparison — matches only if the stored value equals this literal exactly
df[df.X == -9.128028]# 38.620447]]]

In [None]:
import seaborn as sns

# Distribution of Max_Visibi
# NOTE(review): `sns.distplot` is deprecated in newer seaborn releases in favour of
# `histplot` / `displot`; confirm the installed seaborn version before upgrading.
sns.distplot(df.Max_Visibi)

In [None]:
# Number of panels per Max_Visibi value
df.Max_Visibi.value_counts()