In [3]:
#!conda install -c conda-forge folium=0.5.0 --yes
#import folium

#print('Folium installed and imported!')

In [1]:
import numpy as np  # scientific computation
import pandas as pd # primary data structure library
import matplotlib.pyplot as plt #primary plotting stricture
import folium # map generator.


<h1 align=center><font size = 5>Puntos de acceso gratuitos de WiFi en Nueva York</font></h1>

En esta página se muestra el procesamiento de la información correspondiente a los AP de WiFi gratuitos en la ciudad de Nueva York.
El fin de esta página es crear herramientas visuales que muestren la conectividad en la ciudad, para evaluar y justificar el proyecto de *Movilidad Inteligente: conectividad vehicular*, a realizar por alumnos de posgrado y pregado del Tecnológico de Monterrey.

A continuación se muestra el proceso de procesamiento y visualización:

### Importación del archivo de puntos de acceso gratuitos

In [2]:
df = pd.read_csv('NYC_AP_LOC.csv') 
df.head(5)
#df.shape

Unnamed: 0,OBJECTID,Borough,Type,Provider,Name,Location,Latitude,Longitude,X,Y,...,Neighborhood Tabulation Area (NTA),Council Distrcit,Postcode,BoroCD,Census Tract,BCTCB2010,BIN,BBL,DOITT_ID,"Location (Lat, Long)"
0,10362,3,Limited Free,ALTICEUSA,Linden Park,IN PARK PLAYGROUND AREA,40.658385,-73.887583,1015441.0,179163.810029,...,East New York,42,11207,305,1104,1104,0,3043490001,217,"(40.658385, -73.8875829994)"
1,10447,4,Limited Free,SPECTRUM,Elmhurst Park,Park Area,40.728838,-73.883751,1016470.0,204833.279032,...,Elmhurst,25,11373,404,499,499,0,0,824,"(40.7288380002, -73.8837509996)"
2,10605,3,Free,AT&T,Hillside Park,Near Vine St and Columbia Heights,40.7015,-73.9943,985830.5,194851.884952,...,Brooklyn Heights-Cobble Hill,33,11201,302,1,1,0,3002050001,1409,"(40.7014999998, -73.9943000006)"
3,9753,3,Free,NYCHA,0,331 Myrtle Ave,40.693442,-73.973193,991683.8,191917.129995,...,Fort Greene,35,11205,302,18501,18501,3335255,3020410001,665,"(40.6934417997, -73.9731930998)"
4,9884,3,Free,Downtown Brooklyn,,409 Fulton St.,40.692178,-73.988304,987493.5,191455.769697,...,DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill,33,11201,302,11,11,3000281,3001440001,1028,"(40.6921780001, -73.9883039999)"


## Definición del tipo de variables dentro del DataFrame
En este caso, primero se convierte todos los nombres de las columnas en cadenas de caracteres, en caso de que existieran números. Con el fin de homogeneizar la información
***

In [3]:
df.columns = list(map(str, df.columns))

df.dtypes

OBJECTID                                         int64
Borough                                          int64
Type                                            object
Provider                                        object
Name                                            object
Location                                        object
Latitude                                       float64
Longitude                                      float64
X                                              float64
Y                                              float64
Location_T                                      object
Remarks                                         object
City                                            object
SSID                                            object
SourceID                                        object
Activated                                       object
BoroCode                                         int64
Borough Name                                    object
Neighborho

Filtrado de la base
==========

In [4]:
df.drop(['Provider', 'OBJECTID','BIN', 'Census Tract', 'BBL', 'DOITT_ID', 'X', 'Y' ], axis = 1, inplace = True)
df.describe(include=['object'])

Unnamed: 0,Type,Name,Location,Location_T,Remarks,City,SSID,SourceID,Activated,Borough Name,Neighborhood Tabulation Area Code (NTACODE),Neighborhood Tabulation Area (NTA),"Location (Lat, Long)"
count,3319,3089,3319,3319,2771,3319,3319,2257,3319,3319,3319,3319,3319
unique,3,2499,3023,16,281,44,16,1871,416,5,178,179,3150
top,Free,0,Park Perimeter,Outdoor Kiosk,"Tablet Internet -phone , Free 1 GB Wi-FI Service",New York,LinkNYC Free Wi-Fi,0,09/09/9999,Manhattan,MN17,Midtown-Midtown South,"(40.687191, -73.7695589996)"
freq,2736,39,42,1867,1868,1671,1868,343,1497,1672,160,160,14


### Esta descripción general nos da la forma general en la cual se tienen a las antenas de Wi Fi de NYC

Podemos ver que hay:
- 16 SSID's diferentes
- 197 vecindarios
- 3023 locaciones diferentes
- Principalmente en Manhattan. 




### A continuación se crea un nuevo dataframe que agrupe el número de antenas por colonia

In [5]:
dfBor = df.groupby(['Borough Name']).count().rename(columns={"DIRECCIÓN":"DIR"}).reset_index()
dfBor.head()

Unnamed: 0,Borough Name,Borough,Type,Name,Location,Latitude,Longitude,Location_T,Remarks,City,...,SourceID,Activated,BoroCode,Neighborhood Tabulation Area Code (NTACODE),Neighborhood Tabulation Area (NTA),Council Distrcit,Postcode,BoroCD,BCTCB2010,"Location (Lat, Long)"
0,Bronx,316,316,316,316,316,316,316,278,316,...,137,316,316,316,316,316,316,316,316,316
1,Brooklyn,700,700,600,700,700,700,700,496,700,...,300,700,700,700,700,700,700,700,700,700
2,Manhattan,1672,1672,1542,1672,1672,1672,1672,1447,1672,...,1302,1672,1672,1672,1672,1672,1672,1672,1672,1672
3,Queens,531,531,531,531,531,531,531,464,531,...,432,531,531,531,531,531,531,531,531,531
4,Staten Island,100,100,100,100,100,100,100,86,100,...,86,100,100,100,100,100,100,100,100,100


### Esta información agrupada por "borough", o distrito nos muestra que:
* Bronx cuenta con 316 AP
* Brooklyn con 700
* Manhattan con 1672
* Queens con 531
* Staten Island con 100

In [6]:
dfBorCD = df.groupby(['BoroCD']).count().rename(columns={"DIRECCIÓN":"DIR"}).reset_index()
dfBorCD

Unnamed: 0,BoroCD,Borough,Type,Name,Location,Latitude,Longitude,Location_T,Remarks,City,...,SourceID,Activated,BoroCode,Borough Name,Neighborhood Tabulation Area Code (NTACODE),Neighborhood Tabulation Area (NTA),Council Distrcit,Postcode,BCTCB2010,"Location (Lat, Long)"
0,101,117,117,117,117,117,117,117,77,117,...,56,117,117,117,117,117,117,117,117,117
1,102,75,75,73,75,75,75,75,69,75,...,51,75,75,75,75,75,75,75,75,75
2,103,93,93,93,93,93,93,93,87,93,...,79,93,93,93,93,93,93,93,93,93
3,104,162,162,134,162,162,162,162,131,162,...,121,162,162,162,162,162,162,162,162,162
4,105,234,234,234,234,234,234,234,226,234,...,196,234,234,234,234,234,234,234,234,234
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,482,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,3,3,3
63,501,33,33,33,33,33,33,33,27,33,...,27,33,33,33,33,33,33,33,33,33
64,502,41,41,41,41,41,41,41,38,41,...,38,41,41,41,41,41,41,41,41,41
65,503,10,10,10,10,10,10,10,6,10,...,6,10,10,10,10,10,10,10,10,10


# Sección 2: Herramientas de visualización

El siguiente paso, una vez filtrada nuestra base de datos es hacer visualización geoespacial. Para lograr esto, se recurre al archivo geojson de la ciudad de México, disponible en:


## Mapa coropléxico de WiFi en NYC por distrito

In [7]:
nyc_geo = r'NYC_districts.geojson'

# creating a numpy array of length 6 and has linear spacing from the minium total immigration to the maximum total immigration
threshold_scale = np.linspace(dfBorCD['Location'].min(),
                              (dfBorCD['Location'].max()-200),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # make sure that the last value of the list is greater than the maximum immigration

mapa_NYC = folium.Map(location=[40.730610 , -73.935242], zoom_start=12, tiles='Mapbox Bright')
mapa_NYC.choropleth(
    geo_data=nyc_geo,
    data=dfBorCD,
    columns=['BoroCD', 'Borough'],
    key_on='feature.properties.BoroCD',
    #threshold_scale=threshold_scale,
    fill_color='BuPu', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Mapa Coroplexico WiFi Gratuito NYC por distrito',
    reset=True
)
mapa_NYC







## Creación de mapa interactivo para localización de Antenas

In [8]:
from folium import plugins
latitude=40.730610
longitude=-73.935242
# let's start again with a clean copy of the map of San Francisco
map_nyc_num = folium.Map(location = [latitude, longitude], zoom_start = 12)

# instantiate a mark cluster object for the incidents in the dataframe
incidents = plugins.MarkerCluster().add_to(map_nyc_num)

# loop through the dataframe and add each data point to the mark cluster
for lat, lng, label, in zip(df['Latitude'], df['Longitude'], df['SSID']):
    folium.Marker(
        location=[lat, lng],
        icon=None,
        popup=label,
    ).add_to(incidents)

# display map
map_nyc_num

In [9]:
from area import area
obj = {'type':'Polygon','coordinates':[[[-180,-90],[-180,90],[180,90],[180,-90],[-180,-90]]]}
area(obj)

511207893395811.06

In [10]:
colAr= pd.read_csv('NYC_AP_LOC.csv')
colAr.describe(include=['object'])

Unnamed: 0,Type,Provider,Name,Location,Location_T,Remarks,City,SSID,SourceID,Activated,Borough Name,Neighborhood Tabulation Area Code (NTACODE),Neighborhood Tabulation Area (NTA),"Location (Lat, Long)"
count,3319,3319,3089,3319,3319,2771,3319,3319,2257,3319,3319,3319,3319,3319
unique,3,17,2499,3023,16,281,44,16,1871,416,5,178,179,3150
top,Free,LinkNYC - Citybridge,0,Park Perimeter,Outdoor Kiosk,"Tablet Internet -phone , Free 1 GB Wi-FI Service",New York,LinkNYC Free Wi-Fi,0,09/09/9999,Manhattan,MN17,Midtown-Midtown South,"(40.687191, -73.7695589996)"
freq,2736,1868,39,42,1867,1868,1671,1868,343,1497,1672,160,160,14


In [23]:
#!conda install -c conda-forge/label/geopandas_dev geopandas 
from area import area
import geopandas as gpd
df = gpd.read_file('NYC_districts.geojson')

df.dtypes
#Shape Area contains length of every Borough. 

BoroCD           int64
Shape_Leng     float64
Shape_Area     float64
geometry      geometry
dtype: object

In [18]:
totcoverage= ((3319 * (3.1416*(6400) )) / df['Shape_Area'].sum())*100
print('Total area of: ', df['Shape_Area'].sum() )
totcoverage

Total area of:  8456306536.227


0.7891460683705829

In [19]:
totcoverage= ((13693 * (3.1416*(6400) )) / df['geometry'].area.sum())*100
print('Total area of: ', df['geometry'].area.sum() )
totcoverage


Total area of:  0.08370070236876026


328927639229.4124

In [1]:
series = gpd.GeoSeries(df['geometry'])

deg2km = 111

(series.length * deg2km).sum()

probmedia2 = ((3319 * (3.1416*(6400) )) / (789 * 1000000))*100
probmedia2

NameError: name 'gpd' is not defined

In [2]:
((3319 * (3.1416*(6400) )) / (789 * 1000000))*100

8.4578720608365