In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# import folium library to use leaflet
import folium
from folium.plugins import MarkerCluster

Now we will create an interactive map showing the cities from which the website was visited and the number of visits from each city.

In [2]:
# Load the per-IP-address dataframe saved by the previous notebook.
ip_df = pd.read_csv(filepath_or_buffer='ip_df.csv')
ip_df

Unnamed: 0,ip_address,counts,continent,country,city,latitude,longitude
0,66.249.76.216,46382,North America,United States,Mountain View,37.4192,-122.0574
1,80.28.221.123,14725,Europe,Spain,Torrejón de Ardoz,40.4554,-3.4697
2,217.125.71.222,5201,Europe,Spain,Seville,37.3824,-5.9761
3,66.249.75.148,3558,North America,United States,Mountain View,37.4192,-122.0574
4,162.243.192.191,2927,North America,United States,New York,40.7308,-73.9975
...,...,...,...,...,...,...,...
2915,84.123.150.27,1,Europe,Spain,Gandia,38.9667,-0.1833
2916,217.130.150.116,1,Europe,Spain,,40.4172,-3.6840
2917,202.46.52.23,1,Asia,China,Shenzhen,22.5333,114.1333
2918,216.151.130.170,1,North America,United States,San Jose,37.4250,-121.9460


In [3]:
# Keep only the columns the map needs: marker position, city name and visit count.
map_columns = ['latitude', 'longitude', 'city', 'counts']
ip = ip_df[map_columns]
# Count the missing values in each of those columns.
ip.isna().sum()

latitude       1
longitude      1
city         364
counts         0
dtype: int64

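Because the map labels every marker with its city name, we delete the rows that have an empty value in 'city'. Note that `dropna()` removes every row containing a missing value, so any row without coordinates is dropped as well.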

In [4]:
# Drop every row that has a missing value. Besides the rows without a city name,
# this also removes any row without coordinates, which could not be placed on the map.
ip = ip.dropna()
# Fail loudly if any missing value survived. A bare `ip.isnull().sum()` in the middle
# of a cell is never displayed (only the last expression is), so it verified nothing.
assert not ip.isna().any().any(), 'unexpected missing values after dropna()'
ip

Unnamed: 0,latitude,longitude,city,counts
0,37.4192,-122.0574,Mountain View,46382
1,40.4554,-3.4697,Torrejón de Ardoz,14725
2,37.3824,-5.9761,Seville,5201
3,37.4192,-122.0574,Mountain View,3558
4,40.7308,-73.9975,New York,2927
...,...,...,...,...
2914,-4.0000,-79.2167,Loja,1
2915,38.9667,-0.1833,Gandia,1
2917,22.5333,114.1333,Shenzhen,1
2918,37.4250,-121.9460,San Jose,1


In [5]:
# Number of distinct city names left in `ip`.
ip['city'].nunique(dropna=False)

741

In [6]:
# Total number of visits over all rows of `ip`.
ip['counts'].sum()

232929

In [7]:
# Inspect the column types of `ip`.
ip.dtypes

latitude     float64
longitude    float64
city          object
counts         int64
dtype: object

In [8]:
# Cities are repeated in `ip` because it has one row per IP address; here we
# compute the total number of visits per city.
# Make sure `counts` is an integer column. `assign` returns a new frame, which avoids
# the SettingWithCopyWarning that assigning into the result of `dropna()` can raise.
ip = ip.assign(counts=ip['counts'].astype(int))
# Group by city and sum the counts (total visits to the web from each city),
# ordered alphabetically by city name.
cities = (
    ip.groupby('city')['counts']
    .sum()
    .rename_axis('total_city')
    .reset_index(name='total_counts')
    .sort_values(by='total_city')
)
cities

Unnamed: 0,total_city,total_counts
0,A Coruña,645
1,Abaran,511
2,Abidjan,2
3,Abu Dhabi,2
4,Acacias,1
...,...,...
736,el Prat de Llobregat,109
737,l'Hospitalet de l'Infant,205
738,Ávila,92
739,Écija,531


In [9]:
# Sanity check: the per-city totals should add up to the same figure as `ip.counts.sum()`.
cities['total_counts'].sum()

232929

In [10]:
# `ip` holds coordinates per IP address, but the map needs a single coordinate pair
# per city: keep the first row seen for each city name, then order by city name.
# NOTE(review): cities are identified by name only, so different places that share
# a name would be treated as one city — confirm this is acceptable.
ip2 = (
    ip
    .drop_duplicates(subset='city', keep='first')
    .sort_values(by='city')
)
ip2

Unnamed: 0,latitude,longitude,city,counts
197,43.3666,-8.4068,A Coruña,177
32,38.2055,-1.3991,Abaran,511
2157,5.3097,-4.0127,Abidjan,2
2298,24.4667,54.3667,Abu Dhabi,2
2542,3.9869,-73.7580,Acacias,1
...,...,...,...,...
513,41.3278,2.0947,el Prat de Llobregat,109
279,40.9918,0.9230,l'Hospitalet de l'Infant,147
623,40.6572,-4.6995,Ávila,92
30,37.5422,-5.0826,Écija,531


In [11]:
# Attach the per-city visit totals (`cities`) to the per-city coordinates (`ip2`).
# The frames are joined on the city name rather than pasted side by side by row
# position, so every total is guaranteed to land on its own city's coordinates even
# if the two frames are ever ordered differently. `validate` raises if a city name
# appears more than once on either side.
total_cities = (
    ip2[['latitude', 'longitude', 'city']]  # the per-IP 'counts' column is not needed
    .merge(cities, left_on='city', right_on='total_city',
           how='inner', validate='one_to_one')
    .drop(columns='city')  # duplicate of 'total_city'
)
total_cities

Unnamed: 0,latitude,longitude,total_city,total_counts
0,43.3666,-8.4068,A Coruña,645
1,38.2055,-1.3991,Abaran,511
2,5.3097,-4.0127,Abidjan,2
3,24.4667,54.3667,Abu Dhabi,2
4,3.9869,-73.7580,Acacias,1
...,...,...,...,...
736,41.3278,2.0947,el Prat de Llobregat,109
737,40.9918,0.9230,l'Hospitalet de l'Infant,205
738,40.6572,-4.6995,Ávila,92
739,37.5422,-5.0826,Écija,531


In [12]:
# Build the text shown in each marker's popup, e.g. "Seville: 5201 visits".
visit_counts = total_cities['total_counts'].astype(int)
# The counts column is stored as strings from this cell on.
total_cities['total_counts'] = visit_counts.astype(str)
# Use the singular for cities with exactly one visit ("1 visit", not "1 visits").
visit_word = visit_counts.map(lambda n: ' visit' if n == 1 else ' visits')
# Vectorised string concatenation instead of a row-by-row join.
total_cities['totals'] = (
    total_cities['total_city'] + ': ' + total_cities['total_counts'] + visit_word
)
total_cities

Unnamed: 0,latitude,longitude,total_city,total_counts,totals
0,43.3666,-8.4068,A Coruña,645,A Coruña: 645 visits
1,38.2055,-1.3991,Abaran,511,Abaran: 511 visits
2,5.3097,-4.0127,Abidjan,2,Abidjan: 2 visits
3,24.4667,54.3667,Abu Dhabi,2,Abu Dhabi: 2 visits
4,3.9869,-73.7580,Acacias,1,Acacias: 1 visits
...,...,...,...,...,...
736,41.3278,2.0947,el Prat de Llobregat,109,el Prat de Llobregat: 109 visits
737,40.9918,0.9230,l'Hospitalet de l'Infant,205,l'Hospitalet de l'Infant: 205 visits
738,40.6572,-4.6995,Ávila,92,Ávila: 92 visits
739,37.5422,-5.0826,Écija,531,Écija: 531 visits


In [13]:
# Create the map and add one marker per city.

title_html = '''
             <h3 align="center" style="font-size:20px"><b>WEB CONNECTIONS BY CITY</b></h3>
             '''
# Centre the map on the mean latitude/longitude of the rows in `ip`.
# NOTE: the name `map` shadows Python's built-in `map`; it is kept because the
# cell that saves the HTML file refers to it.
map = folium.Map(location=[ip.latitude.mean(), ip.longitude.mean()],
                 zoom_start=2.50, control_scale=True)

# Group nearby markers into clusters so that the several hundred city markers stay
# readable when zoomed out. `clustered_marker=True` is not a `folium.Marker` option
# and had no effect; clustering is provided by the MarkerCluster plugin instead.
marker_cluster = MarkerCluster().add_to(map)

# One marker per city; the popup shows the "<city>: <n> visits" label.
for city in total_cities.itertuples(index=False):
    folium.Marker(
        [city.latitude, city.longitude],
        popup=city.totals,
        tooltip='<strong>Click here to see Popup</strong>',
        icon=folium.Icon(color='green', prefix='fa', icon='wifi'),
    ).add_to(marker_cluster)

# Add the title to the HTML of the page that contains the map.
map.get_root().html.add_child(folium.Element(title_html))

map

In [14]:
# Save the map as an HTML file.
map.save('interactive_map.html')

The link to the interactive map on GitHub: 

https://elenaml81.github.io/Registre_de_logs/interactive_map.html