In [1]:
import numpy as np
import pandas as pd
import folium
from folium.plugins import MarkerCluster

In [2]:
df=pd.read_csv('internet_speed.csv')
df.head()

Unnamed: 0,Country,Broadband Speed Rank,Broadband Mbps,Mobile Speed Rank,Mobile Mbps,As of
0,Monaco,1.0,192.68,,,January 2022
1,Singapore,2.0,192.01,19.0,64.92,January 2022
2,Chile,3.0,189.36,109.0,15.5,January 2022
3,Thailand,4.0,184.03,56.0,32.38,January 2022
4,Hong Kong (SAR),5.0,173.42,38.0,45.86,January 2022


In [3]:
df.shape

(179, 6)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 179 entries, 0 to 178
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Country               179 non-null    object 
 1   Broadband Speed Rank  179 non-null    float64
 2   Broadband Mbps        179 non-null    float64
 3   Mobile Speed Rank     139 non-null    float64
 4   Mobile Mbps           139 non-null    float64
 5   As of                 179 non-null    object 
dtypes: float64(4), object(2)
memory usage: 8.5+ KB


In [5]:
num_col=df.select_dtypes(include='float64')

In [6]:
print('Percentage of NaN in numerical features:\n',num_col.isna().sum())

Percentage of NaN in numerical features:
 Broadband Speed Rank     0
Broadband Mbps           0
Mobile Speed Rank       40
Mobile Mbps             40
dtype: int64


In [7]:
df.dropna(inplace=True)

In [8]:
df.shape

(139, 6)

In [9]:
df.drop(['As of'],axis=1,inplace=True)

In [10]:
df.head()

Unnamed: 0,Country,Broadband Speed Rank,Broadband Mbps,Mobile Speed Rank,Mobile Mbps
1,Singapore,2.0,192.01,19.0,64.92
2,Chile,3.0,189.36,109.0,15.5
3,Thailand,4.0,184.03,56.0,32.38
4,Hong Kong (SAR),5.0,173.42,38.0,45.86
5,Denmark,6.0,163.6,8.0,87.11


In [11]:
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2
def get_continent(col):
    try:
        cn_a2_code =  country_name_to_country_alpha2(col)
    except:
        cn_a2_code = 'Unknown' 
    try:
        cn_continent = country_alpha2_to_continent_code(cn_a2_code)
    except:
        cn_continent = 'Unknown' 
    return (cn_a2_code, cn_continent,col)

In [12]:
b=[]
for i in df['Country']:
    a=get_continent(i)
    b.append(a)

In [13]:
df1=pd.DataFrame(b,columns=['country_code','continent','Country'])

In [14]:
df1.info(),df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 139 entries, 0 to 138
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   country_code  139 non-null    object
 1   continent     139 non-null    object
 2   Country       139 non-null    object
dtypes: object(3)
memory usage: 3.4+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 139 entries, 1 to 178
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Country               139 non-null    object 
 1   Broadband Speed Rank  139 non-null    float64
 2   Broadband Mbps        139 non-null    float64
 3   Mobile Speed Rank     139 non-null    float64
 4   Mobile Mbps           139 non-null    float64
dtypes: float64(4), object(1)
memory usage: 6.5+ KB


(None, None)

In [15]:
pd.concat([df,df1],join='inner',ignore_index=True)

Unnamed: 0,Country
0,Singapore
1,Chile
2,Thailand
3,Hong Kong (SAR)
4,Denmark
...,...
273,Yemen
274,Ethiopia
275,Cuba
276,Turkmenistan


In [16]:
df2=pd.merge(left=df,right=df1,how='inner',on=['Country'])

In [17]:
df2.shape

(139, 7)

In [18]:

from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="sourabh")
def geolocate(country):
    try:
        loc = geolocator.geocode(country)
        return (loc.latitude, loc.longitude,country)
    except:
        return np.nan

In [19]:
c=[]
for i in df['Country']:
    d=geolocate(i)
    c.append(d)

In [20]:
df3=pd.DataFrame(c,columns=['latitude','longitude','Country'])

In [21]:
df4=pd.merge(left=df2,right=df3,how='inner',on=['Country'])

In [22]:
df4.head()

Unnamed: 0,Country,Broadband Speed Rank,Broadband Mbps,Mobile Speed Rank,Mobile Mbps,country_code,continent,latitude,longitude
0,Singapore,2.0,192.01,19.0,64.92,SG,AS,1.357107,103.819499
1,Chile,3.0,189.36,109.0,15.5,CL,SA,-31.761336,-71.31877
2,Thailand,4.0,184.03,56.0,32.38,TH,AS,14.897192,100.83273
3,Hong Kong (SAR),5.0,173.42,38.0,45.86,Unknown,Unknown,22.350627,114.184916
4,Denmark,6.0,163.6,8.0,87.11,DK,EU,55.670249,10.333328


In [23]:
df4[['Broadband Speed Rank','Mobile Speed Rank']]=df4[['Broadband Speed Rank','Mobile Speed Rank']].astype(int)

In [24]:
df4['country']=df4['Country'].values.astype(str)

In [25]:
df4.head()

Unnamed: 0,Country,Broadband Speed Rank,Broadband Mbps,Mobile Speed Rank,Mobile Mbps,country_code,continent,latitude,longitude,country
0,Singapore,2,192.01,19,64.92,SG,AS,1.357107,103.819499,Singapore
1,Chile,3,189.36,109,15.5,CL,SA,-31.761336,-71.31877,Chile
2,Thailand,4,184.03,56,32.38,TH,AS,14.897192,100.83273,Thailand
3,Hong Kong (SAR),5,173.42,38,45.86,Unknown,Unknown,22.350627,114.184916,Hong Kong (SAR)
4,Denmark,6,163.6,8,87.11,DK,EU,55.670249,10.333328,Denmark


In [26]:
print(df4.dtypes)

Country                  object
Broadband Speed Rank      int32
Broadband Mbps          float64
Mobile Speed Rank         int32
Mobile Mbps             float64
country_code             object
continent                object
latitude                float64
longitude               float64
country                  object
dtype: object


In [29]:
world_map= folium.Map(tiles="cartodbpositron")
marker_cluster = MarkerCluster().add_to(world_map)
def popup_html(row):
    i=row
    country=df4.iloc[i]['Country']
    rank  =df4.iloc[i]['Broadband Speed Rank']
    speed   =df4.iloc[i]['Broadband Mbps']
    html= """<!DOCTYPE html>
<html> 
    
    
<head style='background-color:#949398FF'>    
    <h1 style='font-size:30px;color:#5B84B1FF'>{}""".format(country) +"""</h1>
    </head>
    <body >
             <p style='color:#FC766AFF'>Rank:</p>{}""".format(rank) +"""
             <p style='color:#FC766AFF'>Speed(mbps):</p>{}""".format(speed) +"""
             
     </body>        
            </html> 
             """
    
    return  html
    
for i in range(len(df4)):
        lat = df4.iloc[i]['latitude']
        long = df4.iloc[i]['longitude']
        radius=5
        html=popup_html(i)
        popup = folium.Popup(folium.Html(html, script=True))
        folium.CircleMarker(location = [lat, long], radius=radius, popup= popup, fill =True).add_to(marker_cluster)
title_html = '''
             <h3 align="center" style="font-size:20px;text-transform: capitalize"><b>Broadband speed & ranking</b></h3>
             '''
#show the map
world_map.get_root().html.add_child(folium.Element(title_html))
world_map
world_map.save(outfile="Map.html")