### Data Cleaning & Interactive Folium Maps for Final Phases: Daily Observer

In [None]:
#!pip install -U spacy
#!python -m spacy download en
#!pip install geopy

In [1]:
# for manipulating dataframes
import pandas as pd
import numpy as np
# for natural language processing: named entity recognition
import spacy
from collections import Counter
import en_core_web_sm
nlp = en_core_web_sm.load()
# for visualizations
%matplotlib inline

from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="jjj")

import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('../input/dhakatribunecomplete/TheDailyObserver-cleaned-final.csv')
df = df[['links', 'Year', 'date']]

In [3]:
df_new = pd.read_csv('../input/dhakatribunecomplete/TheDailyObserver.csv')
df_new

Unnamed: 0,links,titles,News
0,https://www.observerbd.com/news.php?id=319689,"Mother Killed, police official injured in Mada...","Mother Killed, police official injured in Mada..."
1,https://www.observerbd.com/news.php?id=319655,Three killed in Satkhira bike accident,Three killed in Satkhira bike accidentThree me...
2,https://www.observerbd.com/news.php?id=319338,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...
3,https://www.observerbd.com/news.php?id=319194,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...
4,https://www.observerbd.com/news.php?id=319024,Two killed in Mymensingh road accident,Two killed in Mymensingh road accident Two peo...
...,...,...,...
804,https://www.observerbd.com/news.php?id=176806,Mother-son killed in road accident\n,A woman and her son were killed while her daug...
805,https://www.observerbd.com/news.php?id=176637,Sirajganj road accident kills 3,Sirajganj road accident kills 3Three people we...
806,https://www.observerbd.com/news.php?id=176524,Teacher killed in road accident,Teacher killed in road accident A retired teac...
807,https://www.observerbd.com/news.php?id=176389,3 of a family die in Faridpur road accident,3 of a family die in Faridpur road accidentThr...


In [4]:
df1 = df.merge(df_new, how='left', on ='links')
df1

Unnamed: 0,links,Year,date,titles,News
0,https://www.observerbd.com/news.php?id=319689,2021,30-Jun,"Mother Killed, police official injured in Mada...","Mother Killed, police official injured in Mada..."
1,https://www.observerbd.com/news.php?id=319655,2021,30-Jun,Three killed in Satkhira bike accident,Three killed in Satkhira bike accidentThree me...
2,https://www.observerbd.com/news.php?id=319338,2021,28-Jun,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...
3,https://www.observerbd.com/news.php?id=319194,2021,27-Jun,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...
4,https://www.observerbd.com/news.php?id=319024,2021,26-Jun,Two killed in Mymensingh road accident,Two killed in Mymensingh road accident Two peo...
...,...,...,...,...,...
1382,https://www.observerbd.com/news.php?id=176806,2019,06-Jan,Mother-son killed in road accident\n,A woman and her son were killed while her daug...
1383,https://www.observerbd.com/news.php?id=176637,2019,05-Jan,Sirajganj road accident kills 3,Sirajganj road accident kills 3Three people we...
1384,https://www.observerbd.com/news.php?id=176524,2019,04-Jan,Teacher killed in road accident,Teacher killed in road accident A retired teac...
1385,https://www.observerbd.com/news.php?id=176389,2019,03-Jan,3 of a family die in Faridpur road accident,3 of a family die in Faridpur road accidentThr...


### Employing spaCy to retrive the GPEs

In [5]:
%%time

col_value = []

for i in range(len(df1)):
    tokens = nlp(''.join(str(df1['News'][i])))
    
    location_list = []
    for ent in tokens.ents:
        if ent.label_ == 'GPE':
            location_list.append(ent.text)
    
    
    location_counts = Counter(location_list).most_common(1)
    col_value.append(''.join(filter(str.isalpha, str(location_counts))))

CPU times: user 55.4 s, sys: 84.5 ms, total: 55.4 s
Wall time: 55.5 s


In [6]:
df1['Location_appox'] = col_value

In [7]:
# dropping duplicate values
df1 = df1.drop_duplicates(keep=False)
df1

Unnamed: 0,links,Year,date,titles,News,Location_appox
0,https://www.observerbd.com/news.php?id=319689,2021,30-Jun,"Mother Killed, police official injured in Mada...","Mother Killed, police official injured in Mada...",
1,https://www.observerbd.com/news.php?id=319655,2021,30-Jun,Three killed in Satkhira bike accident,Three killed in Satkhira bike accidentThree me...,
2,https://www.observerbd.com/news.php?id=319338,2021,28-Jun,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...,Chandina
3,https://www.observerbd.com/news.php?id=319194,2021,27-Jun,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...,Jashore
4,https://www.observerbd.com/news.php?id=319024,2021,26-Jun,Two killed in Mymensingh road accident,Two killed in Mymensingh road accident Two peo...,Jangaldia
...,...,...,...,...,...,...
1382,https://www.observerbd.com/news.php?id=176806,2019,06-Jan,Mother-son killed in road accident\n,A woman and her son were killed while her daug...,Godaikandi
1383,https://www.observerbd.com/news.php?id=176637,2019,05-Jan,Sirajganj road accident kills 3,Sirajganj road accident kills 3Three people we...,
1384,https://www.observerbd.com/news.php?id=176524,2019,04-Jan,Teacher killed in road accident,Teacher killed in road accident A retired teac...,Kasba
1385,https://www.observerbd.com/news.php?id=176389,2019,03-Jan,3 of a family die in Faridpur road accident,3 of a family die in Faridpur road accidentThr...,Boalmari


### Crazy cleaning starts here

In [8]:
df1['Location_appox'] = df1['Location_appox'].replace( '', np.nan)
df1['Location_appox'] = df1['Location_appox'].replace( 'DhakaAricha', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'HaziparaPatrolPump', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kamarkhand', 'Sirajganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Chhagalnaiya', 'Feni')
df1['Location_appox'] = df1['Location_appox'].replace( 'Lohaghara', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Boilgaon', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Banshkhali Upazila', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Nodigram', 'Nandigram')
df1['Location_appox'] = df1['Location_appox'].replace( 'Nondigram', 'Nandigram')
df1['Location_appox'] = df1['Location_appox'].replace( 'PabnaaccidentA', 'Pabna')

df1['Location_appox'] = df1['Location_appox'].replace( 'Jalashivillage', 'Panchagarh')
df1['Location_appox'] = df1['Location_appox'].replace( 'Katalivillage', 'Panchagarh')
df1['Location_appox'] = df1['Location_appox'].replace( 'Joyrampur', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Phulbarimunicipality', 'Dinajpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'JhalakatiDistrict', 'Jhalakati')
df1['Location_appox'] = df1['Location_appox'].replace( 'Sylhetcity', 'Sylhet')
df1['Location_appox'] = df1['Location_appox'].replace( 'Sylhetcity', 'Sylhet')
df1['Location_appox'] = df1['Location_appox'].replace( 'KolaroaJashore', 'Jashore')
df1['Location_appox'] = df1['Location_appox'].replace( 'PatharghataUpazila', 'Barguna')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kaloroa', 'Satkhira')
df1['Location_appox'] = df1['Location_appox'].replace( 'Joypuhat', 'Joypurhat')
df1['Location_appox'] = df1['Location_appox'].replace( 'Naogao', 'Naogaon')
df1['Location_appox'] = df1['Location_appox'].replace( 'EastHajirpara', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Hazirparavillage', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'AfajiaBazar', 'Hatiya Island')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kurigrams', 'Kurigram')
df1['Location_appox'] = df1['Location_appox'].replace( 'Bbaria', 'Brahmanbaria')
df1['Location_appox'] = df1['Location_appox'].replace( 'Awravillage', 'Joypurhat')
df1['Location_appox'] = df1['Location_appox'].replace( 'Lakkatura', 'Sylhet')
df1['Location_appox'] = df1['Location_appox'].replace( 'DaganbhuiyanBazar', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'FaridpuraccidentTwo', 'Faridpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'DaganbhuiyanBazar', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'DaganbhuiyanBazar', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Chechniakandi', 'Gopalganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Charfesson', 'Barisal')
df1['Location_appox'] = df1['Location_appox'].replace( 'Horirampur', 'Harirampur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Mymensinghs', 'Mymensingh')
df1['Location_appox'] = df1['Location_appox'].replace( 'Rusulpura', 'Tangail')
df1['Location_appox'] = df1['Location_appox'].replace( 'Rajshahiparavillage', 'Rajshahi')
df1['Location_appox'] = df1['Location_appox'].replace( 'Binaivillage', 'Joypurhat')
df1['Location_appox'] = df1['Location_appox'].replace( 'JashoreBenapole', 'Jashore')
df1['Location_appox'] = df1['Location_appox'].replace( 'Sheanpara', 'Patiya Upazila')
df1['Location_appox'] = df1['Location_appox'].replace( 'ChoughoriBazar', 'Sylhet')
df1['Location_appox'] = df1['Location_appox'].replace( 'GararonVillage', 'Gazipur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Subarnachar', 'Noakhali')
df1['Location_appox'] = df1['Location_appox'].replace( 'Ctg', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'MasrutNakhenda', 'Kurigram')
df1['Location_appox'] = df1['Location_appox'].replace( 'PaschimRampur', 'Sylhet')
df1['Location_appox'] = df1['Location_appox'].replace( 'Fulgazi', 'Feni')
df1['Location_appox'] = df1['Location_appox'].replace( 'UllaparaVillage', 'Sirajganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Supariparavillage', 'Thakurgaon')
df1['Location_appox'] = df1['Location_appox'].replace( 'AlamBazar', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Pashchimdasra', 'Manikganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Borashura', 'Gopalganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Korbaniaghona', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Dhakacity', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Patkelghata', 'Khulna')
df1['Location_appox'] = df1['Location_appox'].replace( 'Bakhorerkandi', 'Madaripur')
df1['Location_appox'] = df1['Location_appox'].replace( 'MollikBariBazar', 'Mymensingh')
df1['Location_appox'] = df1['Location_appox'].replace( 'Baniarchhara', "Cox's Bazar")
df1['Location_appox'] = df1['Location_appox'].replace( 'HajirhatTalpatti', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kanaitola', 'Jhenaidah')
df1['Location_appox'] = df1['Location_appox'].replace( 'Cox', "Cox's Bazar")
df1['Location_appox'] = df1['Location_appox'].replace( 'KuakataBeach', 'Patuakhali')
df1['Location_appox'] = df1['Location_appox'].replace( 'MathbariaUpazila', 'Mathbaria Upazila')
df1['Location_appox'] = df1['Location_appox'].replace( 'Palashpur', 'Khagrachari')
df1['Location_appox'] = df1['Location_appox'].replace( 'Simultala', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Nabogram', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kamarkhanda', 'Sirajganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Tetultoly', 'Mymensingh')
df1['Location_appox'] = df1['Location_appox'].replace( 'Hirapurvillage', 'Lakshmipur')

df1['Location_appox'] = df1['Location_appox'].replace( 'BhandariaUpazila', 'Pirojpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'SreepurUpazila', 'Gazipur')
df1['Location_appox'] = df1['Location_appox'].replace( 'NoaparaVillage', 'Noapara')
df1['Location_appox'] = df1['Location_appox'].replace( 'BaroharishpurBypass', 'Bheramara')
df1['Location_appox'] = df1['Location_appox'].replace( 'DemraTrafficZone', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kapastala', 'Dhaka')

df1['Location_appox'] = df1['Location_appox'].replace( 'Kakdanga', 'Khulna')
df1['Location_appox'] = df1['Location_appox'].replace( 'Godaikandi', 'Netrokona')
df1['Location_appox'] = df1['Location_appox'].replace( 'Nirwarispur', 'Noakhali')
df1['Location_appox'] = df1['Location_appox'].replace( 'Dholiapara', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'NaudaparaAmchattar', 'Noapara')
df1['Location_appox'] = df1['Location_appox'].replace( 'Paolanpur', 'Narshingdi')

df1['Location_appox'] = df1['Location_appox'].replace( 'Sapahar', 'Rajshahi')
df1['Location_appox'] = df1['Location_appox'].replace( 'Louhajang', 'Munshiganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'BelkuchiUpazila', 'Sirajganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Soidabad', 'Saidabad')
df1['Location_appox'] = df1['Location_appox'].replace( 'Orakandi', 'Faridpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Moksudpur', 'Gopalganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Tilchara', 'Gopalganj')

df1['Location_appox'] = df1['Location_appox'].replace( 'Uzirpur', 'Barisal')
df1['Location_appox'] = df1['Location_appox'].replace( 'Boraigram', 'Natore')
df1['Location_appox'] = df1['Location_appox'].replace( 'Sayedabad', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Geor', 'Ghior')
df1['Location_appox'] = df1['Location_appox'].replace( 'Chattagram', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Chouddagram', 'Cumilla')
df1['Location_appox'] = df1['Location_appox'].replace( 'Gadaikandi', 'Netrokona')

df1['Location_appox'] = df1['Location_appox'].replace( 'Tentulia', 'Panchagarh')
df1['Location_appox'] = df1['Location_appox'].replace( 'Laxmipur', 'Lakshmipur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Jaintapur', 'Sylhet')
df1['Location_appox'] = df1['Location_appox'].replace( 'Rangunia', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kotalipara', 'Gopalganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Jhenidah', 'Jhenaidah')
df1['Location_appox'] = df1['Location_appox'].replace( 'Gadaikandi', 'Netrokona')

df1['Location_appox'] = df1['Location_appox'].replace( 'Jhalakati', 'Jhalokati')
df1['Location_appox'] = df1['Location_appox'].replace( 'Jhenidah', 'Jhenaidah')
df1['Location_appox'] = df1['Location_appox'].replace( 'Gadaikandi', 'Netrokona')
df1['Location_appox'] = df1['Location_appox'].replace( 'Baraigram', 'Natore')

df1['Location_appox'] = df1['Location_appox'].replace( 'southPalashpurofBarishalcity', 'Barishal')
df1['Location_appox'] = df1['Location_appox'].replace( 'ShajahanpurUpazila', 'Bogra')
df1['Location_appox'] = df1['Location_appox'].replace( 'GomastapurUpazila', 'Nawabganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Gabindaganj', 'Gaibandha')

df1['Location_appox'] = df1['Location_appox'].replace( 'FakirhatUpazila', 'Bagerhat')
df1['Location_appox'] = df1['Location_appox'].replace( 'Rousonbagh', 'Panchagarh')
df1['Location_appox'] = df1['Location_appox'].replace( 'BrahmanbariasNabinagar', 'Brahmanbaria')
df1['Location_appox'] = df1['Location_appox'].replace( 'Ramgar', 'Khagrachari')

df1['Location_appox'] = df1['Location_appox'].replace( 'Kashiadanga', 'Rajshahi')
df1['Location_appox'] = df1['Location_appox'].replace( 'Gomostapur', 'Nawabganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Khanjanamara', 'Rangpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Khejurtola', 'Chuadanga')

df1['Location_appox'] = df1['Location_appox'].replace( 'PakshiUnion', 'Pabna')
df1['Location_appox'] = df1['Location_appox'].replace( 'LamaMunicipality', 'Lama')
df1['Location_appox'] = df1['Location_appox'].replace( 'Rasulpurvillage', 'Tangail')
df1['Location_appox'] = df1['Location_appox'].replace( 'Manikchhari', 'Chittagong')

df1['Location_appox'] = df1['Location_appox'].replace( 'ChattogramCity', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'NoakhaliDistrict', 'Noakhali')
df1['Location_appox'] = df1['Location_appox'].replace( 'KhulnaCityCorporation', 'Khulna')
df1['Location_appox'] = df1['Location_appox'].replace( 'NatoreDistrict', 'Natore')

df1['Location_appox'] = df1['Location_appox'].replace( 'CumillaCity', 'Cumilla')
df1['Location_appox'] = df1['Location_appox'].replace( 'NoakhaliDistrict', 'Noakhali')
df1['Location_appox'] = df1['Location_appox'].replace( 'BashundharaResidentialArea', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'KhulnaMawa', 'Khulna')

df1['Location_appox'] = df1['Location_appox'].replace( 'Godagai', 'Rajshahi')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kalignaj', 'Gazipur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Vektamari', 'Bagerhat')
df1['Location_appox'] = df1['Location_appox'].replace( 'AkkelpurMunicipality', 'Joypurhat')
df1['Location_appox'] = df1['Location_appox'].replace( 'Baufal', 'Patuakhali')
df1['Location_appox'] = df1['Location_appox'].replace( 'BonparaMunicipality', 'Natore')
df1['Location_appox'] = df1['Location_appox'].replace( 'Piljanga', 'Bhola')
df1['Location_appox'] = df1['Location_appox'].replace( 'ShibcharUpazila', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kamalnagar', 'Chittagong')
df1['Location_appox'] = df1['Location_appox'].replace( 'Saddarparavillage', 'Gaibandha')
df1['Location_appox'] = df1['Location_appox'].replace( 'MohadevpurUpazila', 'Rajshahi')
df1['Location_appox'] = df1['Location_appox'].replace( 'Madariganj', 'Jamalpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Shahipara', 'Rangpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kalikapurvillage', 'Satkhira')
df1['Location_appox'] = df1['Location_appox'].replace( 'Jafarpurvillage', 'Jashore')
df1['Location_appox'] = df1['Location_appox'].replace( 'Nagardia', 'Faridpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'ChawkgopalVillage', 'Sylhet')
df1['Location_appox'] = df1['Location_appox'].replace( 'Lemua', 'Barguna')
df1['Location_appox'] = df1['Location_appox'].replace( 'Noakati', 'Khulna')
df1['Location_appox'] = df1['Location_appox'].replace( 'Pirgachha', 'Rangpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Ujanpara', 'Rajshahi')
df1['Location_appox'] = df1['Location_appox'].replace( 'Telijana', 'Sirajganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Bukailvillage', 'Faridpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Bhobanipurvillage', 'Meherpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'BegumganjUpazila', 'Noakhali')
df1['Location_appox'] = df1['Location_appox'].replace( 'Kellaposhi', 'Bogura')
df1['Location_appox'] = df1['Location_appox'].replace( 'JangalbariVillage', 'Kishoreganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'ShailkupaUpazila', 'Jhenaidah')
df1['Location_appox'] = df1['Location_appox'].replace( 'Alalpur', 'Dhaka')
df1['Location_appox'] = df1['Location_appox'].replace( 'Hrila', 'Teknaf')
df1['Location_appox'] = df1['Location_appox'].replace( 'Ullahpara', 'Sirajganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'BarishalSher', 'Barisal')
df1['Location_appox'] = df1['Location_appox'].replace( 'Bhennapara', 'Gopalganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'KasbaUpazila', 'Brahmanbaria')
df1['Location_appox'] = df1['Location_appox'].replace( 'Gosainagar', 'Manikganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'UlipurUpazila', 'Rangpur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Aiirkhamar', 'Lalmonirhat')
df1['Location_appox'] = df1['Location_appox'].replace( 'SapaharUpazila', 'Naogaon')
df1['Location_appox'] = df1['Location_appox'].replace( 'Karnopur', 'Gazipur')
df1['Location_appox'] = df1['Location_appox'].replace( 'Narayangaj', 'Narayanganj')
df1['Location_appox'] = df1['Location_appox'].replace( 'Gopalaganj', 'Gopalganj')


X = ['MofizulKarigor', 'BariMajlis', 'Peru', 'minibus', 'Signboard', 'AnwerHossain', 'Jangaldia', 'KundaisVillage', "Nazma", 'Jugantar', "Canada", 'Sonmandi', "Bangladesh", "Oman", "I0"\
    'Banganadhu', 'Banganadhu', "Anik", "Bokcharavillage", "Hanif","India", "Kabul", "Cuba", "Farakkabandhunion", "Md", "IO", "Jannatullah", "MansurAkon", "Votvoti", "Yeasin", "Ghana",\
    "Nuzrul", "Madrasha", "Pasadena", "Jasmin", "Jakir", "Mridha", "Abdur", "SaudiArabia", "Tharhaat", "SI", "Borhanuddin", "AbulHossain", 'Bolivia', "Mujahid", \
    "Akhtar", "AbdurRazzaq", "Rafiqul", 'Tazreen', "Liton", "Jeddah", "Iran", "Rasel", "ObilerBazar", "Egypt", "BarAwlia", "Malaysia", "Bypass", "Imran", "DistrictSpecialBranch",\
    "Mohishavillage", 'Pamerpara', "Nagardiavillage", "Nijkhamar", 'Paotana', 'TanvirHossain', 'Bhorbhoravillage', 'Baimhativillage', 'Molliker', 'Chandrakhanavillage',\
    'GhanekrishtapurVillage', 'Mulaid', "ShimultolyVillage", 'SunoiVillage', 'Fakhrul', 'DinajpurThakurgaon', 'DhakaSylhet', 'DhakaArichaHighway', 'Baikhirvillage', 'DoldolaVillage'\
    'Dhonialapara', 'HasliVillage', 'Goriarpar', 'Abudhabi', 'BazarTeknaf', 'AbuDhabi', 'BILS', "Israfil", "Bangladesh", 'Jaintapur', 'Rangunia', 'Jamtala', 'Kotalipara', 'Jhenidah', 'Palmas',       
    'Jalpaiguri', 'Jhalakati', 'Bhangura', 'Andharijhar', 'Balrampur', 'Rishipara', 'Osmaninagar', 'Sarangpur', 'Jazira', 'Sujatpur', 'Hafez', 'Jammu', 'Ghonapara', 'Abhaynagar',
    'Moragang', 'Paba', 'Kathalbari', 'SriLanka', 'Riyadh', 'Jibonpur', 'Chuniapara', 'Rahimabad', 'Kolabari', 'Pakati', 'Shimulia', 'Bikash', 'Dhonialapara', 'Utsab', 'DoldolaVillage',\
    "Kolkata", 'Chhadaha', "Mexico", 'UpalshohorVillage', 'NitaiMazumdar', 'Munshiganjroad', 'Mohendra', 'Mosharhati', 'Goshercharvillage', 'Bailjuri', 'Mandartola', 'Durgapurvillage', \
     'HogalpashaVillage', 'Tiktikipara', 'Sadikul', 'Swapan', 'TokimunnesaBegum', 'Hamidul', 'accidentShabanaAzmi', 'BelalHossain', 'NurHossain', 'Organising', 'Meduari', 'Baddipur',\
     'Banshgara', 'Bhojergati', 'Shibgati', 'Gualbari', 'Octogenarian', 'Hanjala', 'FoujdarhatBypass', 'Bakcharavillage', 'FulpurUpazillas', 'AramnagarVillage', 'Chhilimpur',\
     'Sutahati', 'DhipikuraVillage', 'Nayadingi', 'Grambandi', 'TF', 'Ruhul', 'Nasrin', 'SB', "Sajib", "Bappi", "Kashem", "Hashem", "Kalu", 'Mabia', "Fahad", "Kamrul", "Uttar", "Dulal", \
    "Elderly", "Mahbub", "LA", "City", "RI", "Hadi", "Taslima", "Parvez", "Roxy"]
     

df1.loc[df1.Location_appox.isin(X), 'Location_appox'] = np.NaN

In [9]:
df1 = df1.dropna()
df1

Unnamed: 0,links,Year,date,titles,News,Location_appox
2,https://www.observerbd.com/news.php?id=319338,2021,28-Jun,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...,Chandina
3,https://www.observerbd.com/news.php?id=319194,2021,27-Jun,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...,Jashore
10,https://www.observerbd.com/news.php?id=318258,2021,21-Jun,"Man, nephew killed in Chattogram bike accident","Man, nephew killed in Chattogram bike accident...",Mirsarai
26,https://www.observerbd.com/news.php?id=317968,2021,19-Jun,NGO worker killed in Bhola road accident,NGO worker killed in Bhola road accidentA work...,Bhola
27,https://www.observerbd.com/news.php?id=317963,2021,19-Jun,Three killed in Narsingdi road accident,Three killed in Narsingdi road accidentThree p...,Narsingdi
...,...,...,...,...,...,...
1381,https://www.observerbd.com/news.php?id=176928,2019,07-Jan,"Mother, son killed in Netrakona road accident","NETRAKONA, Jan 6: A mother and her son were ki...",Dhaka
1382,https://www.observerbd.com/news.php?id=176806,2019,06-Jan,Mother-son killed in road accident\n,A woman and her son were killed while her daug...,Netrokona
1384,https://www.observerbd.com/news.php?id=176524,2019,04-Jan,Teacher killed in road accident,Teacher killed in road accident A retired teac...,Kasba
1385,https://www.observerbd.com/news.php?id=176389,2019,03-Jan,3 of a family die in Faridpur road accident,3 of a family die in Faridpur road accidentThr...,Boalmari


In [10]:
a = list(df1.Location_appox.unique())

coordinates = []

def loc_errors(a):
    list_of_error_loc = []
    for loc in a:
        try:
            location = geolocator.geocode(str(loc + ", Bangladesh"))
            lat_long = (location.latitude, location.longitude)
            coordinates.append(list(lat_long))
        except Exception:
            list_of_error_loc.append(loc)
            continue
    return list_of_error_loc

%time list_of_error_loc = loc_errors(a)
list_of_error_loc

CPU times: user 472 ms, sys: 36.9 ms, total: 508 ms
Wall time: 1min 47s


[]

In [11]:
df1 = df1.reset_index(drop = True)

In [12]:
df1.Location_appox = df1['Location_appox'] + ', Bangladesh'
df1.head()

Unnamed: 0,links,Year,date,titles,News,Location_appox
0,https://www.observerbd.com/news.php?id=319338,2021,28-Jun,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...,"Chandina, Bangladesh"
1,https://www.observerbd.com/news.php?id=319194,2021,27-Jun,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...,"Jashore, Bangladesh"
2,https://www.observerbd.com/news.php?id=318258,2021,21-Jun,"Man, nephew killed in Chattogram bike accident","Man, nephew killed in Chattogram bike accident...","Mirsarai, Bangladesh"
3,https://www.observerbd.com/news.php?id=317968,2021,19-Jun,NGO worker killed in Bhola road accident,NGO worker killed in Bhola road accidentA work...,"Bhola, Bangladesh"
4,https://www.observerbd.com/news.php?id=317963,2021,19-Jun,Three killed in Narsingdi road accident,Three killed in Narsingdi road accidentThree p...,"Narsingdi, Bangladesh"


### Using GeoPy to retrive coordinates of the locations

In [13]:
%%time

loc_bengali = []
lat = []
lon = []

for i in range(len(df1)):
    location = geolocator.geocode(df1.Location_appox.values[i])
    print(df1.Location_appox.values[i])
    loc_bengali.append(location.address)
    lat.append(location.latitude)
    lon.append(location.longitude)

Chandina, Bangladesh
Jashore, Bangladesh
Mirsarai, Bangladesh
Bhola, Bangladesh
Narsingdi, Bangladesh
Bogura, Bangladesh
Narsingdi, Bangladesh
Bhandaria, Bangladesh
Dhaka, Bangladesh
Bangabandhu, Bangladesh
Barishal, Bangladesh
Manikganj, Bangladesh
Dhamrai, Bangladesh
Dinajpur, Bangladesh
Ashulia, Bangladesh
Bogra, Bangladesh
Manda, Bangladesh
Bhurungamari, Bangladesh
Noakhali, Bangladesh
Shahjadpur, Bangladesh
Dhaka, Bangladesh
Raghunathpur, Bangladesh
Chattogram, Bangladesh
Narail, Bangladesh
Narail, Bangladesh
Narayanganj, Bangladesh
Dhamrai, Bangladesh
Benapole, Bangladesh
Gazipur, Bangladesh
Gazipur, Bangladesh
Chattogram, Bangladesh
Sirajganj, Bangladesh
Kalikapur, Bangladesh
Noakhali, Bangladesh
Shahzadpur, Bangladesh
Sylhet, Bangladesh
Kaliganj, Bangladesh
Narayanganj, Bangladesh
Rangpur, Bangladesh
Thakurgaon, Bangladesh
Gouripur, Bangladesh
Nandigram, Bangladesh
Dighi, Bangladesh
Nagarkanda, Bangladesh
Shahjadpur, Bangladesh
Gaibandha, Bangladesh
Magura, Bangladesh
Dhaka, Ba

In [14]:
df1['location'] = loc_bengali
df1['Latitude'] = lat
df1['Longitude'] = lon
df1

Unnamed: 0,links,Year,date,titles,News,Location_appox,location,Latitude,Longitude
0,https://www.observerbd.com/news.php?id=319338,2021,28-Jun,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...,"Chandina, Bangladesh","চান্দিনা, কুমিল্লা জেলা, চট্টগ্রাম বিভাগ, 3510...",23.488376,91.007812
1,https://www.observerbd.com/news.php?id=319194,2021,27-Jun,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...,"Jashore, Bangladesh","যশোর, যশোর জেলা, খুলনা বিভাগ, 7400, বাংলাদেশ",23.166597,89.209514
2,https://www.observerbd.com/news.php?id=318258,2021,21-Jun,"Man, nephew killed in Chattogram bike accident","Man, nephew killed in Chattogram bike accident...","Mirsarai, Bangladesh","Mirsarai Upazila Code Road, চট্টগ্রাম জেলা, চট...",22.774456,91.563192
3,https://www.observerbd.com/news.php?id=317968,2021,19-Jun,NGO worker killed in Bhola road accident,NGO worker killed in Bhola road accidentA work...,"Bhola, Bangladesh","ভোলা জেলা, বরিশাল বিভাগ, বাংলাদেশ",22.336542,90.843902
4,https://www.observerbd.com/news.php?id=317963,2021,19-Jun,Three killed in Narsingdi road accident,Three killed in Narsingdi road accidentThree p...,"Narsingdi, Bangladesh","নরসিংদী, নরসিংদী জেলা, ঢাকা বিভাগ, 1602, বাংলাদেশ",23.915645,90.698196
...,...,...,...,...,...,...,...,...,...
464,https://www.observerbd.com/news.php?id=176928,2019,07-Jan,"Mother, son killed in Netrakona road accident","NETRAKONA, Jan 6: A mother and her son were ki...","Dhaka, Bangladesh","ঢাকা, Chanpara Bazar, ঢাকা জেলা, ঢাকা বিভাগ, 1...",23.810651,90.412647
465,https://www.observerbd.com/news.php?id=176806,2019,06-Jan,Mother-son killed in road accident\n,A woman and her son were killed while her daug...,"Netrokona, Bangladesh","নেত্রকোনা, নেত্রকোনা জেলা, ময়মনসিংহ বিভাগ, বা...",24.885340,90.732625
466,https://www.observerbd.com/news.php?id=176524,2019,04-Jan,Teacher killed in road accident,Teacher killed in road accident A retired teac...,"Kasba, Bangladesh","কসবা, ব্রাহ্মণবাড়িয়া জেলা, চট্টগ্রাম বিভাগ, ...",23.746715,91.145893
467,https://www.observerbd.com/news.php?id=176389,2019,03-Jan,3 of a family die in Faridpur road accident,3 of a family die in Faridpur road accidentThr...,"Boalmari, Bangladesh","বোয়ালমারী, ফরিদপুর জেলা, ঢাকা বিভাগ, বাংলাদেশ",23.385456,89.681615


In [15]:
df1.to_csv('Daily_Observer_hm.csv', index = False)

In [21]:
df_new = df1.copy()

In [22]:
import folium
from folium.plugins import *
from folium import plugins

In [24]:
df_new = df_new.dropna()
#df_new = df_new[(df_new['Latitude'] != 0) & (df_new['Longitude'] != 0)]

def map():


    # Create a map centered on Vancouver
    map_bd = folium.Map(location= [23.6850, 90.3563], tiles="cartodbpositron", zoom_start = 7)

    # Create a list with lat and long values and add the list to a heat map, then show map
    heat_data = [[row['Latitude'],row['Longitude']] for index, row in df_new.iterrows()]
    HeatMap(heat_data).add_to(map_bd)

    # instantiate a feature group for the incidents in the dataframe
    incidents = folium.map.FeatureGroup()

    # loop through the 100 crimes and add each to the incidents feature group
    for lat, lng, in zip(df_new.Latitude, df_new.Longitude):
        incidents.add_child(
            folium.CircleMarker(
                [lat, lng],
                radius=5, # define how big you want the circle markers to be
                color='darkred',
                fill=True,
                fill_color='red',
                fill_opacity=0.6
            )
        )
    
    #map_van.add_child(incidents)
    folium.TileLayer('cartodbdark_matter').add_to(map_bd)

    # instantiate a mark cluster object for the incidents in the dataframe
    incident = plugins.MarkerCluster().add_to(map_bd)

    # loop through the dataframe and add each data point to the mark cluster
    for lat, lng, label, in zip(df_new.Latitude, df_new.Longitude, df_new.Location_appox):
        folium.Marker(
            location=[lat, lng],
            icon=None,
            popup=label,
        ).add_to(incident)

    # add incidents to map
    map_bd.add_child(incident)
    return map_bd

map()

In [25]:
from time import strptime

df_new['month'] = df_new.date.str[3:6]
month_num = []
for i in range(len(df1)):
    month_num.append(strptime(df_new['month'][i],'%b').tm_mon)
df_new['month_num'] = month_num
df_new["Weight"] = df_new['month_num'].astype(float)
df_new

Unnamed: 0,links,Year,date,titles,News,Location_appox,location,Latitude,Longitude,month,month_num,Weight
0,https://www.observerbd.com/news.php?id=319338,2021,28-Jun,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...,"Chandina, Bangladesh","চান্দিনা, কুমিল্লা জেলা, চট্টগ্রাম বিভাগ, 3510...",23.488376,91.007812,Jun,6,6.0
1,https://www.observerbd.com/news.php?id=319194,2021,27-Jun,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...,"Jashore, Bangladesh","যশোর, যশোর জেলা, খুলনা বিভাগ, 7400, বাংলাদেশ",23.166597,89.209514,Jun,6,6.0
2,https://www.observerbd.com/news.php?id=318258,2021,21-Jun,"Man, nephew killed in Chattogram bike accident","Man, nephew killed in Chattogram bike accident...","Mirsarai, Bangladesh","Mirsarai Upazila Code Road, চট্টগ্রাম জেলা, চট...",22.774456,91.563192,Jun,6,6.0
3,https://www.observerbd.com/news.php?id=317968,2021,19-Jun,NGO worker killed in Bhola road accident,NGO worker killed in Bhola road accidentA work...,"Bhola, Bangladesh","ভোলা জেলা, বরিশাল বিভাগ, বাংলাদেশ",22.336542,90.843902,Jun,6,6.0
4,https://www.observerbd.com/news.php?id=317963,2021,19-Jun,Three killed in Narsingdi road accident,Three killed in Narsingdi road accidentThree p...,"Narsingdi, Bangladesh","নরসিংদী, নরসিংদী জেলা, ঢাকা বিভাগ, 1602, বাংলাদেশ",23.915645,90.698196,Jun,6,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...
464,https://www.observerbd.com/news.php?id=176928,2019,07-Jan,"Mother, son killed in Netrakona road accident","NETRAKONA, Jan 6: A mother and her son were ki...","Dhaka, Bangladesh","ঢাকা, Chanpara Bazar, ঢাকা জেলা, ঢাকা বিভাগ, 1...",23.810651,90.412647,Jan,1,1.0
465,https://www.observerbd.com/news.php?id=176806,2019,06-Jan,Mother-son killed in road accident\n,A woman and her son were killed while her daug...,"Netrokona, Bangladesh","নেত্রকোনা, নেত্রকোনা জেলা, ময়মনসিংহ বিভাগ, বা...",24.885340,90.732625,Jan,1,1.0
466,https://www.observerbd.com/news.php?id=176524,2019,04-Jan,Teacher killed in road accident,Teacher killed in road accident A retired teac...,"Kasba, Bangladesh","কসবা, ব্রাহ্মণবাড়িয়া জেলা, চট্টগ্রাম বিভাগ, ...",23.746715,91.145893,Jan,1,1.0
467,https://www.observerbd.com/news.php?id=176389,2019,03-Jan,3 of a family die in Faridpur road accident,3 of a family die in Faridpur road accidentThr...,"Boalmari, Bangladesh","বোয়ালমারী, ফরিদপুর জেলা, ঢাকা বিভাগ, বাংলাদেশ",23.385456,89.681615,Jan,1,1.0


In [26]:
import datetime 
lista_tempo = [] 

for x in df_new['month_num']: 
    monthinteger = x 
    lista_tempo.append(datetime.date(1900, monthinteger, 1).strftime('%B')) 
    
df_new['months_in_full'] = lista_tempo 
df_new['month_year'] = df_new['month'] + ' ' + df_new['Year'].astype(str)

df_new

Unnamed: 0,links,Year,date,titles,News,Location_appox,location,Latitude,Longitude,month,month_num,Weight,months_in_full,month_year
0,https://www.observerbd.com/news.php?id=319338,2021,28-Jun,"2 killed, 5 injured in Cumilla road accident\n",Two were killed and five others were severely ...,"Chandina, Bangladesh","চান্দিনা, কুমিল্লা জেলা, চট্টগ্রাম বিভাগ, 3510...",23.488376,91.007812,Jun,6,6.0,June,Jun 2021
1,https://www.observerbd.com/news.php?id=319194,2021,27-Jun,Four killed in Jashore accident on way to buy ...,Four killed in Jashore accident on way to buy ...,"Jashore, Bangladesh","যশোর, যশোর জেলা, খুলনা বিভাগ, 7400, বাংলাদেশ",23.166597,89.209514,Jun,6,6.0,June,Jun 2021
2,https://www.observerbd.com/news.php?id=318258,2021,21-Jun,"Man, nephew killed in Chattogram bike accident","Man, nephew killed in Chattogram bike accident...","Mirsarai, Bangladesh","Mirsarai Upazila Code Road, চট্টগ্রাম জেলা, চট...",22.774456,91.563192,Jun,6,6.0,June,Jun 2021
3,https://www.observerbd.com/news.php?id=317968,2021,19-Jun,NGO worker killed in Bhola road accident,NGO worker killed in Bhola road accidentA work...,"Bhola, Bangladesh","ভোলা জেলা, বরিশাল বিভাগ, বাংলাদেশ",22.336542,90.843902,Jun,6,6.0,June,Jun 2021
4,https://www.observerbd.com/news.php?id=317963,2021,19-Jun,Three killed in Narsingdi road accident,Three killed in Narsingdi road accidentThree p...,"Narsingdi, Bangladesh","নরসিংদী, নরসিংদী জেলা, ঢাকা বিভাগ, 1602, বাংলাদেশ",23.915645,90.698196,Jun,6,6.0,June,Jun 2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
464,https://www.observerbd.com/news.php?id=176928,2019,07-Jan,"Mother, son killed in Netrakona road accident","NETRAKONA, Jan 6: A mother and her son were ki...","Dhaka, Bangladesh","ঢাকা, Chanpara Bazar, ঢাকা জেলা, ঢাকা বিভাগ, 1...",23.810651,90.412647,Jan,1,1.0,January,Jan 2019
465,https://www.observerbd.com/news.php?id=176806,2019,06-Jan,Mother-son killed in road accident\n,A woman and her son were killed while her daug...,"Netrokona, Bangladesh","নেত্রকোনা, নেত্রকোনা জেলা, ময়মনসিংহ বিভাগ, বা...",24.885340,90.732625,Jan,1,1.0,January,Jan 2019
466,https://www.observerbd.com/news.php?id=176524,2019,04-Jan,Teacher killed in road accident,Teacher killed in road accident A retired teac...,"Kasba, Bangladesh","কসবা, ব্রাহ্মণবাড়িয়া জেলা, চট্টগ্রাম বিভাগ, ...",23.746715,91.145893,Jan,1,1.0,January,Jan 2019
467,https://www.observerbd.com/news.php?id=176389,2019,03-Jan,3 of a family die in Faridpur road accident,3 of a family die in Faridpur road accidentThr...,"Boalmari, Bangladesh","বোয়ালমারী, ফরিদপুর জেলা, ঢাকা বিভাগ, বাংলাদেশ",23.385456,89.681615,Jan,1,1.0,January,Jan 2019


In [27]:
df_new['indexx'] = df_new['month'] + ' ' + df_new['Year'].astype(str)
lista_index = df_new['indexx'].unique().tolist()

weight_list = []

df_new['conta'] = 1 
for x in df_new['month_year'].sort_values().unique(): 
    weight_list.append(df_new.loc[df_new['month_year'] == x, 
                                        ['Latitude',"Longitude",'conta']].groupby(['Latitude','Longitude']).sum().reset_index().values.tolist()) 
    
base_map = folium.Map(location=[23.6850, 90.3563],tiles="stamen toner",zoom_start = 6) 

#create the heatmap from our List 
HeatMapWithTime(weight_list, radius=20,index= lista_index, gradient={0.1: 'blue',0.5: 'green', 0.5: 'yellow', 0.95: 'orange', 1: 'red'}, \
                                                                     
                        auto_play =True, min_opacity=0.5, max_opacity=1, use_local_extrema=True).add_to(base_map) 
                                                                
                                                                     
base_map

### References

* https://medium.com/nerd-for-tech/time-lapse-heat-maps-with-folium-1847f53ec956