In [1]:
import folium
import pandas as pd
import os
from folium import plugins

### Data loading

In [2]:
# All three inputs are semicolon-separated CSV files read from the working directory.
# The location summary is read with pandas' default encoding.
all_tweets_location_summary = pd.read_csv(
    'location_summary.csv',
    sep=';',
)

# The tweet stream and the hashtag table are decoded as latin1.
tweets = pd.read_csv(
    'tweet_stream.csv',
    sep=';',
    encoding='latin1',
)
hashtags = pd.read_csv(
    'hashtags.csv',
    sep=';',
    encoding='latin1',
)

In [3]:
# Names of the tweet-stream columns used by the rest of the notebook.
CONTENT_COL = 'Tweet content'
LATITUDE_COL = 'Latitude'
LONGITUDE_COL = 'Longitude'

# Keep only the tweet text and its coordinates.
tweets_df = pd.DataFrame(
    tweets,
    columns=[
        CONTENT_COL,
        LATITUDE_COL,
        LONGITUDE_COL,
    ],
)
tweets_df

Unnamed: 0,Tweet content,Latitude,Longitude
0,Talking to some great journos this morning all...,51.505185,-0.150128
1,Incident (Severe delay) M6 northbound between ...,52.615627,-2.029030
2,THIS: Ò@Colmogorman: Great piece by @amcteirna...,53.336995,-6.279686
3,A Little snippet from my #SundayJazzCongregati...,51.513451,-0.125331
4,Latest Book Review: The Closet of Savage Memen...,49.201770,-2.114542
...,...,...,...
169028,#beautiful #green #somerset #levels #love #liv...,51.118702,-2.915014
169029,@stephenkb Well indeed...,51.424421,0.232808
169030,https://t.co/Rwju1X0nAC - How to Communicate E...,54.063300,-2.884240
169031,Dah sampaiiii (@ Manchester International Airp...,53.362359,-2.274299


In [4]:
# Names of the hashtag-table columns: the tag text and its repeat count.
HASH_COL = 'Hash'
REPEATS_COL = 'Repeats'

# Keep only those two columns and preview the first 15 rows.
hashtags_df = pd.DataFrame(
    hashtags,
    columns=[
        HASH_COL,
        REPEATS_COL,
    ],
)
hashtags_df.head(15)

Unnamed: 0,Hash,Repeats
0,nowplaying,5300
1,Hiring,2920
2,areacode,2407
3,tnc,2407
4,job,2157
5,Job,2154
6,london,2068
7,CareerArc,2039
8,Jobs,1933
9,London,1831


In [5]:
# Show the per-location summary table read from 'location_summary.csv'.
all_tweets_location_summary

Unnamed: 0,Place (as appears on Bio),Keyword Repetitions,Latitude,Longitude
0,"London, England",11355,51.505185,-0.150128
1,"Dublin City, Ireland",3597,53.336995,-6.279686
2,"Manchester, England",3365,53.480759,-2.242631
3,"South East, England",3137,51.617222,-0.601111
4,"Camberwell, London",2921,51.507447,-0.094328
...,...,...,...,...
95,"Milton Keynes, England",331,52.041633,-0.707300
96,"Staines-upon-Thames, South East",327,51.419193,-0.419150
97,"Wicklow, Ireland",321,53.201429,-6.100073
98,"Deal, England",318,51.217778,1.403889


### Distribution of most popular #hashtags

In [6]:
# --- Configuration for the per-hashtag heat map ---

# Number of rows taken from the top of the hashtag table; one map layer is built per tag.
TOP_HASHTAGS_COUNT = 15

# Radius passed to every HeatMap layer.
RADIUS = 15

# Column names used to read coordinates and tag text.
LATITUDE_COL = 'Latitude'
LONGITUDE_COL = 'Longitude'
HASH_COL = 'Hash'

# Initial map view: centre as (latitude, longitude) and starting zoom level.
COORDINATES = (51.505185, -0.150128)
ZOOM = 6

def filter_tweets_by_hashtags(collection,tag):
    """Return the rows of ``collection`` whose tweet text contains ``tag``.

    Parameters
    ----------
    collection : pandas.DataFrame
        Frame with a 'Tweet content' column holding the tweet text.
    tag : str
        Text to look for. It is matched literally (not as a regular
        expression) and case-sensitively, anywhere in the tweet.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``collection``, with their original index.
    """
    # A match at position 0 counts too: the previous `str.find(tag) > 0`
    # test silently dropped tweets that start with the tag text.
    # `na=False` treats rows with missing text as non-matching.
    matches = collection['Tweet content'].str.contains(tag, regex=False, na=False)
    return collection[matches]

def prepare_feature_group(collection,display_name):
    """Build a map layer containing a heat map of the rows in ``collection``.

    Parameters
    ----------
    collection : pandas.DataFrame
        Frame providing the LATITUDE_COL and LONGITUDE_COL columns.
    display_name : str
        Layer label; it is prefixed with '#' in the layer name.

    Returns
    -------
    folium.FeatureGroup
        An overlay layer created with ``show=False`` that holds the heat map.
    """
    latitudes = list(collection[LATITUDE_COL])
    longitudes = list(collection[LONGITUDE_COL])

    layer = folium.FeatureGroup(
        name='#' + display_name,
        show=False,
        overlay=True,
    )
    heat_layer = plugins.HeatMap(zip(latitudes, longitudes), radius=RADIUS)
    heat_layer.add_to(layer)
    return layer

# attr='USGS style'

def prepare_map():
    """Build a map with one heat-map layer per top hashtag.

    Takes the first TOP_HASHTAGS_COUNT entries of ``hashtags_df[HASH_COL]``,
    filters ``tweets_df`` for each of them, and adds the resulting feature
    group to a map centred on COORDINATES. A layer control is added last.

    Returns
    -------
    folium.Map
        The assembled map.
    """
    result_map = folium.Map(location=COORDINATES, zoom_start=ZOOM)

    top_rows = hashtags_df.head(TOP_HASHTAGS_COUNT)
    for hashtag in list(top_rows[HASH_COL]):
        matching_tweets = filter_tweets_by_hashtags(tweets_df, hashtag)
        prepare_feature_group(matching_tweets, hashtag).add_to(result_map)

    # Added after all feature groups have been attached to the map.
    layer_control = folium.LayerControl()
    layer_control.add_to(result_map)
    return result_map
    
# Build the per-hashtag heat map and render it as this cell's output.
display(prepare_map())


### Distribution of all hashtags 

In [8]:
# Show the per-location summary again; its Latitude, Longitude and
# 'Keyword Repetitions' columns are the input of the heat map built next.
all_tweets_location_summary

Unnamed: 0,Place (as appears on Bio),Keyword Repetitions,Latitude,Longitude
0,"London, England",11355,51.505185,-0.150128
1,"Dublin City, Ireland",3597,53.336995,-6.279686
2,"Manchester, England",3365,53.480759,-2.242631
3,"South East, England",3137,51.617222,-0.601111
4,"Camberwell, London",2921,51.507447,-0.094328
...,...,...,...,...
95,"Milton Keynes, England",331,52.041633,-0.707300
96,"Staines-upon-Thames, South East",327,51.419193,-0.419150
97,"Wicklow, Ireland",321,53.201429,-6.100073
98,"Deal, England",318,51.217778,1.403889


In [10]:
# Initial centre of the map as (latitude, longitude).
INITIAL_COORDINATES = (51.505185, -0.150128)

# Read each column directly instead of walking the frame with iterrows().
# The three lists stay aligned row by row.
lats = all_tweets_location_summary['Latitude'].tolist()
longs = all_tweets_location_summary['Longitude'].tolist()
weights = all_tweets_location_summary['Keyword Repetitions'].tolist()

# NOTE(review): the former attr='USGS style' argument was dropped. No custom
# tile URL is passed here, and the hashtag map is built without it as well.
# Presumably it was a leftover from an earlier tile experiment -- confirm.
all_tweets_map = folium.Map(location=INITIAL_COORDINATES, zoom_start=6)

# Each heat-map point is [latitude, longitude, weight]. The points are
# materialised into a list so the data can be iterated more than once.
heat_points = list(zip(lats, longs, weights))
plugins.HeatMap(heat_points, radius=20).add_to(all_tweets_map)
display(all_tweets_map)