In [721]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from shapely.geometry import Point, Polygon
import folium
from shapely.geometry import LineString
from sympy import centroid
from geopandas import GeoDataFrame
from math import nan

### Adding Tweet Locations
The map and plot cells have been commented out to keep the notebook within GitHub's file-size limits.

In [722]:
# Load the raw tweet export; the relative path means the CSV must sit in the
# notebook's working directory.
tweets = pd.read_csv('SA_Tweets_All_Locations_v2.csv')

In [723]:
# Preview the first three rows to check the columns that were loaded.
tweets.head(3)

Unnamed: 0.1,Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,replies,likes,quote_count,place_id,place_name,bbox
0,0,1353804645580353537,ranthotse17,1433,2648,all is well,Limpopo,Go shota R1000 ya registration \n\nBlesser: +R...,2022-09-29 19:40:06+00:00,0,0,1,0,{'place_id': 'dd9c0d7d7e07eb49'},South Africa,"[16.4475932, -34.8342468, 32.8922934, -22.1247..."
1,1,1258457287120818182,Michael86259668,1323,29766,plus500 associate commodity fx and cfd trader ...,South Africa,Spx yesterday's rally\nPoof gone https://t.co/...,2022-09-29 14:17:35+00:00,0,0,1,0,{'place_id': 'a02e6c261fa62b42'},"Benoni, South Africa","[28.2722463, -26.2315204, 28.4449594, -26.0681..."
2,2,2496356957,KabeloMG,1436,35414,This is Twitter.We tweet like birds hence the ...,,@JohnPerlman A driverless car wouldn't work in...,2022-09-29 13:49:14+00:00,1,0,0,0,{'place_id': '0e587c59401d0a27'},"Pretoria, South Africa","[27.9483035, -25.9157727, 28.4198285, -25.5894..."


In [724]:
# (rows, columns) of the loaded frame.
tweets.shape

(21655, 16)

In [725]:
def bbox_to_coords(bbox):
    """Parse a stringified bounding box into a list of floats.

    The ``bbox`` column comes from a CSV, so each value is text such as
    ``"[16.4475932, -34.8342468, 32.8922934, -22.1247]"``.

    Brackets and commas are all treated as separators, so the numbers are
    recovered whether they are delimited by ", ", a bare "," or whitespace.
    Splitting on whitespace alone would glue "[1,2,3,4]" into the single
    token "1234".

    Parameters
    ----------
    bbox : str
        Textual list of numbers, optionally wrapped in square brackets.

    Returns
    -------
    list of float
        The numbers in the order they appear in ``bbox``; empty when ``bbox``
        contains no numbers.

    Raises
    ------
    ValueError
        If any token cannot be converted with ``float``.
    """
    normalised = bbox
    # Turn every structural character into whitespace so one split() handles
    # all delimiter styles.
    for separator in '[],':
        normalised = normalised.replace(separator, ' ')
    return [float(token) for token in normalised.split()]

In [726]:
# Convert every textual bounding box into a list of floats.
tweets['coords'] = tweets['bbox'].apply(bbox_to_coords)

In [727]:
# Spread the four entries of each `coords` list over their own columns,
# in the order they are stored in the list.
corner_columns = ['longitude_1', 'latitude_1', 'longitude_2', 'latitude_2']
for position, column in enumerate(corner_columns):
    # Bind `position` as a default argument so each lambda keeps its own index.
    tweets[column] = tweets['coords'].apply(lambda box, i=position: box[i])

In [728]:
# Check the parsed `coords` lists and the four corner columns taken from them.
tweets.head(3)

Unnamed: 0.1,Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,...,likes,quote_count,place_id,place_name,bbox,coords,longitude_1,latitude_1,longitude_2,latitude_2
0,0,1353804645580353537,ranthotse17,1433,2648,all is well,Limpopo,Go shota R1000 ya registration \n\nBlesser: +R...,2022-09-29 19:40:06+00:00,0,...,1,0,{'place_id': 'dd9c0d7d7e07eb49'},South Africa,"[16.4475932, -34.8342468, 32.8922934, -22.1247...","[16.4475932, -34.8342468, 32.8922934, -22.1247...",16.447593,-34.834247,32.892293,-22.124724
1,1,1258457287120818182,Michael86259668,1323,29766,plus500 associate commodity fx and cfd trader ...,South Africa,Spx yesterday's rally\nPoof gone https://t.co/...,2022-09-29 14:17:35+00:00,0,...,1,0,{'place_id': 'a02e6c261fa62b42'},"Benoni, South Africa","[28.2722463, -26.2315204, 28.4449594, -26.0681...","[28.2722463, -26.2315204, 28.4449594, -26.0681...",28.272246,-26.23152,28.444959,-26.068193
2,2,2496356957,KabeloMG,1436,35414,This is Twitter.We tweet like birds hence the ...,,@JohnPerlman A driverless car wouldn't work in...,2022-09-29 13:49:14+00:00,1,...,0,0,{'place_id': '0e587c59401d0a27'},"Pretoria, South Africa","[27.9483035, -25.9157727, 28.4198285, -25.5894...","[27.9483035, -25.9157727, 28.4198285, -25.5894...",27.948304,-25.915773,28.419829,-25.589438


In [729]:
# The raw text column is no longer needed once `coords` holds the parsed values.
tweets = tweets.drop(columns='bbox')
tweets.head(3)

Unnamed: 0.1,Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,replies,likes,quote_count,place_id,place_name,coords,longitude_1,latitude_1,longitude_2,latitude_2
0,0,1353804645580353537,ranthotse17,1433,2648,all is well,Limpopo,Go shota R1000 ya registration \n\nBlesser: +R...,2022-09-29 19:40:06+00:00,0,0,1,0,{'place_id': 'dd9c0d7d7e07eb49'},South Africa,"[16.4475932, -34.8342468, 32.8922934, -22.1247...",16.447593,-34.834247,32.892293,-22.124724
1,1,1258457287120818182,Michael86259668,1323,29766,plus500 associate commodity fx and cfd trader ...,South Africa,Spx yesterday's rally\nPoof gone https://t.co/...,2022-09-29 14:17:35+00:00,0,0,1,0,{'place_id': 'a02e6c261fa62b42'},"Benoni, South Africa","[28.2722463, -26.2315204, 28.4449594, -26.0681...",28.272246,-26.23152,28.444959,-26.068193
2,2,2496356957,KabeloMG,1436,35414,This is Twitter.We tweet like birds hence the ...,,@JohnPerlman A driverless car wouldn't work in...,2022-09-29 13:49:14+00:00,1,0,0,0,{'place_id': '0e587c59401d0a27'},"Pretoria, South Africa","[27.9483035, -25.9157727, 28.4198285, -25.5894...",27.948304,-25.915773,28.419829,-25.589438


### Centroid

In [730]:
def find_centroid(coords):
    """Return the centroid of the segment joining the two corners in ``coords``.

    ``coords`` is indexed as ``[x1, y1, x2, y2]``: entries 0-1 form the first
    corner and entries 2-3 the second. A shapely ``LineString`` is drawn
    between the two corners and its ``centroid`` is returned.
    """
    first_corner = (coords[0], coords[1])
    second_corner = (coords[2], coords[3])
    return LineString([first_corner, second_corner]).centroid

In [731]:
# One centroid geometry per tweet, computed from its bounding-box corners.
tweets['centroid'] = tweets['coords'].apply(find_centroid)

In [732]:
# tweets = tweets.drop('geometry', axis=1)
# tweets.head()

In [733]:
# Pull the x / y of each centroid geometry out into plain numeric columns.
centroid_points = tweets['centroid']
tweets['centroid_long'] = centroid_points.apply(lambda point: point.x)
tweets['centroid_lat'] = centroid_points.apply(lambda point: point.y)

In [734]:
# The geometry objects have been unpacked into centroid_long / centroid_lat.
tweets = tweets.drop(columns='centroid')
tweets.head()

Unnamed: 0.1,Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,...,quote_count,place_id,place_name,coords,longitude_1,latitude_1,longitude_2,latitude_2,centroid_long,centroid_lat
0,0,1353804645580353537,ranthotse17,1433,2648,all is well,Limpopo,Go shota R1000 ya registration \n\nBlesser: +R...,2022-09-29 19:40:06+00:00,0,...,0,{'place_id': 'dd9c0d7d7e07eb49'},South Africa,"[16.4475932, -34.8342468, 32.8922934, -22.1247...",16.447593,-34.834247,32.892293,-22.124724,24.669943,-28.479485
1,1,1258457287120818182,Michael86259668,1323,29766,plus500 associate commodity fx and cfd trader ...,South Africa,Spx yesterday's rally\nPoof gone https://t.co/...,2022-09-29 14:17:35+00:00,0,...,0,{'place_id': 'a02e6c261fa62b42'},"Benoni, South Africa","[28.2722463, -26.2315204, 28.4449594, -26.0681...",28.272246,-26.23152,28.444959,-26.068193,28.358603,-26.149857
2,2,2496356957,KabeloMG,1436,35414,This is Twitter.We tweet like birds hence the ...,,@JohnPerlman A driverless car wouldn't work in...,2022-09-29 13:49:14+00:00,1,...,0,{'place_id': '0e587c59401d0a27'},"Pretoria, South Africa","[27.9483035, -25.9157727, 28.4198285, -25.5894...",27.948304,-25.915773,28.419829,-25.589438,28.184066,-25.752605
3,3,1309123451433746437,TheLegitIcon2,6458,37979,"Phumlani, 𝒍𝒐𝒗𝒆𝒓, 𝒇𝒖𝒕𝒖𝒓𝒆 𝒍𝒆𝒂𝒅𝒆𝒓 𝒂𝒏𝒅 𝒂 𝒈𝒐𝒐𝒅 𝒄𝒉𝒂𝒓...","Pietermaritzburg, South Africa",At least offer ukumbhalela ama board for that ...,2022-09-29 11:10:06+00:00,0,...,0,{'place_id': '52e073e7724385c3'},"Pietermaritzburg, South Africa","[30.2563496, -29.6999989, 30.4512134, -29.5328...",30.25635,-29.699999,30.451213,-29.532801,30.353782,-29.6164
4,4,177685467,Nwabisa_1,1878,52381,"mamCirha, Qhanqolo, Ncibane , Nojawolo, Ntswen...",Joburg,We used to catch a train from Ikwezi to Mzimhl...,2022-09-29 10:52:13+00:00,1,...,0,{'place_id': '3e46a98adcf05e59'},"Meadowlands, South Africa","[27.8651453, -26.2359005, 27.9219768, -26.1964...",27.865145,-26.2359,27.921977,-26.196478,27.893561,-26.216189


### Find Author Locations for place_name = South Africa

In [735]:
# from geopy.geocoders import Nominatim

# # Change to function so lambda expression can be used:
# def get_author_location(author_location):
#     loc = Nominatim(user_agent="GetLoc")
#     getLoc = loc.geocode(author_location, timeout=2)
#     if getLoc is not None:
#         return getLoc
#     else:
#         return nan

In [736]:
# count = 0
# authour_locs = []

# for index, tweet in tweets.iterrows():
#     loc = nan
#     if(tweet['place_name'] == 'South Africa'):
#         if(pd.notna(tweet['author_location'])):  # `x != nan` is always True, so it never filtered missing values
#             loc = get_author_location(tweet['author_location'])
#             if(loc == 'South Africa'):
#                 loc = nan
#         count = count + 1
#         print(count)
#     authour_locs.append(loc)


# tweets['actual_author_location'] = authour_locs

### Fix Edge Cases
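Some places need a manual override. For example, every tweet tagged "Cape Town, South Africa" is pinned to a fixed point (18.4241, -33.9249) instead of the centroid of its bounding box.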

In [737]:
# Pin every "Cape Town, South Africa" tweet to one fixed (longitude, latitude)
# pair, replacing the centroid computed from its bounding box.
is_cape_town = tweets.place_name == 'Cape Town, South Africa'
tweets.loc[is_cape_town, ['centroid_long', 'centroid_lat']] = (18.4241, -33.9249)

### Try plot on map

In [738]:
# Build one shapely Point per tweet from its (longitude, latitude) centroid.
geometry = [Point(xy) for xy in zip(tweets['centroid_long'], tweets['centroid_lat'])]
# The centroids are longitude/latitude degrees, so they are declared as
# WGS 84 (EPSG:4326) rather than leaving the CRS undefined.
gdf = GeoDataFrame(tweets, geometry=geometry, crs='EPSG:4326')
# Later cells read tweets['geometry']. Whether the GeoDataFrame constructor
# adds that column to the frame it was given depends on the installed
# pandas/geopandas versions (shared data vs. copy), so attach it explicitly
# instead of relying on that side effect.
tweets['geometry'] = gdf.geometry

#this is a simple map that goes with geopandas
#world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
#gdf.plot(ax=world.plot(figsize=(10, 6)), marker='o', color='red', markersize=15);

In [739]:
# Inspect the GeoDataFrame, including its geometry column.
gdf.head(2)

Unnamed: 0.1,Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,...,place_id,place_name,coords,longitude_1,latitude_1,longitude_2,latitude_2,centroid_long,centroid_lat,geometry
0,0,1353804645580353537,ranthotse17,1433,2648,all is well,Limpopo,Go shota R1000 ya registration \n\nBlesser: +R...,2022-09-29 19:40:06+00:00,0,...,{'place_id': 'dd9c0d7d7e07eb49'},South Africa,"[16.4475932, -34.8342468, 32.8922934, -22.1247...",16.447593,-34.834247,32.892293,-22.124724,24.669943,-28.479485,POINT (24.66994 -28.47949)
1,1,1258457287120818182,Michael86259668,1323,29766,plus500 associate commodity fx and cfd trader ...,South Africa,Spx yesterday's rally\nPoof gone https://t.co/...,2022-09-29 14:17:35+00:00,0,...,{'place_id': 'a02e6c261fa62b42'},"Benoni, South Africa","[28.2722463, -26.2315204, 28.4449594, -26.0681...",28.272246,-26.23152,28.444959,-26.068193,28.358603,-26.149857,POINT (28.35860 -26.14986)


In [740]:
# Base map opened at latitude -29, longitude 27 on OpenStreetMap tiles.
# NOTE(review): the name `map` shadows the Python builtin for the rest of the
# notebook; a later cell passes this object to explore(m=map).
map = folium.Map(
    location=[-29, 27],
    zoom_start=5.3,
    tiles='OpenStreetMap',
)

In [741]:
#gdf.explore(m = map)

In [742]:
# Remove the intermediate corner and centroid columns in a single call.
helper_columns = [
    'longitude_1', 'longitude_2',
    'latitude_1', 'latitude_2',
    'centroid_long', 'centroid_lat',
]
tweets = tweets.drop(columns=helper_columns)
tweets.head(2)

Unnamed: 0.1,Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,replies,likes,quote_count,place_id,place_name,coords,geometry
0,0,1353804645580353537,ranthotse17,1433,2648,all is well,Limpopo,Go shota R1000 ya registration \n\nBlesser: +R...,2022-09-29 19:40:06+00:00,0,0,1,0,{'place_id': 'dd9c0d7d7e07eb49'},South Africa,"[16.4475932, -34.8342468, 32.8922934, -22.1247...",POINT (24.66994 -28.47949)
1,1,1258457287120818182,Michael86259668,1323,29766,plus500 associate commodity fx and cfd trader ...,South Africa,Spx yesterday's rally\nPoof gone https://t.co/...,2022-09-29 14:17:35+00:00,0,0,1,0,{'place_id': 'a02e6c261fa62b42'},"Benoni, South Africa","[28.2722463, -26.2315204, 28.4449594, -26.0681...",POINT (28.35860 -26.14986)


### Tweet Aggregates For Every Location

In [743]:
# Count tweets per distinct `place_id` value and expose the result as a
# two-column frame (place_id, tweet_count).
# NOTE(review): some place_id strings embed point coordinates in front of the
# id, so tweets sharing one id can land in several groups - confirm whether
# grouping on the bare id was intended.
tweet_agg = (
    tweets.groupby('place_id')
    .size()
    .reset_index(name='tweet_count')
)
tweet_agg.head()

Unnamed: 0,place_id,tweet_count
0,"{'coordinates': {'type': 'Point', 'coordinates...",1
1,"{'coordinates': {'type': 'Point', 'coordinates...",1
2,"{'coordinates': {'type': 'Point', 'coordinates...",1
3,"{'coordinates': {'type': 'Point', 'coordinates...",1
4,"{'coordinates': {'type': 'Point', 'coordinates...",8


In [744]:
def get_place_name(place_id):
    """Return the place name recorded for ``place_id``.

    Selects the rows of the notebook-level ``tweets`` frame whose ``place_id``
    equals the argument and returns the first distinct ``place_name`` among
    them. Raises ``IndexError`` when no row matches.
    """
    matches = tweets['place_id'] == place_id
    distinct_names = tweets.loc[matches, 'place_name'].unique()
    return distinct_names[0]

In [745]:
from shapely.geometry import Point, mapping, shape
def get_place_geometry(place_id):
    """Return the geometry recorded for ``place_id``.

    Selects the rows of the notebook-level ``tweets`` frame whose ``place_id``
    equals the argument and returns the first distinct value of their
    ``geometry`` column.
    """
    matches = tweets['place_id'] == place_id
    distinct_geometries = tweets.loc[matches, 'geometry'].unique()
    return distinct_geometries[0]

In [746]:
# Attach a name and a point geometry to every aggregated place_id.
place_ids = tweet_agg['place_id']
tweet_agg['place_name'] = place_ids.apply(get_place_name)
tweet_agg['place_coords'] = place_ids.apply(get_place_geometry)
tweet_agg.head()

Unnamed: 0,place_id,tweet_count,place_name,place_coords
0,"{'coordinates': {'type': 'Point', 'coordinates...",1,South Africa,POINT (24.6699433 -28.4794852)
1,"{'coordinates': {'type': 'Point', 'coordinates...",1,"Noordhoek, South Africa",POINT (18.38045585 -34.10140905)
2,"{'coordinates': {'type': 'Point', 'coordinates...",1,"Cape Town, South Africa",POINT (18.4241 -33.9249)
3,"{'coordinates': {'type': 'Point', 'coordinates...",1,"Cape Town, South Africa",POINT (18.4241 -33.9249)
4,"{'coordinates': {'type': 'Point', 'coordinates...",8,"Cape Town, South Africa",POINT (18.4241 -33.9249)


In [747]:
# Index by place_id so the per-place sums can be joined on it.
tweet_agg = tweet_agg.set_index(keys='place_id', drop=True)
tweet_agg.head()

Unnamed: 0_level_0,tweet_count,place_name,place_coords
place_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"{'coordinates': {'type': 'Point', 'coordinates': [18.34921, -33.05852]}, 'place_id': 'dd9c0d7d7e07eb49'}",1,South Africa,POINT (24.6699433 -28.4794852)
"{'coordinates': {'type': 'Point', 'coordinates': [18.37772958, -34.09727455]}, 'place_id': 'b6f995358c6942da'}",1,"Noordhoek, South Africa",POINT (18.38045585 -34.10140905)
"{'coordinates': {'type': 'Point', 'coordinates': [18.38643551, -33.91554429]}, 'place_id': '8b9ec16fdc0d7e55'}",1,"Cape Town, South Africa",POINT (18.4241 -33.9249)
"{'coordinates': {'type': 'Point', 'coordinates': [18.38943958, -33.91234484]}, 'place_id': '8b9ec16fdc0d7e55'}",1,"Cape Town, South Africa",POINT (18.4241 -33.9249)
"{'coordinates': {'type': 'Point', 'coordinates': [18.41104746, -33.92938496]}, 'place_id': '8b9ec16fdc0d7e55'}",8,"Cape Town, South Africa",POINT (18.4241 -33.9249)


In [748]:
# Total the engagement counters (and author follower counts) per place_id.
summed_columns = ['retweets', 'quote_count', 'replies', 'likes', 'author_followers']
tweet_sums = tweets.groupby('place_id')[summed_columns].sum()
tweet_sums.head()

Unnamed: 0_level_0,retweets,quote_count,replies,likes,author_followers
place_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"{'coordinates': {'type': 'Point', 'coordinates': [18.34921, -33.05852]}, 'place_id': 'dd9c0d7d7e07eb49'}",0,0,0,0,14
"{'coordinates': {'type': 'Point', 'coordinates': [18.37772958, -34.09727455]}, 'place_id': 'b6f995358c6942da'}",0,0,0,0,11895
"{'coordinates': {'type': 'Point', 'coordinates': [18.38643551, -33.91554429]}, 'place_id': '8b9ec16fdc0d7e55'}",0,0,0,0,2072
"{'coordinates': {'type': 'Point', 'coordinates': [18.38943958, -33.91234484]}, 'place_id': '8b9ec16fdc0d7e55'}",0,0,0,0,1386
"{'coordinates': {'type': 'Point', 'coordinates': [18.41104746, -33.92938496]}, 'place_id': '8b9ec16fdc0d7e55'}",0,0,0,2,2720


In [749]:
# Both frames are indexed by place_id, so an index join lines the sums up
# with the counts.
tweet_agg = tweet_agg.join(other=tweet_sums, how='left')

In [750]:
# Copy the place_id index back into a regular (last) column, then switch to a
# plain positional index.
tweet_agg = tweet_agg.assign(place_id=tweet_agg.index).reset_index(drop=True)
tweet_agg.head()

Unnamed: 0,tweet_count,place_name,place_coords,retweets,quote_count,replies,likes,author_followers,place_id
0,1,South Africa,POINT (24.6699433 -28.4794852),0,0,0,0,14,"{'coordinates': {'type': 'Point', 'coordinates..."
1,1,"Noordhoek, South Africa",POINT (18.38045585 -34.10140905),0,0,0,0,11895,"{'coordinates': {'type': 'Point', 'coordinates..."
2,1,"Cape Town, South Africa",POINT (18.4241 -33.9249),0,0,0,0,2072,"{'coordinates': {'type': 'Point', 'coordinates..."
3,1,"Cape Town, South Africa",POINT (18.4241 -33.9249),0,0,0,0,1386,"{'coordinates': {'type': 'Point', 'coordinates..."
4,8,"Cape Town, South Africa",POINT (18.4241 -33.9249),0,0,0,2,2720,"{'coordinates': {'type': 'Point', 'coordinates..."


In [751]:
# Keep a handle on the frame that still carries the 'place_coords' points.
tweet_agg_temp = tweet_agg
# Rebind tweet_agg to the GeoDataFrame itself, so its `geometry` column (shown
# and written to CSV in the following cells) is present no matter whether the
# GeoDataFrame constructor copies or shares the frame it is given.
# The points are longitude/latitude degrees, so the CRS is declared as
# WGS 84 (EPSG:4326) rather than left undefined.
tweet_agg = GeoDataFrame(
    tweet_agg_temp.drop('place_coords', axis=1),
    geometry=tweet_agg_temp['place_coords'],
    crs='EPSG:4326',
)
gdf2 = tweet_agg
# Draw the aggregated places on the folium map created earlier.
gdf2.explore(m = map)



In [752]:
# Preview the per-place aggregates that the next cell writes to CSV.
tweet_agg.head()

Unnamed: 0,tweet_count,place_name,retweets,quote_count,replies,likes,author_followers,place_id,geometry
0,1,South Africa,0,0,0,0,14,"{'coordinates': {'type': 'Point', 'coordinates...",POINT (24.66994 -28.47949)
1,1,"Noordhoek, South Africa",0,0,0,0,11895,"{'coordinates': {'type': 'Point', 'coordinates...",POINT (18.38046 -34.10141)
2,1,"Cape Town, South Africa",0,0,0,0,2072,"{'coordinates': {'type': 'Point', 'coordinates...",POINT (18.42410 -33.92490)
3,1,"Cape Town, South Africa",0,0,0,0,1386,"{'coordinates': {'type': 'Point', 'coordinates...",POINT (18.42410 -33.92490)
4,8,"Cape Town, South Africa",0,0,0,2,2720,"{'coordinates': {'type': 'Point', 'coordinates...",POINT (18.42410 -33.92490)


In [753]:
# Write the per-place aggregates next to the notebook.
# NOTE(review): with pandas defaults the row index is written as an extra
# unnamed first column; pass index=False if consumers should not see it.
tweet_agg.to_csv('Tweet_Locations.csv')

### Things still to do

The last remaining step is to assign each tweet to its municipality/province.

In [754]:
# Final look at the per-tweet frame before it is written to disk.
tweets.head()

Unnamed: 0.1,Unnamed: 0,author_id,username,author_followers,author_tweets,author_description,author_location,text,created_at,retweets,replies,likes,quote_count,place_id,place_name,coords,geometry
0,0,1353804645580353537,ranthotse17,1433,2648,all is well,Limpopo,Go shota R1000 ya registration \n\nBlesser: +R...,2022-09-29 19:40:06+00:00,0,0,1,0,{'place_id': 'dd9c0d7d7e07eb49'},South Africa,"[16.4475932, -34.8342468, 32.8922934, -22.1247...",POINT (24.66994 -28.47949)
1,1,1258457287120818182,Michael86259668,1323,29766,plus500 associate commodity fx and cfd trader ...,South Africa,Spx yesterday's rally\nPoof gone https://t.co/...,2022-09-29 14:17:35+00:00,0,0,1,0,{'place_id': 'a02e6c261fa62b42'},"Benoni, South Africa","[28.2722463, -26.2315204, 28.4449594, -26.0681...",POINT (28.35860 -26.14986)
2,2,2496356957,KabeloMG,1436,35414,This is Twitter.We tweet like birds hence the ...,,@JohnPerlman A driverless car wouldn't work in...,2022-09-29 13:49:14+00:00,1,0,0,0,{'place_id': '0e587c59401d0a27'},"Pretoria, South Africa","[27.9483035, -25.9157727, 28.4198285, -25.5894...",POINT (28.18407 -25.75261)
3,3,1309123451433746437,TheLegitIcon2,6458,37979,"Phumlani, 𝒍𝒐𝒗𝒆𝒓, 𝒇𝒖𝒕𝒖𝒓𝒆 𝒍𝒆𝒂𝒅𝒆𝒓 𝒂𝒏𝒅 𝒂 𝒈𝒐𝒐𝒅 𝒄𝒉𝒂𝒓...","Pietermaritzburg, South Africa",At least offer ukumbhalela ama board for that ...,2022-09-29 11:10:06+00:00,0,0,1,0,{'place_id': '52e073e7724385c3'},"Pietermaritzburg, South Africa","[30.2563496, -29.6999989, 30.4512134, -29.5328...",POINT (30.35378 -29.61640)
4,4,177685467,Nwabisa_1,1878,52381,"mamCirha, Qhanqolo, Ncibane , Nojawolo, Ntswen...",Joburg,We used to catch a train from Ikwezi to Mzimhl...,2022-09-29 10:52:13+00:00,1,0,1,0,{'place_id': '3e46a98adcf05e59'},"Meadowlands, South Africa","[27.8651453, -26.2359005, 27.9219768, -26.1964...",POINT (27.89356 -26.21619)


In [755]:
# Write the per-tweet frame next to the notebook.
# NOTE(review): the row index is written as another unnamed column on top of
# the 'Unnamed: 0' / 'Unnamed: 0.1' columns already present in the input;
# index=False would stop them accumulating - confirm nothing downstream reads them.
tweets.to_csv('Full_Tweets.csv')