## <b>Spatial analysis</b>
<i>This notebook analyses how the extracted topics and the calculated sentiment scores are distributed over space.</i>


In [3]:
#Import needed libraries
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import sys
sys.path.append(r'C:\Users\mies1\PycharmProjects\spatialanalysis2\src')
from mylibpysal import libpysal
import pyproj
import esda
#The bundled 'seaborn' style is called 'seaborn-v0_8' in newer Matplotlib
#releases; use whichever name this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

#Load the tweet table (topic, sentiment label and score per tweet) from CSV
outputgermany = pd.read_csv(
    'Data\\sentimenttopic70_tweets_germany.csv'
)
tweets_germany = pd.DataFrame(data=outputgermany)

#Split the timestamp text on whitespace into a 'Date' and a 'Time' column
tweets_germany[['Date', 'Time']] = (
    tweets_germany['t_datetime_goed'].str.split(expand=True)
)

#Add topic names again
#Each named theme groups several topic ids; id -1 is labelled 'outlier'.
topic_ids = tweets_germany['Topic']
conditions = [
    topic_ids == -1,
    topic_ids.isin([0, 2, 4, 6, 13]),
    topic_ids.isin([1, 5, 10, 14, 15]),
    topic_ids.isin([3, 7, 8, 9, 11, 12]),
    ]

values = ['outlier', 'coronapolicies','prevention','lockdownactivities']
#np.select falls back to the integer 0 for rows matching no condition. Recent
#NumPy versions raise a TypeError when that integer has to be combined with
#string choices, so the fallback is given explicitly as the string '0'
#(the value older NumPy versions produced implicitly).
tweets_germany['nametopic'] = np.select(conditions, values, default='0')

#Convert labels into -1,0,1 (stored as strings) so it can be used in array
conditions = [
    (tweets_germany['label'] == 'Negative'),
    (tweets_germany['label'] == 'Positive'),
    (tweets_germany['label'] == 'Neutral')
    ]

values = ['-1', '1','0']
tweets_germany['numbersentiment'] = np.select(conditions, values, default='0')

#Convert topicname into numbers (stored as strings; later cells compare against '1', '2', '3')
conditions = [
    (tweets_germany['nametopic'] == 'outlier'),
    (tweets_germany['nametopic'] == 'coronapolicies'),
    (tweets_germany['nametopic'] == 'lockdownactivities'),
    (tweets_germany['nametopic'] == 'prevention')
    ]

values = ['-1', '1','2','3']
tweets_germany['numbertopicname'] = np.select(conditions, values, default='0')

#Turn the x/y columns into point geometries and declare them as EPSG:4326
tweet_points = gpd.points_from_xy(tweets_germany.x, tweets_germany.y)
geotweets = gpd.GeoDataFrame(tweets_germany, geometry=tweet_points).set_crs('EPSG:4326')

#Read the map of Germany and reproject it to the CRS of the tweets
bundeslander = gpd.read_file(r'Data\germany.shp').to_crs(epsg=4326)
#geom_germany is a second name for the same object, not a copy
geom_germany = bundeslander
#Replace each geometry by its zero-width buffer
#(NOTE: commonly done to repair invalid polygons - confirm that is the intent here)
geom_germany['geometry'] = geom_germany.buffer(0)

Hotspot analysis

In [4]:
#Create lists with the topic names and sentiment labels to loop over.
#The 'outlier' topic is filtered out; unlike list.remove() this does not fail
#when the data contains no outlier tweets.
topics = [name for name in geotweets['nametopic'].unique().tolist() if name != 'outlier']
labels = geotweets['label'].unique().tolist()

#Apply the style sheet first so it cannot override the legend font size set
#below. The bundled 'seaborn' style is called 'seaborn-v0_8' in newer
#Matplotlib releases; use whichever name this installation provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

#Set fontsize of legend
plt.rc('legend',fontsize=15.5)

In [1]:
def hotspot (sentiment, topic, title, pngname):
    """Plot and save a map of the significant hotspots for one sentiment/topic subset.

    Selects the rows of the global ``geotweets`` whose 'label' equals
    ``sentiment`` and whose 'nametopic' equals ``topic``, runs a local Moran's I
    on their 'score' column with row-standardised 20-nearest-neighbour weights
    and draws the points on top of the ``bundeslander`` outlines. Points with
    ``p_sim < 0.05`` in quadrant 1 are labelled 'Hotspot', all other points
    'Not significant'.

    Parameters
    ----------
    sentiment : value matched against geotweets['label']
    topic : value matched against geotweets['nametopic']
    title : text used as the figure title
    pngname : path the figure is written to with plt.savefig
    """
    from matplotlib import colors

    dbf = geotweets[(geotweets['label'] == sentiment) & (geotweets['nametopic'] == topic)]
    score = dbf['score']

    knn = libpysal.weights.KNN.from_dataframe(dbf,geom_col='geometry',k=20)
    knn.transform = 'r'

    li = esda.moran.Moran_Local(score, knn)
    sig = li.p_sim < 0.05
    #Quadrant 1 of the Moran scatterplot: high value surrounded by high values
    is_hotspot = sig & (li.q == 1)
    labels = ['Hotspot' if flag else 'Not significant' for flag in is_hotspot]

    #Categories are coloured in sorted label order. Listing only the colours of
    #the categories that actually occur keeps 'Not significant' grey even when
    #no hotspot is found.
    palette = {'Hotspot': 'red', 'Not significant': 'lightgrey'}
    hmap = colors.ListedColormap([palette[name] for name in sorted(set(labels))])

    f, ax = plt.subplots(1, figsize=(15, 15))
    bundeslander.plot(ax=ax, column='geometry',categorical=True, edgecolor='black', color='white')
    dbf.assign(cl=labels).plot(column='cl', categorical=True, \
        k=2, cmap=hmap, linewidth=0.1, ax=ax, \
        edgecolor='white', legend=True, )
    ax.set_axis_off()
    plt.title(title)
    plt.savefig(pngname)

In [None]:
#Display names for the topic keys stored in geotweets['nametopic']
topic_titles = {
    'lockdownactivities': 'Lockdown activities',
    'coronapolicies': 'Corona and policies',
    'prevention': 'Prevention',
}

#Create a hotspot map for each label per topic
for label in labels:
    for topic in topics:
        topicsname = topic_titles.get(topic)
        if topicsname is None:
            #No display name: skip this topic instead of failing on (or silently
            #reusing) the title of a previous iteration.
            print ("topic name not found")
            continue
        titlefigure = topicsname + ': hotspots of the score of \n' + label.lower() + ' tweets'
        #Backslash written as '\\': '\h' is not a valid escape sequence
        filename = 'Figures\\hotspot_' + topic + '_' + label + '.png'
        hotspot(label, topic, titlefigure, filename)

In [11]:
def coldspot (sentiment, topic, title, pngname):
    """Plot and save a map of the significant cold spots for one sentiment/topic subset.

    Selects the rows of the global ``geotweets`` whose 'label' equals
    ``sentiment`` and whose 'nametopic' equals ``topic``, runs a local Moran's I
    on their 'score' column with row-standardised 20-nearest-neighbour weights
    and draws the points on top of the ``bundeslander`` outlines. Points with
    ``p_sim < 0.05`` in quadrant 3 are labelled 'Coldspot', all other points
    'Not significant'.

    Parameters
    ----------
    sentiment : value matched against geotweets['label']
    topic : value matched against geotweets['nametopic']
    title : text used as the figure title
    pngname : path the figure is written to with plt.savefig
    """
    from matplotlib import colors

    dbf = geotweets[(geotweets['label'] == sentiment) & (geotweets['nametopic'] == topic)]
    score = dbf['score']

    knn = libpysal.weights.KNN.from_dataframe(dbf,geom_col='geometry',k=20)
    knn.transform = 'r'

    li = esda.moran.Moran_Local(score, knn)
    sig = li.p_sim < 0.05
    #esda codes the quadrants as 1 HH, 2 LH, 3 LL, 4 HL: a cold spot (low value
    #surrounded by low values) is quadrant 3. The labels are built from this
    #mask; previously the hotspot mask was used by mistake.
    is_coldspot = sig & (li.q == 3)
    labels = ['Coldspot' if flag else 'Not significant' for flag in is_coldspot]

    #Categories are coloured in sorted label order. Listing only the colours of
    #the categories that actually occur keeps 'Not significant' grey even when
    #no cold spot is found.
    palette = {'Coldspot': 'blue', 'Not significant': 'lightgrey'}
    hmap = colors.ListedColormap([palette[name] for name in sorted(set(labels))])

    f, ax = plt.subplots(1, figsize=(15, 15))
    bundeslander.plot(ax=ax, column='geometry',categorical=True, edgecolor='black', color='white')
    dbf.assign(cl=labels).plot(column='cl', categorical=True, \
        k=2, cmap=hmap, linewidth=0.1, ax=ax, \
        edgecolor='white', legend=True, )
    ax.set_axis_off()
    plt.title(title)
    plt.savefig(pngname)

In [None]:
#Display names for the topic keys stored in geotweets['nametopic']
topic_titles = {
    'lockdownactivities': 'Lockdown activities',
    'coronapolicies': 'Corona and policies',
    'prevention': 'Prevention',
    'nostalgiaprecovid': 'Nostalgia for pre-covid',
}

#Create a cold spot map for each label per topic
for label in labels:
    for topic in topics:
        topicsname = topic_titles.get(topic)
        if topicsname is None:
            #No display name: skip this topic instead of failing on (or silently
            #reusing) the title of a previous iteration.
            print ("topic name not found")
            continue
        titlefigure = topicsname + ': coldspots of the score of \n' + label.lower() + ' tweets'
        #Backslash written as '\\': '\c' is not a valid escape sequence
        filename = 'Figures\\coldspot_' + topic + '_' + label + '.png'
        coldspot(label, topic, titlefigure, filename)

In [71]:
def hotcoldspot (sentiment, topic, title, pngname):
    """Plot and save a combined hot-/cold-spot map for one sentiment/topic subset.

    Selects the rows of the global ``geotweets`` whose 'label' equals
    ``sentiment`` and whose 'nametopic' equals ``topic``, runs a local Moran's I
    on their 'score' column with row-standardised 20-nearest-neighbour weights
    and draws the points on top of the ``bundeslander`` outlines. Points with
    ``p_sim < 0.05`` are classed as hot spot (quadrant 1) or cold spot
    (quadrant 3); everything else is 'not significant'.

    Parameters
    ----------
    sentiment : value matched against geotweets['label']
    topic : value matched against geotweets['nametopic']
    title : text used as the figure title
    pngname : path the figure is written to with plt.savefig
    """
    from matplotlib import colors

    dbf = geotweets[(geotweets['label'] == sentiment) & (geotweets['nametopic'] == topic)]
    score = dbf['score']

    knn = libpysal.weights.KNN.from_dataframe(dbf,geom_col='geometry',k=20)
    knn.transform = 'r'

    li = esda.moran.Moran_Local(score, knn)
    sig = li.p_sim < 0.05
    #esda codes the quadrants as 1 HH, 2 LH, 3 LL, 4 HL: hot spots are
    #quadrant 1, cold spots (low surrounded by low) are quadrant 3.
    is_hot = sig & (li.q == 1)
    is_cold = sig & (li.q == 3)
    #0 = not significant, 1 = hot spot, 2 = cold spot (the masks are disjoint)
    spots = 1 * is_hot + 2 * is_cold

    spot_labels = [ '0 not significant', '1 hot spot', '2 cold spot']
    labels = [spot_labels[i] for i in spots]

    #Categories are coloured in sorted label order. Listing only the colours of
    #the categories that actually occur prevents the colours from shifting when
    #a class (e.g. cold spots) is absent from this subset.
    palette = dict(zip(spot_labels, ['lightgrey', 'red', 'blue']))
    hmap = colors.ListedColormap([palette[name] for name in sorted(set(labels))])

    f, ax = plt.subplots(1, figsize=(15, 15))
    bundeslander.plot(ax=ax, column='geometry',categorical=True, edgecolor='black', color='white')
    dbf.assign(cl=labels).plot(column='cl', categorical=True, \
        k=2, cmap=hmap, linewidth=0.1, ax=ax, \
        edgecolor='white', legend=True,)
    ax.set_axis_off()
    plt.title(title, fontsize=20)
    plt.savefig(pngname)

In [None]:
#Display names for the topic keys stored in geotweets['nametopic']
topic_titles = {
    'lockdownactivities': 'Lockdown activities',
    'coronapolicies': 'Corona and policies',
    'prevention': 'Prevention',
    'nostalgiaprecovid': 'Nostalgia for pre-covid',
}

#Create a combined hot-/cold-spot map for each label per topic
for label in labels:
    for topic in topics:
        topicsname = topic_titles.get(topic)
        if topicsname is None:
            #No display name: skip this topic instead of failing on (or silently
            #reusing) the title of a previous iteration.
            print ("topic name not found")
            continue
        titlefigure = topicsname + ': hot- and cold spots of the score of \n' + label.lower() + ' tweets'
        #Backslash written as '\\': '\h' is not a valid escape sequence
        filename = 'Figures\\hotcoldspot_' + topic + '_' + label + '2.png'
        hotcoldspot(label, topic, titlefigure, filename)

Local spatial autocorrelation

In [21]:
#Import libraries
import esda
import pandas as pd
import geopandas as gpd
from geopandas import GeoDataFrame
from mylibpysal import libpysal
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point
%matplotlib inline

In [22]:
def moran_scatterplot (sentiment, topic, titlefigure, xlabel, pngname):
    """Draw, save and show a Moran scatterplot for one sentiment/topic subset.

    The subset is taken from the global ``geotweets`` (rows whose 'label'
    equals ``sentiment`` and whose 'nametopic' equals ``topic``). The 'score'
    column is plotted against its spatial lag, computed with row-standardised
    20-nearest-neighbour weights, together with dashed lines at both means and
    a first-degree least-squares fit in red.

    Parameters
    ----------
    sentiment : value matched against geotweets['label']
    topic : value matched against geotweets['nametopic']
    titlefigure : text used as the figure title
    xlabel : label of the x axis
    pngname : path the figure is written to with plt.savefig
    """
    subset_mask = (geotweets['label'] == sentiment) & (geotweets['nametopic'] == topic)
    subset = geotweets[subset_mask]
    values = subset['score']

    weights = libpysal.weights.KNN.from_dataframe(subset, geom_col='geometry', k=20)
    weights.transform = 'r'

    lagged = libpysal.weights.lag_spatial(weights, values)

    # polyfit returns the highest-degree coefficient first: slope, then intercept
    slope, intercept = np.polyfit(values, lagged, 1)
    fig, axis = plt.subplots(1, figsize=(9, 9))

    axis.plot(values, lagged, '.', color='darkslategrey')

    # dashed vertical line at the mean of the score
    axis.vlines(values.mean(), lagged.min(), lagged.max(), linestyle='--')
    # dashed horizontal line at the mean of the lagged score
    axis.hlines(lagged.mean(), values.min(), values.max(), linestyle='--')

    # red line of best fit
    axis.plot(values, intercept + slope*values, 'r')
    axis.set_title(titlefigure, fontsize=20)
    axis.set_ylabel('Spatial lag')
    axis.set_xlabel(xlabel)
    plt.savefig(pngname)
    plt.show()

In [None]:
#Display names for the topic keys stored in geotweets['nametopic']
topic_titles = {
    'lockdownactivities': 'Lockdown activities',
    'coronapolicies': 'Corona and policies',
    'prevention': 'Prevention',
    'nostalgiaprecovid': 'Nostalgia for pre-covid',
}

#Create moran scatterplot for each label per topic
for label in labels:
    for topic in topics:
        topicsname = topic_titles.get(topic)
        if topicsname is None:
            #No display name: skip this topic instead of failing on (or silently
            #reusing) the title of a previous iteration.
            print ("topic name not found")
            continue
        titlefigure = topicsname + ': moran scatterplot of the score of the ' + label.lower() +' tweets'
        xlabel = 'Score of ' + label.lower() + ' sentiment'
        #Backslash written as '\\': '\s' is not a valid escape sequence
        filename = 'Figures\\scatterplot' + topic + '_' + label + '.png'
        moran_scatterplot(label, topic, titlefigure, xlabel, filename)

Moran's I scores per topic and sentiment

In [24]:
def moranscore (sentiment, topic):
    """Print the global Moran's I of the score for one sentiment/topic subset.

    The subset is taken from the global ``geotweets`` (rows whose 'label'
    equals ``sentiment`` and whose 'nametopic' equals ``topic``). Weights are
    row-standardised 20-nearest-neighbour weights. Prints sentiment, topic,
    I, z_sim and p_sim; returns nothing.
    """
    matches_sentiment = geotweets['label'] == sentiment
    matches_topic = geotweets['nametopic'] == topic
    subset = geotweets[matches_sentiment & matches_topic]
    values = subset['score']

    weights = libpysal.weights.KNN.from_dataframe(subset, geom_col='geometry', k=20)
    weights.transform = 'r'

    moran = esda.Moran(values, weights)
    print(sentiment, topic, moran.I, moran.z_sim, moran.p_sim)

In [None]:
#Print Moran's I for each label per topic. moranscore() selects its own
#subset, so no filtered frame is built here.
for label in labels:
    for topic in topics:
        moranscore(label, topic)

Moran's I scores per state and sentiment

In [33]:
def moranscore_state_sentiment (state, sentiment, kwaarde):
    """Print the global Moran's I of the score for one state and sentiment.

    Selects the rows of the global ``geotweets`` whose 'State' equals ``state``
    and whose 'label' equals ``sentiment`` and computes Moran's I on their
    'score' column with row-standardised k-nearest-neighbour weights. Prints
    state, sentiment, I, z_sim and p_sim; returns nothing.

    Parameters
    ----------
    state : value matched against geotweets['State']
    sentiment : value matched against geotweets['label']
    kwaarde : number of nearest neighbours (k) used for the weights
    """
    data = geotweets[geotweets['State'] == state]
    dbf = data[(data['label'] == sentiment)]
    score = dbf['score']

    w = libpysal.weights.KNN.from_dataframe(dbf,geom_col='geometry',k=kwaarde)
    w.transform = 'r'

    mi = esda.Moran(score, w)
    #Report the sentiment that was analysed; the global loop variable `label`
    #was printed here before, which only matched by coincidence.
    print(state, sentiment, mi.I, mi.z_sim, mi.p_sim)

In [None]:
states = geotweets['State'].unique().tolist()

#Moran's I per state and sentiment; subsets of 10 rows or fewer are only reported
for state in states:
    for label in labels:
        df = geotweets[(geotweets['State'] == state) & (geotweets['label'] == label)]
        if len(df) <= 10:
            print(state + label + 'Dataset has 10 or less rows')
            continue
        #Use fewer neighbours for subsets of at most 20 rows
        k = 10 if len(df) <= 20 else 20
        moranscore_state_sentiment(state, label, k)

Kernel density estimation

In [35]:
#Import libraries
import geoplot as gplt
import geoplot.crs as gcrs
import geopandas as gpd
import matplotlib.pyplot as plt

In [None]:
#Create KDE of positive and negative tweets, side by side, clipped to the map of Germany
proj = gcrs.Mercator()

fig = plt.figure(figsize=(15, 10))
ax1 = plt.subplot(121, projection=proj)
ax2 = plt.subplot(122, projection=proj)

gplt.kdeplot(geotweets[geotweets['label']=='Positive'], shade=True, cmap='Reds',clip=geom_germany.geometry,ax=ax1, )
gplt.polyplot(geom_germany,ax=ax1, facecolor='white')
ax1.set_title("Positive tweets", fontsize=20)

gplt.kdeplot(geotweets[geotweets['label']=='Negative'], shade=True, cmap='Reds',clip=geom_germany.geometry,ax=ax2)
gplt.polyplot(geom_germany,ax=ax2, facecolor='white')
ax2.set_title("Negative tweets", fontsize=20)

#Backslash written as '\\': '\p' is not a valid escape sequence
plt.savefig("Figures\\positivenegativeKDE.png")

In [None]:
#Create KDE of the topics 'Corona and policies' ('1') and 'Lockdown activities' ('2')
proj = gcrs.Mercator()

fig = plt.figure(figsize=(15, 10))
ax1 = plt.subplot(121, projection=proj)
ax2 = plt.subplot(122, projection=proj)

gplt.kdeplot(geotweets[geotweets['numbertopicname']=='1'], shade=True, cmap='Reds',clip=geom_germany.geometry,ax=ax1, )
gplt.polyplot(geom_germany,ax=ax1, facecolor='white')
ax1.set_title("Corona and policies", fontsize=20)

gplt.kdeplot(geotweets[geotweets['numbertopicname']=='2'], shade=True, cmap='Reds',clip=geom_germany.geometry,ax=ax2)
gplt.polyplot(geom_germany,ax=ax2, facecolor='white')
ax2.set_title("Lockdown activities", fontsize=20)

#Backslash written as '\\': '\c' is not a valid escape sequence
plt.savefig("Figures\\coronapolicieslockdownactivitiesKDE.png")

In [None]:
#Create KDE of the topic 'Prevention' ('3')
proj = gcrs.Mercator()

fig = plt.figure(figsize=(15, 10))
ax1 = plt.subplot(121, projection=proj)
#Nothing is drawn on ax2 in this cell; it is kept so the figure layout
#matches the two-panel figures of the other topics.
ax2 = plt.subplot(122, projection=proj)

gplt.kdeplot(geotweets[geotweets['numbertopicname']=='3'], shade=True, cmap='Reds',clip=geom_germany.geometry,ax=ax1, )
gplt.polyplot(geom_germany,ax=ax1, facecolor='white')
ax1.set_title("Prevention", fontsize=20)

#Backslash written as '\\': '\p' is not a valid escape sequence
plt.savefig("Figures\\prevention.png")

KDE for each state

In [65]:
import matplotlib

#Reset every rcParam to Matplotlib's default value, which undoes the 'seaborn'
#style and the legend font size configured in the earlier cells.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)

In [66]:
#Create KDE of the positive and negative tweets for each state
def sentimentKDE (state, png):
    """Save a two-panel KDE figure of positive and negative tweets for one state.

    Tweets are the rows of the global ``geotweets`` whose 'State' equals
    ``state``; the density is clipped to, and drawn over, the geometry of the
    rows of ``geom_germany`` whose 'GEN' equals ``state``.

    Parameters
    ----------
    state : value matched against geotweets['State'] and geom_germany['GEN']
    png : path the figure is written to with plt.savefig
    """
    proj = gcrs.Mercator()

    state_tweets = geotweets[geotweets['State'] == state]
    outline = geom_germany[geom_germany['GEN'] == state].geometry

    fig = plt.figure(figsize=(15, 10))
    left = plt.subplot(121, projection=proj)
    right = plt.subplot(122, projection=proj)

    panels = (
        (left, 'Positive', "Positive tweets"),
        (right, 'Negative', "Negative tweets"),
    )
    for axis, sentiment, heading in panels:
        gplt.kdeplot(state_tweets[state_tweets['label']==sentiment], shade=True, cmap='Reds', clip=outline, ax=axis)
        gplt.polyplot(outline, ax=axis, facecolor='white')
        axis.set_title(heading, fontsize=20)

    plt.savefig(png)

In [None]:
states = geotweets['State'].unique().tolist()

#One sentiment KDE figure per state, saved in the Figures folder
for state in states:
    pngname = ''.join(['Figures\\', state, 'sentimentKDE2.png'])
    sentimentKDE(state, pngname)

In [68]:
#Create KDE for the topics for each state

def policieslockdownKDE (state, png):
    """Save a two-panel KDE figure of topics '1' and '2' for one state.

    Tweets are the rows of the global ``geotweets`` whose 'State' equals
    ``state``, split on 'numbertopicname' ('1' titled "Corona and policies",
    '2' titled "Lockdown activities"). The density is clipped to the geometry
    of the rows of ``geom_germany`` whose 'GEN' equals ``state``, and those
    rows are drawn as the outline.

    Parameters
    ----------
    state : value matched against geotweets['State'] and geom_germany['GEN']
    png : path the figure is written to with plt.savefig
    """
    proj = gcrs.Mercator()

    state_tweets = geotweets[geotweets['State'] == state]
    state_shape = geom_germany[geom_germany['GEN'] == state]

    fig = plt.figure(figsize=(15, 10))
    left = plt.subplot(121, projection=proj)
    right = plt.subplot(122, projection=proj)

    panels = (
        (left, '1', "Corona and policies"),
        (right, '2', "Lockdown activities"),
    )
    for axis, topic_code, heading in panels:
        gplt.kdeplot(state_tweets[state_tweets['numbertopicname']==topic_code], shade=True, cmap='Reds', clip=state_shape.geometry, ax=axis)
        gplt.polyplot(state_shape, ax=axis, facecolor='white')
        axis.set_title(heading, fontsize=20)

    plt.savefig(png)

def preventionKDE (state, png):
    """Save a KDE figure of topic '3' ("Prevention") for one state.

    Tweets are the rows of the global ``geotweets`` whose 'State' equals
    ``state`` and whose 'numbertopicname' equals '3'. The density is clipped
    to the geometry of the rows of ``geom_germany`` whose 'GEN' equals
    ``state``, and those rows are drawn as the outline. The plot occupies the
    left half (subplot 121) of the figure.

    Parameters
    ----------
    state : value matched against geotweets['State'] and geom_germany['GEN']
    png : path the figure is written to with plt.savefig
    """
    proj = gcrs.Mercator()

    state_tweets = geotweets[geotweets['State'] == state]
    state_shape = geom_germany[geom_germany['GEN'] == state]
    prevention_tweets = state_tweets[state_tweets['numbertopicname']=='3']

    fig = plt.figure(figsize=(15, 10))
    axis = plt.subplot(121, projection=proj)

    gplt.kdeplot(prevention_tweets, shade=True, cmap='Reds', clip=state_shape.geometry, ax=axis)
    gplt.polyplot(state_shape, ax=axis, facecolor='white')
    axis.set_title("Prevention", fontsize=20)

    plt.savefig(png)

In [None]:
#Topic KDE figures per state: first policies/lockdown for all states ...
for state in states:
    pngname = ''.join(["Figures\\", state, 'policieslockdownKDE2.png'])
    policieslockdownKDE(state, pngname)

#... then prevention for all states
for state in states:
    pngname = ''.join(["Figures\\", state, 'preventionKDE2.png'])
    preventionKDE(state, pngname)