In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn  as sns
import geopandas as gpd
import fiona
import shapely
import matplotlib as mpl
from statannotations.Annotator import Annotator
from statannotations.stats.StatTest import StatTest
import pickle
from shapely.geometry import Point, MultiPoint
import scipy
from shapely import wkt
import statsmodels.api as sm


In [None]:
# Global Matplotlib styling shared by every figure in this notebook:
# font sizes for titles/labels/ticks/legend, and no top/right axes spines.
mpl.rcParams.update({
    'axes.titlesize': 16,
    'axes.labelsize': 14,
    'legend.fontsize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'axes.spines.right': False,
    'axes.spines.top': False,
    'font.size': 16,
})

#### Load dataset

In [None]:
# Read the Data_S4 table into a DataFrame (path is relative to the working directory).
df = pd.read_csv("../Data/Data_S4.csv")

In [None]:
# Parse the WKT text stored in the geometry column into shapely geometry objects.
df["geometry"] = df["geometry"].map(wkt.loads)


In [None]:
# Transform the DataFrame into a GeoDataFrame, declaring its coordinates as EPSG:3035.
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:3035")


#### Plots and statistical analysis

##### Association with population

In [None]:
# Boxplot of log10 population by Fear status, annotated with the significance
# of an independent two-sample t-test between the two groups.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
# Draw on the axes created above and pin the category order: the Annotator
# below is told order=["No", "Yes"], so the plot must use the same order
# instead of whatever order seaborn infers from the data.
ax = sns.boxplot(data=gdf, x="Fear", y="logP", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
annotator = Annotator(ax, pairs, data=gdf, x="Fear", y="logP", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel(r"$\log_{10}$ Population (1789)")
plt.show()

##### Association with population: restricted to towns (more than 2000 inhabitants)

In [None]:
# Same population comparison, restricted to the rows flagged as towns.
towns = gdf[gdf.Town]  # filter once, reuse for the plot and the annotator
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax = sns.boxplot(data=towns, x="Fear", y="logP", order=["No", "Yes"], ax=ax)
pairs = [("No", "Yes")]
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator = Annotator(ax, pairs, data=towns, x="Fear", y="logP", order=["No", "Yes"])
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel(r"$\log_{10}$ Population in towns (1789)")
plt.show()

##### Plot map

In [None]:
# Read the France outline layer that serves as the background of the map below.
fr_outline = gpd.read_file("../Maps/france_outline")

In [None]:
# Map of the towns over the outline of France: colour encodes Fear,
# marker size is proportional to `poptot`.
towns = gdf[gdf.Town]  # filter once; reused for the points and their sizes
fig = plt.figure(figsize=(8,8))
ax = fig.add_axes([0, 0, 1, 1])  # axes fill the whole figure
ax.axis('off')
fr_outline.plot(facecolor="white", edgecolor='black', lw=0.3, ax=ax)
towns.plot(column="Fear", marker=".", ax=ax, cmap="coolwarm", s=towns['poptot']/200)
# Limits are in the layers' projected coordinates (gdf is declared as EPSG:3035;
# fr_outline is assumed to use the same CRS -- TODO confirm).
ax.set_xlim(0.32e7, 0.42e7)
ax.set_ylim(2.1e6, 3.2e6)
# End with plt.show() like the other plotting cells, so the cell renders the
# figure only and not the repr of the axis limits.
plt.show()

##### Correlation with the participation in the 1793 referendum

In [None]:
# Boxplot of referendum participation (`ppar`) by Fear status, annotated with
# the significance of an independent two-sample t-test between the two groups.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
# Draw on the axes created above and pin the category order: the Annotator
# below is told order=["No", "Yes"], so the plot must use the same order
# instead of whatever order seaborn infers from the data.
ax = sns.boxplot(data=gdf, x="Fear", y="ppar", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
annotator = Annotator(ax, pairs, data=gdf, x="Fear", y="ppar", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel(r"Participation in 1793 referendum")
plt.show()

##### Correlation with the participation in the 1793 referendum (towns with more than 2000 inhabitants)

In [None]:
# Same participation comparison, restricted to the rows flagged as towns.
towns = gdf[gdf.Town]  # filter once, reuse for the plot and the annotator
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax = sns.boxplot(data=towns, x="Fear", y="ppar", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator = Annotator(ax, pairs, data=towns, x="Fear", y="ppar", order=["No", "Yes"])
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel(r"Participation in 1793 referendum (towns)")
plt.show()

##### Association with literacy

In [None]:
# Boxplot of the `% signed` literacy measure by Fear status, annotated with
# the significance of an independent two-sample t-test between the two groups.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
# Pin the category order on both the plot and the Annotator. Without it the
# box order is inferred from the data, which need not match the
# "No" then "Yes" layout used by the other figures in this notebook.
ax = sns.boxplot(data=gdf, x="Fear", y="% signed", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
annotator = Annotator(ax, pairs, data=gdf, x="Fear", y="% signed", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel("Literacy rate (1786)")
plt.show()

##### Association with literacy (towns with more than 2000 inhabitants)

In [None]:
# Boxplot of the `% signed` literacy measure by Fear status for rows flagged
# as towns, annotated with the significance of an independent two-sample t-test.
towns = gdf[gdf.Town]  # filter once, reuse for the plot and the annotator
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax = sns.boxplot(data=towns, x="Fear", y="% signed", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
# The Annotator must be given the same `order` as the plot; otherwise it
# infers the group order from the data, which may differ from the drawn one.
annotator = Annotator(ax, pairs, data=towns, x="Fear", y="% signed", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel("Literacy rate (1786 towns)")
plt.show()

##### Association with income

In [None]:
# Boxplot of `revratio1790` (relative income) by Fear status, annotated with
# the significance of an independent two-sample t-test between the two groups.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax = sns.boxplot(data=gdf, x="Fear", y="revratio1790", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
# The Annotator must be given the same `order` as the plot; otherwise it
# infers the group order from the data, which may differ from the drawn one.
annotator = Annotator(ax, pairs, data=gdf, x="Fear", y="revratio1790", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel("Relative income per person (1790)")
plt.show()

##### Association with income (towns of more than 2000 inhabitants)

In [None]:
# Boxplot of `revratio1790` (relative income) by Fear status for rows flagged
# as towns, annotated with the significance of an independent two-sample t-test.
towns = gdf[gdf.Town]  # filter once, reuse for the plot and the annotator
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax = sns.boxplot(data=towns, x="Fear", y="revratio1790", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
# The Annotator must be given the same `order` as the plot; otherwise it
# infers the group order from the data, which may differ from the drawn one.
annotator = Annotator(ax, pairs, data=towns, x="Fear", y="revratio1790", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel("Relative income per person (1790 towns)")
plt.show()

##### Association with ownership

In [None]:
# Boxplot of `perpropri1790` (ownership share) by Fear status, annotated with
# the significance of an independent two-sample t-test between the two groups.
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax = sns.boxplot(data=gdf, x="Fear", y="perpropri1790", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
# The Annotator must be given the same `order` as the plot; otherwise it
# infers the group order from the data, which may differ from the drawn one.
annotator = Annotator(ax, pairs, data=gdf, x="Fear", y="perpropri1790", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel("% ownership (1790)")
plt.show()

In [None]:
##### Association with ownership (towns of more than 2000 inhabitants)

In [None]:
# Boxplot of `perpropri1790` (ownership share) by Fear status for rows flagged
# as towns, annotated with the significance of an independent two-sample t-test.
towns = gdf[gdf.Town]  # filter once, reuse for the plot and the annotator
fig, ax = plt.subplots(1, 1, figsize=(4, 4))
ax = sns.boxplot(data=towns, x="Fear", y="perpropri1790", order=["No", "Yes"], ax=ax)
pairs = [("Yes", "No")]
# The Annotator must be given the same `order` as the plot; otherwise it
# infers the group order from the data, which may differ from the drawn one.
annotator = Annotator(ax, pairs, data=towns, x="Fear", y="perpropri1790", order=["No", "Yes"])
# Descriptors required to wrap scipy's t-test as a statannotations StatTest
custom_long_name = 'T Test'
custom_short_name = 'T Test'
custom_func = scipy.stats.ttest_ind
custom_test = StatTest(custom_func, custom_long_name, custom_short_name)
annotator.configure(test=custom_test, text_format='star', loc='outside')
annotator.apply_and_annotate()
ax.set_ylabel("% ownership (1790 towns)")
plt.show()

#### Multiple logistic regression

In [None]:
# Regression sample: town rows only, limited to the model columns,
# with every row that has a missing value dropped.
model_columns = ["logP","revratio1790","perpropri1790","% signed",'ppar',"Fear"]
df1 = gdf.loc[gdf.Town, model_columns].dropna()

In [None]:
def binarize_fear(x):
    """Return 1 when `x` equals "Yes", and 0 for any other value."""
    return 1 if x == "Yes" else 0

In [None]:
# Encode the outcome as 1 ("Yes") / 0 (any other value) via binarize_fear.
# Guarded on dtype so the cell can be re-run safely: once the column is numeric
# it has already been encoded, and mapping it again would turn every label
# into 0 because binarize_fear only matches the string "Yes".
if not pd.api.types.is_numeric_dtype(df1["Fear"]):
    df1["Fear"] = df1["Fear"].apply(binarize_fear)

In [None]:
# Predictor matrix and response vector for the logistic regression.
predictors = ['logP', 'revratio1790', 'perpropri1790', '% signed', 'ppar']
X = df1[predictors]
Y = df1["Fear"]

In [None]:
# Add a constant column to the predictors so the fitted model has an intercept term.
X = sm.add_constant(X)

# Build and fit the logistic regression of Fear on the predictors.
model = sm.Logit(endog=Y, exog=X)
result = model.fit()

# Show the fitted coefficients and fit statistics.
print(result.summary())
