In [62]:
import pandas as pd

In [63]:
df = pd.read_csv('../data/filtered_events_country_code.csv')
print(df['event_type'].unique())
print(df['sub_event_type'].unique())
print(df['disorder_type'].unique())

['Protests' 'Riots']
['Peaceful protest' 'Protest with intervention' 'Violent demonstration'
 'Excessive force against protesters']
['Demonstrations' 'Political violence; Demonstrations']


In [64]:
df["violent"] = "NoN"
df.head()
df.to_csv('../data/violent_bool.csv', index=False)

In [65]:
for index, row in df.iterrows():
    if row["sub_event_type"] == "Peaceful protest" or row["sub_event_type"] == "Protest with intervention":
        df.at[index, "violent"] = 0
    elif row["sub_event_type"] == "Violent demonstration" or row["sub_event_type"] == "Excessive force against protesters":
        df.at[index, "violent"] = 1
df.to_csv('../data/violent_bool.csv', index=False)
print(df['violent'].value_counts())

violent
0    178686
1      4394
Name: count, dtype: int64


In [66]:
df['country_code'] = df['event_id_cnty'].astype(str).str[:3]
country_code_counts = df['country_code'].value_counts()

violent_counts = df[df['violent'] == 1]['country_code'].value_counts()
non_violent_counts = df[df['violent'] == 0]['country_code'].value_counts()
total_counts = df['country_code'].value_counts()

country_counts = pd.DataFrame({
    'violent': violent_counts,
    'non_violent': non_violent_counts,
    'total': total_counts
}).fillna(0)
country_counts['violent_percentage'] = (country_counts['violent'] / (country_counts['violent'] + country_counts['non_violent'])) * 100
country_counts.to_csv('../data/violent_per_country.csv')

In [67]:
print(country_counts.sort_values(by='violent_percentage', ascending=False))


              violent  non_violent  total  violent_percentage
country_code                                                 
GRC             588.0         4082   4670           12.591006
ALB             135.0         1116   1251           10.791367
FRA            1432.0        37342  38774            3.693196
NLD             142.0         4238   4380            3.242009
CHE              29.0         1012   1041            2.785783
CYP              48.0         1736   1784            2.690583
DEU             578.0        23214  23792            2.429388
AUT              49.0         1984   2033            2.410231
GBR             178.0         8032   8210            2.168088
ESP             400.0        20364  20764            1.926411
MDA              29.0         1676   1705            1.700880
ITA             430.0        25006  25436            1.690517
LUX               3.0          191    194            1.546392
BEL              61.0         4002   4063            1.501354
BIH     

In [68]:
df = df.dropna(subset=['assoc_actor_1'])
df['assoc_actor_1'] = df['assoc_actor_1'].str.split(';')
df = df.explode('assoc_actor_1')
df['assoc_actor_1'] = df['assoc_actor_1'].str.replace(r'\(.*?\)', '', regex=True).str.strip()

violent_actor_counts = df[df['violent'] == 1]['assoc_actor_1'].value_counts()
non_violent_actor_counts = df[df['violent'] == 0]['assoc_actor_1'].value_counts()
total_actor_counts = df['assoc_actor_1'].value_counts()

actor_counts = pd.DataFrame({
    'violent': violent_actor_counts,
    'non_violent': non_violent_actor_counts,
    'total': total_actor_counts
}).fillna(0)

actor_counts['violent_percentage'] = (actor_counts['violent'] / (actor_counts['violent'] + actor_counts['non_violent'])) * 100
actor_counts.to_csv('../data/violent_per_actor.csv')
print(actor_counts)

                               violent  non_violent  total  violent_percentage
assoc_actor_1                                                                 
1 of 5 Million                     1.0        678.0    679            0.147275
100% Animalisti                    1.0         41.0     42            2.380952
1030/0 Collective                  0.0          1.0      1            0.000000
18 April Committee                 0.0          7.0      7            0.000000
269 LA: 269 Animal Liberation      0.0          3.0      3            0.000000
...                                ...          ...    ...                 ...
Zero Waste Europe                  0.0          5.0      5            0.000000
ZeroCovid                          0.0         10.0     10            0.000000
Zimbabwean Group                   0.0          1.0      1            0.000000
Zouaves Paris                      0.0          1.0      1            0.000000
sp.a: Flemish Socialist Party      0.0          5.0 

In [69]:
print(actor_counts.sort_values(by='violent_percentage', ascending=False))

                                  violent  non_violent  total  \
assoc_actor_1                                                   
BNP: British National Party           2.0          0.0      2   
Vigilante Group                       2.0          0.0      2   
SLD: Democratic Left Alliance         1.0          0.0      1   
Revolutionary Struggle                1.0          0.0      1   
Will                                  1.0          0.0      1   
...                                   ...          ...    ...   
Government of the People APS          0.0          3.0      3   
Government of the Czech Republic      0.0         42.0     42   
Government of Ukraine                 0.0         65.0     65   
Government of Turkey                  0.0          1.0      1   
sp.a: Flemish Socialist Party         0.0          5.0      5   

                                  violent_percentage  
assoc_actor_1                                         
BNP: British National Party                 

In [70]:
# klopt nog niet denk ik
def probability(country, actor):
    p_country = country_counts['total'].get(country) / country_counts['total'].sum()
    p_actor = actor_counts['total'].get(actor) / actor_counts['total'].sum()
    p_violent = country_counts['violent'].get(country) / country_counts['total'].get(country)
    p_country_given_actor = actor_counts['violent'].get(actor) / actor_counts['violent'].sum()
    p_actor_given_country = actor_counts['violent'].get(actor) / actor_counts['total'].get(actor)
    likelihood = p_country_given_actor * p_actor_given_country
    prior = p_violent
    marginal = p_country * p_actor
    return (likelihood * prior) / marginal

print(probability('NLD', 'XR: Extinction Rebellion'))


0.002258463566184467


In [72]:
# what is the violent percentage of XR: Extinction Rebellion?
XR = actor_counts.loc['XR: Extinction Rebellion', 'violent_percentage']
NLD = country_counts.loc['NLD', 'violent_percentage']
print(XR * NLD)

1.8993293700523293
