In [1]:
import pandas as pd

In [2]:
# Location of the event export analysed in this notebook, relative to the
# notebook's working directory.
file_name = '../data/filtered_events_country_code.csv'

# Read the comma-separated export into the frame every later cell works on.
df = pd.read_csv(file_name, sep=',')

In [3]:
# Sub-event labels treated as "violent" for this sanity check.
target_events = ["Protest with intervention", "Violent demonstration", "Mob violence"]

# True where the free-text note mentions "violent" in any casing; rows with a
# missing note count as "no mention" (na=False).
mentions_violence = df['notes'].str.contains('violent', case=False, na=False)

# True where the event already carries one of the labels above.
has_target_label = df['sub_event_type'].isin(target_events)

# Events whose note mentions violence although their label is not in the list.
violent_notes = df[mentions_violence & ~has_target_label]

print("Notes that contain the word violent but were not classified as violent: \n")

# One note per entry, each followed by a '---' separator line.
for note in violent_notes['notes']:
    print(note, '---', sep='\n')

Notes that contain the word violent but were not classified as violent: 

On 20 May 2025, in the evening, at the call of the Villamor community association, several dozen people from the Lutxana neighborhood protested in Barakaldo (Pais Vasco) to demand more security measures after a violent incident in which a man was killed in the area.
---
On 20 May 2025, citizens gathered in front of the municipality building in Kula, demanding accountability from all levels of government in Serbia for the state of society. A group of SNS supporters counter-protested and engaged in pushing, throwing objects, fireworks, and shouting at the protesting citizens. The police separated the cords and prevented more violent outcomes.
---
On 18 May 2025, in the morning, at the call of the Association Justice Animaux Savoie (Association Justice Animaux Savoie - AJAS), about 10 animal rights activists staged a protest outside the Les Herens en fete festival in Ugine (Auvergne-Rhone-Alpes) to denounce cow figh

In [4]:
# How many events report each fatality figure, most frequent figure first.
fatality_count = df.loc[:, 'fatalities'].value_counts()
print(fatality_count)

fatalities
0    183075
2         3
1         1
3         1
Name: count, dtype: int64


In [5]:
# Number of events recorded under each sub-event type, largest group first.
disorder_counts = df.loc[:, 'sub_event_type'].value_counts()

# The extra "\n" argument only leaves spacing below the printed table.
print(disorder_counts, "\n", sep=' ')

sub_event_type
Peaceful protest                      174013
Protest with intervention               4673
Violent demonstration                   4320
Excessive force against protesters        74
Name: count, dtype: int64 



In [6]:
# Sub-event labels counted as violent in this cell. "Protest with intervention"
# is intentionally left out of the list here.
target_events = ["Violent demonstration", "Mob violence"]

# Country code = first three characters of the event id, stored on df itself.
df['country_code'] = df['event_id_cnty'].astype(str).str[:3]

# All events per country, and each country's share of the whole dataset in %.
country_code_counts = df['country_code'].value_counts()
total_events = country_code_counts.sum()
country_percentages = country_code_counts / total_events * 100

# Events carrying one of the violent labels. The copy is taken after
# country_code was added to df, so the column is already part of it.
is_violent = df['sub_event_type'].isin(target_events)
violent_df = df[is_violent].copy()
violent_counts = violent_df['country_code'].value_counts()

# The three series are aligned on country code. Countries without any violent
# event come out as NaN and are turned into 0 before the ratio is computed.
summary = (
    pd.DataFrame({
        'total_protests': country_code_counts,
        'percentage_of_all': country_percentages.round(2),
        'violent_protests': violent_counts,
    })
    .assign(violent_protests=lambda tbl: tbl['violent_protests'].fillna(0).astype(int))
    .assign(
        percent_violent=lambda tbl: (
            tbl['violent_protests'] / tbl['total_protests'] * 100
        ).round(2)
    )
    .sort_values(by='percent_violent', ascending=False)
)

print("Total protests per country not noted as peaceful protests:\n")
print(summary.to_string())


Total protests per country not noted as peaceful protests:

              total_protests  percentage_of_all  violent_protests  percent_violent
country_code                                                                      
GRC                     4670               2.55               582            12.46
ALB                     1251               0.68               134            10.71
FRA                    38774              21.18              1421             3.66
NLD                     4380               2.39               141             3.22
CYP                     1784               0.97                48             2.69
CHE                     1041               0.57                28             2.69
DEU                    23792              13.00               569             2.39
AUT                     2033               1.11                46             2.26
GBR                     8210               4.48               176             2.14
ESP                    2076

In [7]:
# Tally of the assoc_actor_1 field over all events (missing values skipped),
# and each value's share of all non-missing entries in %.
actor_counts = df['assoc_actor_1'].dropna().value_counts()
total_actor_mentions = actor_counts.sum()
actor_percentages = actor_counts / total_actor_mentions * 100

# Same tally restricted to violent_df, which an earlier cell must have built.
violent_actors = violent_df['assoc_actor_1'].dropna().value_counts()

# Align on actor label; actors with no violent event get 0 instead of NaN.
actor_summary = (
    pd.DataFrame({
        'total_protests': actor_counts,
        'percentage_of_all': actor_percentages.round(2),
        'violent_protests': violent_actors,
    })
    .assign(violent_protests=lambda tbl: tbl['violent_protests'].fillna(0).astype(int))
    .assign(
        percent_violent=lambda tbl: (
            tbl['violent_protests'] / tbl['total_protests'] * 100
        ).round(2)
    )
)

# Cut labels to 50 characters so the printed table stays readable.
actor_summary.index = actor_summary.index.str[:50]

actor_summary = actor_summary.sort_values(by='total_protests', ascending=False)

print("Total protests per actor not noted as peaceful:\n")
print(actor_summary.to_string())

Total protests per actor not noted as peaceful:

                                                    total_protests  percentage_of_all  violent_protests  percent_violent
assoc_actor_1                                                                                                           
Labor Group (France)                                          3540               2.61                94             2.66
XR: Extinction Rebellion                                      3202               2.36                10             0.31
Labor Group (Spain)                                           2906               2.14                77             2.65
Labor Group (Italy)                                           2462               1.82                11             0.45
FFF: Fridays for Future; Students (Sweden)                    2383               1.76                 0             0.00
CGT: General Confederation of Labor (France); Labo            2024               1.49                35 

In [8]:
# Number of events under each top-level event type.
event_types = df.loc[:, 'event_type'].value_counts()
print(event_types)

event_type
Protests    178760
Riots         4320
Name: count, dtype: int64


In [9]:
# Number of events per value of the 'interaction' column, most common first.
interaction_types = df.loc[:, 'interaction'].value_counts()
print(interaction_types)

interaction
60    171518
16      4345
15      2798
66      2500
50      1068
56       332
55       212
57       188
68        74
58        31
36        13
35         1
Name: count, dtype: int64


In [10]:
# Country code = first three characters of the event id, stored on df itself.
df['country_code'] = df['event_id_cnty'].astype(str).str[:3]

# All events per country, and each country's share of the whole dataset in %.
country_counts = df['country_code'].value_counts()
total_events = country_counts.sum()
country_percentages = country_counts / total_events * 100

# Events whose event_type is 'Riots'. The copy is taken after country_code
# was added to df, so the column is already part of it.
is_riot = df['event_type'] == 'Riots'
riots_df = df[is_riot].copy()
riot_counts = riots_df['country_code'].value_counts()

# Align on country code; countries without riots get 0 instead of NaN before
# the riot share is computed.
summary = (
    pd.DataFrame({
        'total_protests': country_counts,
        'percentage_of_all': country_percentages.round(2),
        'riot_protests': riot_counts,
    })
    .assign(riot_protests=lambda tbl: tbl['riot_protests'].fillna(0).astype(int))
    .assign(
        percent_riots=lambda tbl: (
            tbl['riot_protests'] / tbl['total_protests'] * 100
        ).round(2)
    )
    .sort_values(by='percent_riots', ascending=False)
)

print("Riots per country:\n")
print(summary.to_string())


Riots per country:

              total_protests  percentage_of_all  riot_protests  percent_riots
country_code                                                                 
GRC                     4670               2.55            582          12.46
ALB                     1251               0.68            134          10.71
FRA                    38774              21.18           1421           3.66
NLD                     4380               2.39            141           3.22
CYP                     1784               0.97             48           2.69
CHE                     1041               0.57             28           2.69
DEU                    23792              13.00            569           2.39
AUT                     2033               1.11             46           2.26
GBR                     8210               4.48            176           2.14
ESP                    20764              11.34            396           1.91
ITA                    25436              13

In [11]:
# Tally of the assoc_actor_1 field over all events (missing values skipped),
# and each value's share of all non-missing entries in %.
actor_counts = df['assoc_actor_1'].dropna().value_counts()
total_actor_mentions = actor_counts.sum()
actor_percentages = actor_counts / total_actor_mentions * 100

# Same tally restricted to events whose event_type is 'Riots'.
is_riot = df['event_type'] == 'Riots'
riots_df = df[is_riot]
riot_actor_counts = riots_df['assoc_actor_1'].dropna().value_counts()

# Align on actor label; actors with no riot event get 0 instead of NaN.
actor_summary = (
    pd.DataFrame({
        'total_protests': actor_counts,
        'percentage_of_all': actor_percentages.round(2),
        'riot_protests': riot_actor_counts,
    })
    .assign(riot_protests=lambda tbl: tbl['riot_protests'].fillna(0).astype(int))
    .assign(
        percent_riots=lambda tbl: (
            tbl['riot_protests'] / tbl['total_protests'] * 100
        ).round(2)
    )
)

# Cut labels to 50 characters so the printed table stays readable.
actor_summary.index = actor_summary.index.str[:50]

actor_summary = actor_summary.sort_values(by='total_protests', ascending=False)

print("Riots per actor:\n")
print(actor_summary.to_string())


Riots per actor:

                                                    total_protests  percentage_of_all  riot_protests  percent_riots
assoc_actor_1                                                                                                      
Labor Group (France)                                          3540               2.61             94           2.66
XR: Extinction Rebellion                                      3202               2.36             10           0.31
Labor Group (Spain)                                           2906               2.14             77           2.65
Labor Group (Italy)                                           2462               1.82             11           0.45
FFF: Fridays for Future; Students (Sweden)                    2383               1.76              0           0.00
CGT: General Confederation of Labor (France); Labo            2024               1.49             35           1.73
CFDT: French Democratic Confederation of Labor; CF    