In [52]:
import pandas as pd

In [53]:
# Relative path of the events CSV that this notebook analyses.
file_name = '../data/filtered_events_country_code.csv'

# Read the comma-separated file into the frame the cells below query.
df = pd.read_csv(file_name, sep=',')

In [54]:
# Sub-event types that this check treats as already flagged as violent.
target_events = [
    "Protest with intervention",
    "Violent demonstration",
    "Mob violence",
]

# Rows whose free-text note mentions "violent" (case-insensitive; rows with a
# missing note count as "no match"). NOTE: this is a substring test, so words
# that merely embed it, such as "nonviolent" or "violently", match as well.
mentions_violent = df['notes'].str.contains('violent', case=False, na=False)

# Rows whose sub-event type is one of the target types listed above.
in_target_events = df['sub_event_type'].isin(target_events)

# Keep the rows that mention violence but fall outside the target types.
violent_notes = df.loc[mentions_violent & ~in_target_events]

print("Notes that contain the word violent but were not classified as violent: \n")
for note in violent_notes['notes']:
    # One note per entry, each followed by a separator line.
    print(note, '---', sep='\n')

Notes that contain the word violent but were not classified as violent: 

On 20 May 2025, in the evening, at the call of the Villamor community association, several dozen people from the Lutxana neighborhood protested in Barakaldo (Pais Vasco) to demand more security measures after a violent incident in which a man was killed in the area.
---
On 20 May 2025, citizens gathered in front of the municipality building in Kula, demanding accountability from all levels of government in Serbia for the state of society. A group of SNS supporters counter-protested and engaged in pushing, throwing objects, fireworks, and shouting at the protesting citizens. The police separated the cords and prevented more violent outcomes.
---
On 18 May 2025, in the morning, at the call of the Association Justice Animaux Savoie (Association Justice Animaux Savoie - AJAS), about 10 animal rights activists staged a protest outside the Les Herens en fete festival in Ugine (Auvergne-Rhone-Alpes) to denounce cow figh

In [55]:
# Tally how many events share each value of the fatalities column
# (value_counts lists the most frequent value first).
fatality_count = df.loc[:, 'fatalities'].value_counts()
print(fatality_count)

fatalities
0    185800
1        18
2         3
3         2
Name: count, dtype: int64


In [56]:
# Frequency of each sub-event type across the loaded events, largest first.
disorder_counts = df.loc[:, 'sub_event_type'].value_counts()

# Print the table followed by a space and a newline, which leaves blank
# lines after the table in the cell output.
print(str(disorder_counts) + " \n")

sub_event_type
Peaceful protest                      174013
Protest with intervention               4673
Violent demonstration                   4320
Mob violence                            2743
Excessive force against protesters        74
Name: count, dtype: int64 



In [None]:
# Sub-event types counted as "violent" for the per-country statistics below.
# "Protest with intervention" is currently left out (its entry is commented out).
# Every active entry ends with a comma so that re-enabling the last line cannot
# silently merge two adjacent string literals into one label.
target_events = [
    "Violent demonstration",
    "Mob violence",
    #"Protest with intervention",
]

# Country code = first three characters of the event id; count all events per
# country. Note that this (re)assigns the country_code column on df itself.
df['country_code'] = df['event_id_cnty'].astype(str).str[:3]
country_code_counts = df['country_code'].value_counts()

# Each country's share of all events, in percent.
total_events = country_code_counts.sum()
country_percentages = (country_code_counts / total_events) * 100

# Events whose sub-event type is in target_events. The copy already carries the
# country_code column assigned on df above, so it is not derived a second time.
violent_df = df[df['sub_event_type'].isin(target_events)].copy()

violent_counts = violent_df['country_code'].value_counts()

# Combine the three per-country Series; they are aligned on the country code.
summary = pd.DataFrame({
    'total_protests': country_code_counts,
    'percentage_of_all': country_percentages.round(2),
    'violent_protests': violent_counts
})

# Countries absent from violent_counts come out of the alignment as NaN;
# record them as zero violent events and restore an integer dtype.
summary['violent_protests'] = summary['violent_protests'].fillna(0).astype(int)

# Share of each country's events that are violent, in percent, highest first.
summary['percent_violent'] = (
    summary['violent_protests'] / summary['total_protests'] * 100
).round(2)
summary = summary.sort_values(by='percent_violent', ascending=False)


print("Total protests per country with violent protest stats:\n")
print(summary.to_string())


Total protests per country with violent protest stats:

              total_protests  percentage_of_all  violent_protests  percent_violent
country_code                                                                      
GRC                     5220               2.81              1132            21.69
ALB                     1284               0.69               167            13.01
CYP                     1894               1.02               158             8.34
FRA                    39236              21.11              1883             4.80
NLD                     4446               2.39               207             4.66
GBR                     8371               4.50               337             4.03
POL                     7829               4.21               277             3.54
DEU                    24050              12.94               827             3.44
CHE                     1049               0.56                36             3.43
BIH                     1148   

In [64]:
# How often each assoc_actor_1 value appears across all events (rows without a
# value are dropped first), and each value's share of those mentions in percent.
# NOTE(review): a value that lists several actors (e.g. separated by ';') is
# tallied as one combined label, not per individual actor; confirm this is intended.
actor_counts = df['assoc_actor_1'].dropna().value_counts()
total_actor_mentions = actor_counts.sum()
actor_percentages = (actor_counts / total_actor_mentions) * 100

# The same tally restricted to violent_df, the violent subset built in an
# earlier cell; that cell must have been run before this one.
violent_actors = violent_df['assoc_actor_1'].dropna().value_counts()

# Combine the per-actor Series; they are aligned on the actor label.
actor_summary = pd.DataFrame({
    'total_protests': actor_counts,
    'percentage_of_all': actor_percentages.round(2),
    'violent_protests': violent_actors
})

# Actors absent from violent_actors come out of the alignment as NaN;
# record them as zero violent events and restore an integer dtype.
actor_summary['violent_protests'] = actor_summary['violent_protests'].fillna(0).astype(int)
actor_summary['percent_violent'] = (
    actor_summary['violent_protests'] / actor_summary['total_protests'] * 100
).round(2)

actor_summary = actor_summary.sort_values(by='percent_violent', ascending=False)

# Shorten the labels to 50 characters in a printing copy only. actor_summary
# keeps the full actor names, so no information is lost and distinct actors
# cannot end up sharing a truncated label in the stored result.
actor_summary_display = actor_summary.copy()
actor_summary_display.index = actor_summary_display.index.str.slice(0, 50)

print("Total protests per actor with violent protest stats:\n")
print(actor_summary_display.to_string())

Total protests per actor with violent protest stats:

                                                    total_protests  percentage_of_all  violent_protests  percent_violent
assoc_actor_1                                                                                                           
XA: Golden Dawn; Police Forces of Greece (2019-);                1               0.00                 1           100.00
1 of 5 Million; SZS: Alliance for Serbia; Students               1               0.00                 1           100.00
Zouaves Paris                                                    2               0.00                 2           100.00
ANPI: National Association of Italian Partisans; A               1               0.00                 1           100.00
AUR: Alliance for the Union of Romanians; Governme               2               0.00                 2           100.00
Government of Moldova (2019-2023) Gagauzia Regiona               1               0.00              