In [6]:
import pandas as pd

# Raw tables exactly as read from disk.
# DO NOT CHANGE THESE DATAFRAMES - every analysis below works on a copy.
df_threats = pd.read_csv('Global_Cybersecurity_Threats_2015-2024.csv')
df_indices = pd.read_csv('Cyber_security.csv')
df_loss = pd.read_csv('LossFromNetCrime.csv')

# Independent working copies, so edits made further down never reach the raw frames.
threats, indices, loss = (raw.copy() for raw in (df_threats, df_indices, df_loss))

# Abbreviated country names in the threats table -> the long form,
# so that they match the indices table when the two are joined on 'Country'.
name_map = {'USA': 'United States', 'UK': 'United Kingdom'}
threats = threats.assign(Country=threats['Country'].replace(name_map))

1) Do countries with higher cybersecurity index scores (CEI, GCI, NCSI, DDL) have fewer attacks?

In [7]:
# Columns of the indices table that are compared against the attack counts.
index_cols = ['CEI', 'GCI', 'NCSI', 'DDL']

# Count attacks per country: one row of `threats` is counted as one attack,
# and all years in the dataset are combined.
attacks_per_country = (
    threats
    .groupby('Country')
    .size()
    .reset_index(name='attacks')
)

# Keep only countries that exist in the indices table. An inner join drops the
# countries without a match instead of carrying them along as all-NaN rows.
# NOTE(review): the correlations below are computed over len(attacks_idx)
# countries only - check that number before reading much into the values.
attacks_idx = attacks_per_country.merge(indices, on='Country', how='inner')

# Pearson correlation (the pandas default) between "attacks" and each index.
# Only the 'attacks' row of the correlation matrix is of interest here.
corrs = attacks_idx[['attacks'] + index_cols].corr().loc['attacks', index_cols]
print(corrs.to_string())

CEI     0.004837
GCI     0.441569
NCSI    0.265943
DDL    -0.102155


(ŁUKASZ) Correlation among indices (NCSI, CEI, GCI, DDL)

In [8]:
# Pairwise correlation matrix of the four index columns, rounded to 3 decimals.
index_corr = indices.loc[:, ['CEI', 'GCI', 'NCSI', 'DDL']].corr()
print(index_corr.round(3))

        CEI    GCI   NCSI    DDL
CEI   1.000 -0.729 -0.734 -0.896
GCI  -0.729  1.000  0.831  0.631
NCSI -0.734  0.831  1.000  0.701
DDL  -0.896  0.631  0.701  1.000


(ŁUKASZ) Max/min attacks per country each year (from threats)

In [11]:
# Number of rows in `threats` for every (Year, Country) pair.
attack_year_country = (
    threats
    .groupby(['Year', 'Country'])
    .size()
    .reset_index(name='attacks')
)

# Group the counts by year once; idxmax / idxmin then give the row label of the
# country with the most / fewest attacks in each year (first one wins on ties).
attacks_by_year = attack_year_country.groupby('Year')['attacks']
max_attack_per_year = attack_year_country.loc[attacks_by_year.idxmax()].sort_values('Year')
min_attack_per_year = attack_year_country.loc[attacks_by_year.idxmin()].sort_values('Year')

print("Max attacks per year:", max_attack_per_year.to_string(index=False), sep="\n")
print("\nMin attacks per year:", min_attack_per_year.to_string(index=False), sep="\n")

Max attacks per year:
 Year        Country  attacks
 2015 United Kingdom       34
 2016 United Kingdom       36
 2017          Japan       41
 2018          India       41
 2019 United Kingdom       32
 2020         France       38
 2021 United Kingdom       35
 2022      Australia       39
 2023          India       38
 2024          China       37

Min attacks per year:
 Year       Country  attacks
 2015       Germany       21
 2016         Japan       16
 2017         China       23
 2018         China       22
 2019       Germany       20
 2020     Australia       23
 2021         India       24
 2022        Brazil       25
 2023        Russia       24
 2024 United States       22


(ŁUKASZ) Losses & complaints: per year, per country + loss per complaint (money lost / complaints)

In [15]:
# Derived column: money lost per complaint.
# `assign` returns a new frame, so re-running this cell is safe.
# Rows with zero complaints get NaN instead of +/-inf; an infinite ratio would
# otherwise always be picked as the "highest loss per incident" for its year.
loss = loss.assign(
    loss_per_complaint=lambda d: d['Losses'] / d['Complaints'].where(d['Complaints'] != 0)
)


def _extreme_rows_by_year(frame, column, pick):
    """Return one row per Year: the row where `column` is largest or smallest.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with a 'Year' column and the column named by `column`.
    column : str
        Name of the numeric column to rank within each year.
    pick : str
        'idxmax' for the largest value per year, 'idxmin' for the smallest.

    Returns
    -------
    pandas.DataFrame
        The selected rows of `frame`, sorted by 'Year'. Years in which
        `column` is missing for every row are left out.
    """
    # Drop missing values first so a year with no usable value is skipped
    # rather than producing a NaN label that `.loc` cannot look up.
    valid = frame.dropna(subset=[column])
    labels = valid.groupby('Year')[column].agg(pick)
    return frame.loc[labels].sort_values('Year')


# By year: who has max/min complaints and max/min losses?
max_incidents_per_year = _extreme_rows_by_year(loss, 'Complaints', 'idxmax')
min_incidents_per_year = _extreme_rows_by_year(loss, 'Complaints', 'idxmin')
max_losses_per_year = _extreme_rows_by_year(loss, 'Losses', 'idxmax')
min_losses_per_year = _extreme_rows_by_year(loss, 'Losses', 'idxmin')

print("Max incidents per year:")
print(max_incidents_per_year[['Year','Country_Std','Complaints']].to_string(index=False))

print("\nMin incidents per year:")
print(min_incidents_per_year[['Year','Country_Std','Complaints']].to_string(index=False))

print("\nMax losses per year:")
print(max_losses_per_year[['Year','Country_Std','Losses']].to_string(index=False))

print("\nMin losses per year:")
print(min_losses_per_year[['Year','Country_Std','Losses']].to_string(index=False))

print("\nHighest loss per incident (by year):")
max_lpc_per_year = _extreme_rows_by_year(loss, 'loss_per_complaint', 'idxmax')
print(max_lpc_per_year[['Year','Country_Std','loss_per_complaint']].to_string(index=False))

Max incidents per year:
 Year   Country_Std  Complaints
 2019         China      449305
 2020 United States      796395
 2021         China      940125
 2022 United States      769205
 2023 United States      876894
 2024 United States      946966

Min incidents per year:
 Year           Country_Std  Complaints
 2019                 Yemen         216
 2020                 Yemen         362
 2021                 Yemen         391
 2022 Saint Kitts and Nevis         458
 2023                 Yemen         400
 2024                 Yemen         330

Max losses per year:
 Year   Country_Std      Losses
 2019 United States  3302744547
 2020 United States  3906715208
 2021 United States  6466618579
 2022 United States 10304392551
 2023 United States 11916782073
 2024 United States 14457011740

Min losses per year:
 Year Country_Std  Losses
 2019      Belize 2498015
 2020       Yemen 2673181
 2021    Maldives 4362630
 2022      Guinea 6648059
 2023      Guinea 8907146
 2024       Yemen 97928