# Checking the ETDA -> ATT&CK merge

In [1]:
import pandas as pd

# Load the merged ETDA / MITRE ATT&CK dataset and list its columns.
MERGE_JSON = 'ETDA_ATTCK_merge.json'
df = pd.read_json(MERGE_JSON)
print(df.columns)

Index(['mitre_attack_id', 'mitre_attack_name', 'mitre_attack_aliases',
       'mitre_attack_created', 'mitre_attack_last_modified', 'mitre_url',
       'etda_id', 'etda_name', 'etda_aliases', 'etda_first_seen', 'etda_url',
       'etda_operation_dates', 'etda_operation_year', 'country', 'motivation',
       'victim_industries', 'victim_countries', 'mitre_attack_ttps'],
      dtype='object')


In [2]:
# Count how many rows of the merge carry each ETDA name; a count above 1 means
# the same etda_name appears on more than one row.
etda_name_counts = df['etda_name'].value_counts()
etda_name_counts

APT 29, Cozy Bear, The Dukes              2
Lotus Blossom, Spring Dragon, Thrip       2
DarkHotel                                 2
OilRig, APT 34, Helix Kitten, Chrysene    2
APT 19, Deep Panda, C0d0so0               2
                                         ..
Poisonous Panda                           1
ParkingBear                               1
Pale Panda                                1
Overlord Spider                           1
Zombie Spider                             1
Name: etda_name, Length: 448, dtype: int64

## Functions to help access the data

In [3]:
def get_actor_ETDA(actor):
    """Return the rows of the notebook-level ``df`` for one ETDA actor.

    Parameters
    ----------
    actor : str
        Value compared for equality against the ``etda_name`` column
        (e.g. ``'APT 29, Cozy Bear, The Dukes'``).

    Returns
    -------
    pandas.DataFrame
        Every row whose ``etda_name`` equals ``actor``; this may be zero,
        one, or several rows.
    """
    name_matches = df['etda_name'] == actor
    return df.loc[name_matches]

def get_actor_list_ETDA(actor_list):
    """Return the rows of the notebook-level ``df`` for several ETDA actors.

    Parameters
    ----------
    actor_list : iterable
        Values to look for in the ``etda_name`` column.

    Returns
    -------
    pandas.DataFrame
        Every row whose ``etda_name`` is a member of ``actor_list``.
    """
    in_requested_set = df['etda_name'].isin(actor_list)
    return df.loc[in_requested_set]

def to_1D(series):
    """Flatten a column of lists into a single Series.

    Each element of ``series`` is iterated and its items are collected in
    order. Items whose ``len`` is 1 or less are discarded, which removes
    single-character values.

    Parameters
    ----------
    series : iterable of iterables
        Typically a DataFrame column whose cells are lists of strings.

    Returns
    -------
    pandas.Series
        The retained items, with a fresh default index.
    """
    flattened = []
    for _list in series:
        for item in _list:
            # Keep only values longer than one character.
            if len(item) > 1:
                flattened.append(item)
    return pd.Series(flattened)

def get_industries():
    """List the distinct victim industries, most frequent first.

    Flattens the ``victim_industries`` column of the notebook-level ``df``
    with ``to_1D`` and returns the index of its ``value_counts()`` as a
    plain Python list.
    """
    flat_industries = to_1D(df['victim_industries'])
    industry_counts = flat_industries.value_counts()
    return industry_counts.index.to_list()


In [4]:
# Exact-match lookup on the full ETDA name string.
apt29_rows = get_actor_ETDA('APT 29, Cozy Bear, The Dukes')
apt29_rows

Unnamed: 0,mitre_attack_id,mitre_attack_name,mitre_attack_aliases,mitre_attack_created,mitre_attack_last_modified,mitre_url,etda_id,etda_name,etda_aliases,etda_first_seen,etda_url,etda_operation_dates,etda_operation_year,country,motivation,victim_industries,victim_countries,mitre_attack_ttps
0,intrusion-set--899ce53f-13a0-479b-a0e4-67d46e2...,APT29,"[APT29, IRON RITUAL, IRON HEMLOCK, NobleBaron,...",2017-05-31,2023-04-16,https://attack.mitre.org/groups/G0016,93ba9804-335e-4782-855d-40af22b93201,"APT 29, Cozy Bear, The Dukes","[APT 29, Cozy Bear, The Dukes, Group 100, Yttr...",2008,https://apt.etda.or.th/cgi-bin/showcard.cgi?u=...,"[2013-02, 2013, 2013, 2014-03, 2015-08, 2016-0...","[2013, 2013, 2013, 2014, 2015, 2016, 2016, 201...",RU,[Information theft and espionage],"[Aerospace, Defense, Education, Energy, Financ...","[Australia, Azerbaijan, Belarus, Belgium, Braz...","[T1586.002, T1548.002, T1098.005, T1566.003, T..."
61,intrusion-set--dc5e2999-ca1a-47d4-8d12-a6984b1...,UNC2452,"[UNC2452, NOBELIUM, StellarParticle, Dark Halo]",2021-01-05,2021-05-04,https://attack.mitre.org/groups/G0118,93ba9804-335e-4782-855d-40af22b93201,"APT 29, Cozy Bear, The Dukes","[APT 29, Cozy Bear, The Dukes, Group 100, Yttr...",2008,https://apt.etda.or.th/cgi-bin/showcard.cgi?u=...,"[2013-02, 2013, 2013, 2014-03, 2015-08, 2016-0...","[2013, 2013, 2013, 2014, 2015, 2016, 2016, 201...",RU,[Information theft and espionage],"[Aerospace, Defense, Education, Energy, Financ...","[Australia, Azerbaijan, Belarus, Belgium, Braz...",[]


# Statistics

In [6]:
# Distinct industry labels as returned by get_industries(); computed once and
# reused, instead of calling the helper a second time just to take its length.
industry_names = get_industries()

# NOTE: len(df) counts rows of the merge, and an ETDA name can occupy more
# than one row, so this is a row count rather than a unique-name count.
print("Number of actors in data:\t", len(df))
print("Number of industries in data:\t", len(industry_names))

print('\nNumber of actors per industry:')
# One row per (merge row, industry) pair, then count the rows per industry.
# `industries` ends the cell bound to this Series, exactly as before.
industries = df['victim_industries'].explode().value_counts()
industries

Number of actors in data:	 455
Number of industries in data:	 42

Number of actors per industry:


Government                     192
None Provided                  151
Defense                        113
Financial                      102
Energy                          86
Telecommunications              85
Education                       73
Media                           71
Healthcare                      58
Manufacturing                   52
High-Tech                       37
IT                              36
Transportation                  34
Technology                      34
Aerospace                       29
Aviation                        28
Hospitality                     27
Oil and gas                     26
Engineering                     24
Retail                          23
Pharmaceutical                  23
NGOs                            21
Construction                    20
Shipping and Logistics          18
Think Tanks                     17
Industrial                      16
Embassies                       14
Chemical                        13
Utilities           

#### Explanations