# Checking the ETDA -> ATT&CK merge

In [2]:
import pandas as pd

# Load the merged ETDA / ATT&CK data set; the path is relative to the notebook's working directory.
df = pd.read_json('ETDA_ATTCK_merge.json')
# Print the column labels as a quick check of which fields the merged file contains.
print(df.columns)

Index(['mitre_attack_id', 'mitre_attack_name', 'mitre_attack_aliases',
       'mitre_attack_created', 'mitre_attack_last_modified', 'mitre_url',
       'etda_id', 'etda_name', 'etda_aliases', 'etda_first_seen', 'etda_url',
       'country', 'motivation', 'victim_industries', 'victim_countries',
       'mitre_attack_ttps'],
      dtype='object')


In [3]:
# Count rows per ETDA name; a count above 1 means that name occupies more than one row of the merged data.
df['etda_name'].value_counts()

APT 19, Deep Panda, C0d0so0                    2
Lotus Blossom, Spring Dragon, Thrip            2
Whitefly, Mofang                               2
APT 17, Deputy Dog, Elderwood, Sneaky Panda    2
DarkHotel                                      2
                                              ..
Pale Panda                                     1
Overlord Spider                                1
Outlaw Spider                                  1
Magnetic Spider                                1
Zombie Spider                                  1
Name: etda_name, Length: 434, dtype: int64

## Functions to help access the data

In [4]:
def get_actor_ETDA(actor):
    """Return the rows of the module-level ``df`` whose ``etda_name`` equals ``actor``.

    The comparison is an exact match against the full ``etda_name`` value,
    so ``actor`` has to be spelled the way it is stored in that column
    (for example ``'APT 29, Cozy Bear, The Dukes'``). A name that is not
    present yields an empty selection.
    """
    name_matches = df['etda_name'] == actor
    return df.loc[name_matches]

def get_actor_list_ETDA(actor_list):
    """Return the rows of the module-level ``df`` whose ``etda_name`` is in ``actor_list``.

    ``actor_list`` is handed straight to ``Series.isin``; every entry is
    compared against the complete ``etda_name`` value.
    """
    wanted = df['etda_name'].isin(actor_list)
    return df.loc[wanted]

def to_1D(series):
    """Flatten a column of lists into one ``pd.Series`` of individual values.

    Parameters
    ----------
    series : iterable
        Typically a DataFrame column in which every cell is a list of strings.

    Returns
    -------
    pd.Series
        All list items in their original order, minus items whose length
        is 1 or less (single-character and empty values are dropped).

    Cells that cannot be iterated at all (``None`` / ``NaN`` from a missing
    field) are skipped instead of raising ``TypeError``.
    """
    flattened = []
    for cell in series:
        try:
            items = iter(cell)
        except TypeError:
            # Missing value (None/NaN): contributes nothing to the result.
            continue
        # The length filter removes single-character and empty entries.
        flattened.extend(item for item in items if len(item) > 1)
    return pd.Series(flattened)

def get_industries():
    """List every distinct victim industry found in the module-level ``df``.

    The ``victim_industries`` column is flattened with ``to_1D`` and the
    names are returned in ``value_counts`` order, i.e. the most frequently
    listed industry comes first.
    """
    industry_counts = to_1D(df['victim_industries']).value_counts()
    return industry_counts.index.to_list()


In [5]:
# Example lookup: the argument must be the complete `etda_name` string exactly as stored.
get_actor_ETDA('APT 29, Cozy Bear, The Dukes')

Unnamed: 0,mitre_attack_id,mitre_attack_name,mitre_attack_aliases,mitre_attack_created,mitre_attack_last_modified,mitre_url,etda_id,etda_name,etda_aliases,etda_first_seen,etda_url,country,motivation,victim_industries,victim_countries,mitre_attack_ttps
8,intrusion-set--899ce53f-13a0-479b-a0e4-67d46e2...,APT29,"[APT29, IRON RITUAL, IRON HEMLOCK, NobleBaron,...",2017-05-31,2022-07-11,https://attack.mitre.org/groups/G0016,93ba9804-335e-4782-855d-40af22b93201,"APT 29, Cozy Bear, The Dukes","[APT 29, Cozy Bear, The Dukes, Group 100, Yttr...",2008,https://apt.etda.or.th/cgi-bin/showcard.cgi?u=...,RU,[Information theft and espionage],"[Aerospace, Defense, Education, Energy, Financ...","[Australia, Azerbaijan, Belarus, Belgium, Braz...","[T1078.004, T1001.002, T1087, T1562.004, T1560..."
31,intrusion-set--dc5e2999-ca1a-47d4-8d12-a6984b1...,UNC2452,"[UNC2452, NOBELIUM, StellarParticle, Dark Halo]",2021-01-05,2021-05-04,https://attack.mitre.org/groups/G0118,93ba9804-335e-4782-855d-40af22b93201,"APT 29, Cozy Bear, The Dukes","[APT 29, Cozy Bear, The Dukes, Group 100, Yttr...",2008,https://apt.etda.or.th/cgi-bin/showcard.cgi?u=...,RU,[Information theft and espionage],"[Aerospace, Defense, Education, Energy, Financ...","[Australia, Azerbaijan, Belarus, Belgium, Braz...","[T1027, T1606.001, T1218.011, T1546.003, T1021..."


# Statistics

In [7]:
# Distinct industry names, most frequently listed first.
industries = get_industries()
# NOTE(review): len(df) counts rows of the merged table. The outputs above show
# 434 distinct `etda_name` values and an actor occupying two rows, so this
# figure (and the per-industry tallies below) may exceed the number of
# distinct actors — confirm whether rows or unique actors are intended.
print("Number of actors in data:\t",len(df))
# Reuse the list computed above instead of flattening the column a second time.
print("Number of industries in data:\t",len(industries))

print('\nNumber of actors per industry:')
# Left as the cell's last expression so the notebook renders the counts.
to_1D(df['victim_industries']).value_counts()

Number of actors in data:	 441
Number of industries in data:	 42

Number of actors per industry:


Government                     183
Defense                        111
Financial                      102
Energy                          85
Telecommunications              81
Media                           70
Education                       68
Healthcare                      57
Manufacturing                   50
High-Tech                       37
Technology                      33
Transportation                  32
IT                              32
Aerospace                       29
Hospitality                     27
Oil and gas                     26
Aviation                        26
Engineering                     24
Pharmaceutical                  23
Retail                          23
NGOs                            20
Construction                    19
Think Tanks                     17
Shipping and Logistics          16
Industrial                      16
Embassies                       14
Chemical                        13
Utilities                       12
Automotive          

#### Explanations