# Checking the ETDA -> ATT&CK merge

In [40]:
import pandas as pd

df = pd.read_json('ETDA_ATTCK_merge.json')
print(df.columns)

Index(['mitre_attack_id', 'mitre_attack_name', 'mitre_attack_aliases',
       'mitre_attack_created', 'mitre_attack_last_modified', 'mitre_url',
       'etda_id', 'etda_name', 'etda_aliases', 'etda_first_seen', 'etda_url',
       'etda_operation_dates', 'etda_operation_year', 'country', 'motivation',
       'victim_industries', 'victim_countries', 'mitre_attack_ttps'],
      dtype='object')


In [41]:
df['etda_name'].value_counts()

etda_name
Magic Hound, APT 35, Cobalt Illusion, Charming Kitten    2
APT 19, Deep Panda, C0d0so0                              2
Lotus Blossom, Spring Dragon, Thrip                      2
APT 17, Deputy Dog, Elderwood, Sneaky Panda              2
OilRig, APT 34, Helix Kitten, Chrysene                   2
                                                        ..
Poisonous Panda                                          1
ParkingBear                                              1
Pale Panda                                               1
Overlord Spider                                          1
Zombie Spider                                            1
Name: count, Length: 462, dtype: int64

## Functions to help access the data

In [42]:
def get_actor_ETDA(actor):
   return df.loc[df['etda_name'] == actor]

def get_actor_list_ETDA(actor_list):
   return df.loc[df['etda_name'].isin(actor_list)]

def to_1D(series):
 return pd.Series([x for _list in series for x in _list if len(x) > 1]) # Convert column of lists to one list and remove single character values

def get_industries():
    return to_1D(df['victim_industries']).unique()

def get_motivations():
   return to_1D(df['motivation']).unique()

def get_countries():
   return to_1D(df['victim_countries']).unique()


In [43]:
get_actor_ETDA('APT 29, Cozy Bear, The Dukes')

Unnamed: 0,mitre_attack_id,mitre_attack_name,mitre_attack_aliases,mitre_attack_created,mitre_attack_last_modified,mitre_url,etda_id,etda_name,etda_aliases,etda_first_seen,etda_url,etda_operation_dates,etda_operation_year,country,motivation,victim_industries,victim_countries,mitre_attack_ttps
15,intrusion-set--dc5e2999-ca1a-47d4-8d12-a6984b1...,UNC2452,"[UNC2452, NOBELIUM, StellarParticle, Dark Halo]",2021-01-05,2021-05-04,https://attack.mitre.org/groups/G0118,93ba9804-335e-4782-855d-40af22b93201,"APT 29, Cozy Bear, The Dukes","[APT 29, Cozy Bear, The Dukes, Group 100, Yttr...",2008,https://apt.etda.or.th/cgi-bin/showcard.cgi?u=...,"[2013-02, 2013, 2013, 2014-03, 2015-08, 2016-0...","[2013, 2013, 2013, 2014, 2015, 2016, 2016, 201...",RU,[Information theft and espionage],"[Aerospace, Defense, Education, Energy, Financ...","[Australia, Azerbaijan, Belarus, Belgium, Braz...",[]
29,intrusion-set--899ce53f-13a0-479b-a0e4-67d46e2...,APT29,"[APT29, IRON RITUAL, IRON HEMLOCK, NobleBaron,...",2017-05-31,2023-10-02,https://attack.mitre.org/groups/G0016,93ba9804-335e-4782-855d-40af22b93201,"APT 29, Cozy Bear, The Dukes","[APT 29, Cozy Bear, The Dukes, Group 100, Yttr...",2008,https://apt.etda.or.th/cgi-bin/showcard.cgi?u=...,"[2013-02, 2013, 2013, 2014-03, 2015-08, 2016-0...","[2013, 2013, 2013, 2014, 2015, 2016, 2016, 201...",RU,[Information theft and espionage],"[Aerospace, Defense, Education, Energy, Financ...","[Australia, Azerbaijan, Belarus, Belgium, Braz...","[T1621, T1003.002, T1588.002, T1090.004, T1568..."


# Values
## Industries / sectors

In [44]:
industries = get_industries()
print("Number of actors in data:\t",len(df))
print("Number of industries in data:\t",len(industries))

print('\nNumber of actors per industry:')
industries = df['victim_industries'].explode().value_counts().sort_index().index
industries
for industry in industries:
    print(industry)

Number of actors in data:	 470
Number of industries in data:	 42

Number of actors per industry:
Aerospace
Automotive
Aviation
Casinos and Gambling
Chemical
Construction
Critical infrastructure
Defense
Education
Embassies
Energy
Engineering
Entertainment
Financial
Food and Agriculture
Gaming
Government
Healthcare
High-Tech
Hospitality
IT
Industrial
Law enforcement
Manufacturing
Maritime and Shipbuilding
Media
Mining
NGOs
Non-profit organizations
None Provided
Oil and gas
Online video game companies
Petrochemical
Pharmaceutical
Research
Retail
Satellites
Shipping and Logistics
Technology
Telecommunications
Think Tanks
Transportation
Utilities


## Motivations and victim countries

In [50]:
import numpy as np

print("Motivations:\n",get_motivations())

print("\nVictim Countries")
for country in np.sort(get_countries()):
    print(country)

Motivations:
 ['Financial crime' 'Financial gain' 'Information theft and espionage'
 'Sabotage and destruction']

Victim Countries
ASEAN
Afghanistan
Albania
Algeria
Angola
Antigua and Barbuda
Argentina
Armenia
Australia
Austria
Azerbaijan
Bahamas
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bhutan
Bolivia
Bosnia and Herzegovina
Botswana
Brazil
Brunei
Bulgaria
Burkina Faso
Cambodia
Cameroon
Canada
Central-African Republic
Chad
Chechnya
Chile
China
Colombia
Congo
Costa Rica
Cote d'Ivoire
Croatia
Cuba
Cyprus
Czech
Denmark
Djibouti
Dominican Republic
Dubai
Ecuador
Egypt
El Salvador
Equatorial Guinea
Estonia
Ethiopia
Fiji
Finland
France
Gabon
Georgia
Germany
Ghana
Gibraltar
Greece
Guatemala
Guinea
Guyana
Honduras
Hong Kong
Hungary
Iceland
India
Indonesia
Iran
Iraq
Ireland
Israel
Italy
Jamaica
Japan
Jordan
Kazakhstan
Kenya
Kuwait
Kyrgyzstan
Laos
Latvia
Lebanon
Libya
Lithuania
Luxembourg
Macao
Macedonia
Madagascar
Malaysia
Maldives
Mali
Malta
Mauritania
Mauritius
Mexico
Moldova
Mo