In [52]:
import pandas as pd
import numpy as np
import requests
import json

In [53]:
# data fetching 

class APIFetcher:
    def __init__(self):
        self.api_keys = {
            'cyber': 'b7aceffb1657bfac01d8cc7563d5e91c91ee239bfe2c99e30ab1e0030ee9e4b4'
        }

        self.urls = {
            'conflict': 'https://services8.arcgis.com/xu983xJB6fIDCjpX/arcgis/rest/services/ACLED/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson',
            'cyber': 'https://otx.alienvault.com/api/v1/pulses/subscribed'
        }

    def fetch(self, name):
        headers = {}
        if name == 'cyber':
            headers['X-OTX-API-KEY'] = self.api_keys['cyber']

        try:
            response = requests.get(self.urls[name], headers=headers, timeout=10)
            response.raise_for_status()
            print(f"[INFO] Successfully fetched {name} data")
            return response.json()
        except requests.RequestException as e:
            print(f"[ERROR] Failed to fetch '{name}': {e}")
            return None

# Example Usage
fetcher = APIFetcher()
conflict_data = fetcher.fetch('conflict')
cyber_data = fetcher.fetch('cyber')


[INFO] Successfully fetched conflict data
[INFO] Successfully fetched cyber data


In [54]:
print("conflict data -> ",conflict_data.keys())
print("cyber data ->",cyber_data.keys())

conflict data ->  dict_keys(['type', 'properties', 'features'])
cyber data -> dict_keys(['results', 'count', 'prefetch_pulse_ids', 't', 't2', 't3', 'previous', 'next'])


DATA GATHERING

In [104]:
# parsing conflict data for merging

conflict_events = []

for event in conflict_data['features']:
    props = event['properties']
    conflict_events.append({
        'Country':props.get('country'),
        'Protests':props.get('protests'),
        'Riots': props.get('riots'),
        'Admin': props.get('admin1')
    })


import pandas as pd
conflict_df = pd.DataFrame(conflict_events)    

In [131]:
normalized_cyber = []

for event in cyber_data.get('results', []):
    for indicator in event.get('indicators', []):
        normalized_cyber.append({
            'source': 'cyber',
            'country': 'Global',  # Could infer from title/description later
            'region': 'Cyber Domain',
            'date': event.get('created'),
            'threat_type': indicator.get('type'),
            'value': indicator.get('indicator')
        })

cyber_df = pd.DataFrame(normalized_cyber)


In [112]:
conflict_df.head(11)

Unnamed: 0,Country,Protests,Riots,Admin
0,Iraq,0,0,Duhok
1,Colombia,24,8,"Bogota, D.C."
2,Iraq,0,0,Duhok
3,Colombia,0,0,"Bogota, D.C."
4,Iraq,0,0,Duhok
5,Iraq,1,0,Duhok
6,Colombia,2,0,Huila
7,Colombia,1,0,Huila
8,Iraq,0,1,Duhok
9,Iraq,0,0,Duhok


In [135]:
import pandas as pd

# Assume cyber_data is already fetched and stored in `cyber_data`
cyber_events = cyber_data.get('results', [])

# Normalize indicators from each event
normalized_cyber = []
for event in cyber_events:
    indicators = event.get('indicators', [])
    for ind in indicators:
        normalized_cyber.append({
            'country': 'Global',  # Most cyber indicators don’t have a country directly
            'region': 'Cyber Domain',
            'indicator_id': ind.get('id'),
            'indicator_type': ind.get('type'),
            'indicator_value': ind.get('indicator'),
            'description': ind.get('description'),
            'created': ind.get('created')
        })

cyber_df = pd.DataFrame(normalized_cyber)


In [136]:
cyber_df.head(11)

Unnamed: 0,country,region,indicator_id,indicator_type,indicator_value,description,created
0,Global,Cyber Domain,4063067491,FileHash-MD5,1f276e6545d92a0607dee715b594ef8d,MD5 of 8414136128f73fa7e29032df7b8115bc89832c5...,2025-05-01T20:46:33
1,Global,Cyber Domain,4063067492,FileHash-MD5,60554308955996496aa1e7c4e4399816,MD5 of b26e8e0be066ee0b86f8fb2b0a703717ebbf34c...,2025-05-01T20:46:33
2,Global,Cyber Domain,4063067493,FileHash-MD5,67165600be58fc451de2059d1d754353,MD5 of 66edb72f6f7c8cad23c6659a81fa023f57c1a86...,2025-05-01T20:46:33
3,Global,Cyber Domain,3770388493,FileHash-MD5,bebbeba37667453003d2372103c45bbf,MD5 of 1ab42121bb45028a17a3438b65a3634adb7d673...,2025-05-01T20:46:33
4,Global,Cyber Domain,4063067494,FileHash-SHA1,136076ee6164f20feb4bb322fe0656bc755ebdaf,SHA1 of 66edb72f6f7c8cad23c6659a81fa023f57c1a8...,2025-05-01T20:46:33
5,Global,Cyber Domain,3770741143,FileHash-SHA1,34894d5ffa541ab159b69a2fe0937a5430dac545,SHA1 of 1ab42121bb45028a17a3438b65a3634adb7d67...,2025-05-01T20:46:33
6,Global,Cyber Domain,4063067495,FileHash-SHA1,49b5260daa9a920537fb240363e85d49719d6fd4,SHA1 of b26e8e0be066ee0b86f8fb2b0a703717ebbf34...,2025-05-01T20:46:33
7,Global,Cyber Domain,4063067496,FileHash-SHA1,720d744310bede34a011205006e03be4b9d491cd,SHA1 of 8414136128f73fa7e29032df7b8115bc89832c...,2025-05-01T20:46:33
8,Global,Cyber Domain,4063067497,FileHash-SHA256,004adec667373bdf6146e05b9a1c6e0c63941afd38e30c...,,2025-05-01T20:46:33
9,Global,Cyber Domain,4063067498,FileHash-SHA256,0a50587785bf821d224885cbfc65c5fd251b3e43cda90c...,,2025-05-01T20:46:33


In [137]:
#  merging both group
import pandas as pd

# Assuming conflict_df and cyber_df are both pandas DataFrames
df = pd.concat([conflict_df, cyber_df], ignore_index=True)

df.fillna({
    'country': 'Unknown',
    'region': 'Unknown',
    'battles': 0,
    'fatalities': 0,
    'indicator_value': 'None'
}, inplace=True)


In [138]:
df.head()

Unnamed: 0,Country,Protests,Riots,Admin,country,region,indicator_id,indicator_type,indicator_value,description,created
0,Iraq,0.0,0.0,Duhok,Unknown,Unknown,,,,,
1,Colombia,24.0,8.0,"Bogota, D.C.",Unknown,Unknown,,,,,
2,Iraq,0.0,0.0,Duhok,Unknown,Unknown,,,,,
3,Colombia,0.0,0.0,"Bogota, D.C.",Unknown,Unknown,,,,,
4,Iraq,0.0,0.0,Duhok,Unknown,Unknown,,,,,
