In [1]:
import pandas as pd
import numpy as np
import requests
import json

In [2]:
# data fetching 

class APIFetcher:
    def __init__(self):
        self.api_keys = {
            'cyber': 'b7aceffb1657bfac01d8cc7563d5e91c91ee239bfe2c99e30ab1e0030ee9e4b4'
        }

        self.urls = {
            'conflict': 'https://services8.arcgis.com/xu983xJB6fIDCjpX/arcgis/rest/services/ACLED/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson',
            'cyber': 'https://otx.alienvault.com/api/v1/pulses/subscribed'
        }

    def fetch(self, name):
        headers = {}
        if name == 'cyber':
            headers['X-OTX-API-KEY'] = self.api_keys['cyber']

        try:
            response = requests.get(self.urls[name], headers=headers, timeout=10)
            response.raise_for_status()
            print(f"[INFO] Successfully fetched {name} data")
            return response.json()
        except requests.RequestException as e:
            print(f"[ERROR] Failed to fetch '{name}': {e}")
            return None

# Example Usage
fetcher = APIFetcher()
conflict_data = fetcher.fetch('conflict')
cyber_data = fetcher.fetch('cyber')


[INFO] Successfully fetched conflict data
[INFO] Successfully fetched cyber data


In [3]:
print("conflict data -> ",conflict_data.keys())
print("cyber data ->",cyber_data.keys())

conflict data ->  dict_keys(['type', 'properties', 'features'])
cyber data -> dict_keys(['results', 'count', 'prefetch_pulse_ids', 't', 't2', 't3', 'previous', 'next'])


DATA GATHERING

In [4]:
# parsing conflict data for merging

conflict_events = []

for event in conflict_data['features']:
    props = event['properties']
    conflict_events.append({
        'Country':props.get('country'),
        'Protests':props.get('protests'),
        'Riots': props.get('riots'),
        'Admin': props.get('admin1')
    })


import pandas as pd
conflict_df = pd.DataFrame(conflict_events)    

In [5]:
normalized_cyber = []

for event in cyber_data.get('results', []):
    for indicator in event.get('indicators', []):
        normalized_cyber.append({
            'source': 'cyber',
            'country': 'Global',  # Could infer from title/description later
            'region': 'Cyber Domain',
            'date': event.get('created'),
            'threat_type': indicator.get('type'),
            'value': indicator.get('indicator')
        })

cyber_df = pd.DataFrame(normalized_cyber)


In [6]:
cyber_df

Unnamed: 0,source,country,region,date,threat_type,value
0,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-MD5,34546a79de045b7ee4c0c8d4cbeb6778
1,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-MD5,91f5009c786618bbbd798ee777b061e3
2,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-MD5,f73f1a694d2a5c7e6d04fbc866a916bd
3,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-SHA1,184e40229e2c62087aa182075c6efc748953df0a
4,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-SHA1,555128ccc53e6beae6f695b5ea903daab4a41250
5,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-SHA1,f8f63044cfe387aff0b245da80f407570aedd660
6,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-SHA256,256f4e0fc5ac1d12c77223673088536acbbe02757aa3d5...
7,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-SHA256,600781bc13875d80026910e12f80b88ba474fe88017daf...
8,cyber,Global,Cyber Domain,2025-06-03T18:25:37.033000,FileHash-SHA256,e11e509039bb45fb827f6a36c804fcd8220338672c847d...
9,cyber,Global,Cyber Domain,2025-06-03T19:16:56.281000,domain,bkngnet.com


Integrating with scikit learn 

In [54]:
# predicring with cyber indicators -- predicting threats 

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# encode categorical values
encoders = {}

for col in ['country', 'region', 'value', 'threat_type']:
    le = LabelEncoder()
    cyber_df[col] = le.fit_transform(cyber_df[col].astype(str))
    encoders[col] = le
    
X = cyber_df[['country','region','value']]
y = cyber_df['threat_type']

# spliting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# classifier
model = RandomForestClassifier()
model.fit(X_train, y_train)

# checks marks
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# 7. Predict new cyber event
new_event = pd.DataFrame([{
    'country': 'India',
    'region': 'Asia',
    'value': 'a1b2c3'
}])


def encode_fallback(encoder,value):
    if value not in encoder.classes_:
        return encoder.transform([value])[0]
    else:
        encoder.classes_ = np.append(encoder.classes_, value)
        return encoder.transform([value])[0]

# 8. Encode new event using stored encoders
for col in ['country', 'region', 'value']:
    new_event[col] = new_event[col].apply(lambda val: encode_with_fallback(encoders[col], val))

# 9. Predict threat type
predicted = model.predict(new_event)
predicted_label = encoders['threat_type'].inverse_transform(predicted)

print("Predicted Threat Type:", predicted_label[0])

Accuracy: 0.0
Predicted Threat Type: 0


In [20]:
print(cyber_df['threat_type'].value_counts())


threat_type
4    15
2     7
5     5
0     4
1     4
3     1
Name: count, dtype: int64


In [22]:
conflict_df

Unnamed: 0,Country,Protests,Riots,Admin
0,Iraq,0,0,Duhok
1,Colombia,24,8,"Bogota, D.C."
2,Iraq,0,0,Duhok
3,Colombia,0,0,"Bogota, D.C."
4,Iraq,0,0,Duhok
...,...,...,...,...
995,Bolivia,33,7,Cochabamba
996,Bolivia,29,2,Cochabamba
997,Bolivia,18,3,Cochabamba
998,Bolivia,14,3,Cochabamba
