In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras import layers, models



ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Load the dataset
# The CSV is semicolon-separated, so the separator is passed explicitly.
# The path is relative to the notebook's working directory.
data = pd.read_csv(
    'missiles-acled-18-october-2023.csv',
    sep=';',
)

In [None]:
# Filter relevant missile attack events
# A row is kept when `sub_event_type` is one of the two strike categories AND
# `actor1` is 'Military Forces of Israel (2022-)'.
# `.copy()` materialises an independent frame: later cells assign columns on
# `data`, and doing that on a boolean-mask slice triggers pandas'
# SettingWithCopyWarning (the write may target a temporary copy).
data = data.loc[
    data['sub_event_type'].isin(['Shelling/artillery/missile attack', 'Air/drone strike'])
    & (data['actor1'] == 'Military Forces of Israel (2022-)')
].copy()

In [None]:
# Handle missing values
# Missing coordinates are replaced by the mean of their column; a missing
# fatality count is treated as zero.
# A single `fillna` with a per-column mapping returns a NEW frame instead of
# writing columns into `data` one by one, so no assignment is made on a
# filtered slice of the loaded data (no SettingWithCopyWarning). Re-running
# the cell gives the same result.
data = data.fillna({
    'latitude': data['latitude'].mean(),
    'longitude': data['longitude'].mean(),
    'fatalities': 0,
})

In [None]:
# Aggregate data by location
# One row per distinct `location`: number of events, summed fatalities and the
# mean coordinates of the events recorded there. The rename is chained (no
# `inplace=True`) so the cell builds `agg_data` in one idempotent expression.
agg_data = (
    data.groupby('location')
    .agg({
        'event_id_cnty': 'count',
        'fatalities': 'sum',
        'latitude': 'mean',
        'longitude': 'mean',
    })
    .reset_index()
    .rename(columns={'event_id_cnty': 'missile_attack_count', 'fatalities': 'total_fatalities'})
)

# PTSD Likelihood Proxy Label
# A location is labelled 1 when it has MORE than ATTACK_COUNT_THRESHOLD attacks
# AND MORE than FATALITY_THRESHOLD total fatalities, otherwise 0.
# The comparison is vectorised rather than a row-wise `apply`; it also yields
# a plain (empty) integer column when no rows survive the earlier filtering.
# NOTE(review): the label is a deterministic function of two columns that are
# also used as model inputs below, so a classifier can reproduce it from the
# features alone. Accuracy on this target does not validate real PTSD risk.
ATTACK_COUNT_THRESHOLD = 3
FATALITY_THRESHOLD = 1
agg_data['ptsd_likelihood'] = (
    (agg_data['missile_attack_count'] > ATTACK_COUNT_THRESHOLD)
    & (agg_data['total_fatalities'] > FATALITY_THRESHOLD)
).astype('int64')

# Feature Engineering
# Product of the two counts plus log(1 + x) versions of each count.
agg_data['attack_fatality_interaction'] = agg_data['missile_attack_count'] * agg_data['total_fatalities']
agg_data['log_missile_attack_count'] = np.log1p(agg_data['missile_attack_count'])
agg_data['log_total_fatalities'] = np.log1p(agg_data['total_fatalities'])

# Model inputs (X) and binary target (y). The mean latitude/longitude columns
# are computed above but are not part of the feature list.
features = ['missile_attack_count', 'total_fatalities',
            'attack_fatality_interaction', 'log_missile_attack_count', 'log_total_fatalities']
X = agg_data[features]
y = agg_data['ptsd_likelihood']

In [None]:
# Train-test split
# The split is done BEFORE scaling so the hold-out rows never contribute to the
# scaler's mean/variance. Fitting the scaler on all rows first would leak
# test-set statistics into training. 30% of the rows are held out; the seed is
# fixed so the partition is reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Scaling the features
# Statistics are learned from the training rows only and then applied,
# unchanged, to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics. Kept so that any
# cell still referring to `X_scaled` continues to work.
X_scaled = scaler.transform(X)

In [None]:
# SMOTE to balance the data
# Only the training partition is resampled; the test partition is left as-is.
# The seed is fixed to 42 so the resampled set is reproducible.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X=X_train, y=y_train)

In [None]:
# Transformer model for tabular data
# The input width is taken from the resampled training matrix, so the model
# accepts one flat vector with one entry per feature column.
n_features = X_train_smote.shape[1]
input_layer = layers.Input(shape=(n_features,))

In [None]:
# Project features into a dense space (simulate embedding)
x = layers.Dense(128, activation='relu')(input_layer)
# Make input 3D for attention: (batch, 128) -> (batch, 1, 128).
# A Keras layer is used instead of `tf.expand_dims` because raw TensorFlow ops
# cannot be called on symbolic Keras tensors under Keras 3 (they raise while
# the functional model is being built). `Reshape` works with older Keras too.
x = layers.Reshape((1, 128))(x)

# Transformer block
# Self-attention: the same tensor is passed as query and value.
# NOTE(review): the sequence axis has length 1, so every head attends over a
# single position and the attention weights are trivially 1. To attend across
# features, each feature would need to be its own token. Confirm the intent.
attention_output = layers.MultiHeadAttention(num_heads=4, key_dim=128)(x, x)
# Average over the (length-1) sequence axis to return to a 2D tensor.
attention_output = layers.GlobalAveragePooling1D()(attention_output)

In [None]:
# Feedforward network
# 128-unit ReLU layer -> 10% dropout -> single sigmoid unit whose output is
# used as the probability of the positive class.
hidden = layers.Dense(units=128, activation='relu')(attention_output)
x = layers.Dropout(rate=0.1)(hidden)
output = layers.Dense(units=1, activation='sigmoid')(x)

In [None]:
# Compile model
# Wire the functional graph from the input layer to the sigmoid output, then
# configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model = models.Model(inputs=input_layer, outputs=output)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:

# Train the model
# 10 epochs over the SMOTE-resampled training set in mini-batches of 16.
# NOTE(review): the validation data passed here is the same (X_test, y_test)
# pair that the final evaluation cell uses, so the per-epoch validation scores
# and the final test score are not independent.
model.fit(
    x=X_train_smote,
    y=y_train_smote,
    batch_size=16,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Evaluate
# With a single compiled metric, `evaluate` returns the loss followed by the
# accuracy on the hold-out partition.
eval_scores = model.evaluate(X_test, y_test)
loss, acc = eval_scores
print(f"Test Accuracy: {acc:.4f}")