In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import tensorflow as tf

In [1]:
from google.colab import files
uploaded = files.upload()


Saving missiles-acled-18-october-2023.csv to missiles-acled-18-october-2023.csv


In [1]:
import pandas as pd
data = pd.read_csv('missiles-acled-18-october-2023.csv', sep=';')


In [2]:
print(data.columns.tolist())


['event_id_cnty', 'event_date', 'year', 'time_precision', 'disorder_type', 'event_type', 'sub_event_type', 'actor1', 'assoc_actor_1', 'inter1', 'interaction', 'civilian_targeting', 'iso', 'region', 'country', 'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude', 'geo_precision', 'source', 'source_scale', 'notes', 'fatalities', 'tags', 'timestamp', 'population_best']


In [3]:
data['event_date'] = pd.to_datetime(data['event_date'], errors='coerce')
data['day'] = data['event_date'].dt.day
data['month'] = data['event_date'].dt.month
data['year'] = data['event_date'].dt.year

In [4]:
data = data.drop(columns=['event_date', 'notes', 'source', 'tags'])

In [7]:
# Encode categorical columns (example: disorder_type, event_type, actor1)
categorical_cols = ['assoc_actor_1', 'interaction', 'civilian_targeting', 'admin3', 'location', 'source_scale']
data[categorical_cols] = data[categorical_cols].fillna('Unknown')

ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded = ohe.fit_transform(data[categorical_cols])


In [8]:
# Drop original categorical columns and add encoded
data = data.drop(columns=categorical_cols)
encoded_df = pd.DataFrame(encoded, columns=ohe.get_feature_names_out(categorical_cols))
data = pd.concat([data.reset_index(drop=True), encoded_df.reset_index(drop=True)], axis=1)

In [9]:
# Fill missing numeric with 0 (or another strategy)
data = data.fillna(0)

In [10]:
# Target: fatalities (for regression)
X = data.drop(columns=['fatalities', 'event_id_cnty', 'timestamp'])
y = data['fatalities']

In [12]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

# Identify categorical and numeric columns
numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

# Fill missing values in categorical columns (optional but recommended)
X[categorical_cols] = X[categorical_cols].fillna('Unknown')

# Create a ColumnTransformer that applies OneHotEncoder to categorical columns
# and StandardScaler to numeric columns
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ]
)

# Fit and transform X
X_processed = preprocessor.fit_transform(X)

# Now you can split into train/test
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)


In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# Split train/val if not done yet
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

model = Sequential([
    Dense(128, kernel_regularizer=l2(0.001), input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),

    Dense(64, kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    LeakyReLU(alpha=0.1),
    Dropout(0.3),

    Dense(1)  # output layer for regression
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    callbacks=[early_stop]
)


Epoch 1/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - loss: 11.6749 - mae: 1.0720 - val_loss: 1.6221 - val_mae: 0.5387
Epoch 2/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 6.0061 - mae: 0.8230 - val_loss: 1.7119 - val_mae: 0.6286
Epoch 3/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 7.1740 - mae: 0.8622 - val_loss: 1.7696 - val_mae: 0.5734
Epoch 4/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 6.9239 - mae: 0.8087 - val_loss: 2.0402 - val_mae: 0.6356
Epoch 5/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 7.6357 - mae: 0.8852 - val_loss: 2.0095 - val_mae: 0.5916
Epoch 6/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 5.5616 - mae: 0.7608 - val_loss: 2.0092 - val_mae: 0.5942


In [23]:
loss, mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {mae}")

[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3.6384 - mae: 0.5990
Test MAE: 0.5682174563407898
