In [19]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau

import matplotlib.pyplot as plt
%matplotlib inline

In [20]:
# Seed both RNGs used by the notebook (PyTorch and NumPy's legacy global
# generator) with one shared value so repeated runs start from the same state.
SEED = 31
for seed_fn in (torch.manual_seed, np.random.seed):
    seed_fn(SEED)

In [21]:
# Load the CSV file and discard the columns the rest of the notebook does not
# use.  errors="ignore" means a column missing from the file is skipped
# instead of raising.
unused_columns = ["F1", "Date", "Date Entered/Updated", "Other Name",
                  "Additional Information", "Source(s)"]
df = (
    pd.read_csv('north_korea_missile_test_database.csv')
      .drop(columns=unused_columns, errors="ignore")
)


In [22]:
# Column groups used by this cell and by the encoding/scaling step.
# 'Missile Name' (the prediction target) is deliberately in neither list, so
# its missing values are NOT filled here.
numerical_cols = ['Facility Latitude', 'Facility Longitude', 'Apogee', 'Distance Travelled']
categorical_cols = ["Launch Time (UTC)", 'Missile Type', 'Launch Agency/Authority',
                    'Facility Name', 'Facility Location', 'Landing Location',
                    'Confirmation Status', 'Test Outcome']

# Treat the literal 'Unknown' and empty strings as missing, in every column.
df = df.replace(['Unknown', ''], np.nan)

# Numeric columns: strip the ' km' suffix and thousands separators, parse to
# float (anything unparseable becomes NaN), then fill gaps with the column mean.
# NOTE(review): the mean is computed over the whole frame, i.e. before the
# train/val/test split, so held-out rows influence the imputed value.
for col in [name for name in numerical_cols if name in df.columns]:
    as_text = df[col].astype(str)
    as_text = as_text.str.replace(' km', '', regex=False).str.replace(',', '', regex=False)
    parsed = pd.to_numeric(as_text, errors='coerce')
    df[col] = parsed.fillna(parsed.mean())

# Categorical columns: missing values become an explicit 'Unknown' category.
for col in categorical_cols:
    if col not in df.columns:
        continue
    df[col] = df[col].fillna('Unknown')

print(df.dtypes)
df.tail(50)


Launch Time (UTC)           object
Missile Name                object
Missile Type                object
Launch Agency/Authority     object
Facility Name               object
Facility Location           object
Facility Latitude          float64
Facility Longitude         float64
Landing Location            object
Apogee                     float64
Distance Travelled         float64
Confirmation Status         object
Test Outcome                object
dtype: object


Unnamed: 0,Launch Time (UTC),Missile Name,Missile Type,Launch Agency/Authority,Facility Name,Facility Location,Facility Latitude,Facility Longitude,Landing Location,Apogee,Distance Travelled,Confirmation Status,Test Outcome
85,08:44:00 PM,Scud-C,SRBM,Hwasong Artillery Units of the KPA Strategic F...,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Unknown,384.627273,629.103448,Confirmed,Success
86,08:58:00 PM,Scud-C,SRBM,Hwasong Artillery Units of the KPA Strategic F...,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Unknown,384.627273,629.103448,Confirmed,Success
87,09:35:00 PM,Nodong,MRBM,Hwasong Artillery Units of the KPA Strategic F...,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Sea of Japan or East Sea,384.627273,500.0,Confirmed,Success
88,10:53:00 PM,Nodong,MRBM,Unknown,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Sea of Japan or East Sea,384.627273,1000.0,Confirmed,Success
89,10:53:00 PM,Nodong,MRBM,Unknown,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Unknown,384.627273,629.103448,Confirmed,Failure
90,08:29:00 PM,Pukguksong-1,SLBM,Unknown,Sinpo Shipyard,South Hamgyong province,40.0368,128.1839,Sea of Japan or East Sea,384.627273,500.0,Confirmed,Success
91,03:13:00 AM,ER Scud,MRBM,Hwasong Artillery Units of the KPA Strategic F...,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Sea of Japan or East Sea,384.627273,1000.0,Confirmed,Success
92,03:13:00 AM,ER Scud,MRBM,Hwasong Artillery Units of the KPA Strategic F...,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Sea of Japan or East Sea,384.627273,1000.0,Confirmed,Success
93,03:13:00 AM,ER Scud,MRBM,Hwasong Artillery Units of the KPA Strategic F...,Hwangju,"Hwangju, North Hwanghae province",38.686834,125.702005,Sea of Japan or East Sea,384.627273,1000.0,Confirmed,Success
94,06:33:00 PM,Musudan,IRBM,Unknown,Panghyon Airbase,"Kusong, North Pyongan",39.927472,125.207889,Unknown,384.627273,629.103448,Confirmed,Failure


In [23]:
# The label to predict is the missile's name; every other remaining column
# is used as an input feature.
target = 'Missile Name'
X = df.drop(columns=target)
y = df[target]


In [24]:
# Encode categoricals with LabelEncoder.
# Each column gets its own encoder, kept in `label_encoders` keyed by column
# name so the integer codes can be mapped back to the original strings.
label_encoders = {}
for col in categorical_cols:
    if col in X.columns:
        label = LabelEncoder()
        X[col] = label.fit_transform(X[col].astype(str))
        label_encoders[col] = label

# Scale numerical features to zero mean / unit variance.
# Only columns actually present in X are scaled: the cleaning and encoding
# loops already tolerate missing columns, and indexing X with an absent name
# would raise KeyError.
# NOTE(review): the scaler is fitted on all rows, before the train/val/test
# split, so held-out rows contribute to the mean/std used for scaling.
scaler = StandardScaler()
numerical_present = [col for col in numerical_cols if col in X.columns]
if numerical_present:
    X[numerical_present] = scaler.fit_transform(X[numerical_present])

# Encode target: class names -> integer class indices.
# `label_target.classes_` holds the index -> name mapping.
label_target = LabelEncoder()
y_encoded = label_target.fit_transform(y)

In [25]:
# Split data: hold out 20% for testing, then take 25% of the remaining 80%
# as the validation set -> 60% train / 20% val / 20% test overall.
split_seed = 31
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=split_seed
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.25, random_state=split_seed
)

# Convert to tensors: features as float32, class indices as int64.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (X_train, X_val, X_test)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(codes, dtype=torch.long)
    for codes in (y_train, y_val, y_test)
)


In [26]:
# Mini-batch size shared by all three loaders (adjust based on dataset size).
batch_size = 32

# Datasets: pair each feature tensor with its label tensor.
train_ds, val_ds, test_ds = (
    TensorDataset(features, targets)
    for features, targets in (
        (X_train_tensor, y_train_tensor),
        (X_val_tensor, y_val_tensor),
        (X_test_tensor, y_test_tensor),
    )
)

# Dataloaders: only the training loader shuffles; validation and test keep
# the dataset order.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader, test_loader = (
    DataLoader(dataset, batch_size=batch_size) for dataset in (val_ds, test_ds)
)


In [27]:
# Define the neural network
class MissileClassifier(nn.Module):
    """Feed-forward classifier with two hidden blocks and a linear output layer.

    Each hidden block is Linear -> BatchNorm1d -> ReLU -> Dropout.  The forward
    pass returns raw class scores (logits); no softmax is applied.

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output classes (length of the logit vector).
        h1: Width of the first hidden layer.
        h2: Width of the second hidden layer.
        p_drop1: Dropout probability after the first hidden block.
        p_drop2: Dropout probability after the second hidden block.

    Note:
        In training mode BatchNorm1d needs more than one sample per batch to
        compute batch statistics; single-row batches only work after
        ``model.eval()``.
    """

    def __init__(self, input_size, num_classes, h1=64, h2=32,
                 p_drop1=0.5, p_drop2=0.3):
        super().__init__()
        # First hidden block.
        self.fc1 = nn.Linear(input_size, h1)
        self.bn1 = nn.BatchNorm1d(h1)  # batch normalization on the pre-activations
        self.dropout1 = nn.Dropout(p_drop1)  # dropout to limit overfitting
        # Second hidden block.
        self.fc2 = nn.Linear(h1, h2)
        self.bn2 = nn.BatchNorm1d(h2)
        self.dropout2 = nn.Dropout(p_drop2)
        # Output layer: one logit per class.
        self.fc3 = nn.Linear(h2, num_classes)

    def forward(self, x):
        """Map a (batch, input_size) float tensor to (batch, num_classes) logits."""
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        return self.fc3(x)

# Size the network from the data: one input per feature column of X and one
# output logit per class known to the target encoder.
input_size = len(X.columns)
num_classes = len(label_target.classes_)
model = MissileClassifier(input_size=input_size, num_classes=num_classes)

In [28]:
# Loss and optimizer.
# The loss is plain cross-entropy with no class weights, and no learning-rate
# scheduler is created here; Adam is configured with weight decay.
learning_rate = 0.001
weight_decay = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)


In [29]:
# Training loop
# `losses` collects the training loss of every mini-batch as a Python float,
# in iteration order; `val_losses` collects one validation loss per epoch.
epochs = 100
losses = []
val_losses = []
for epoch in range(epochs):
    # --- Train for one pass over the training set ---
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # .item() yields a plain float and keeps no reference to the graph.
        losses.append(loss.item())

    # --- Validation ---
    # The criterion returns the mean over a batch, so each batch mean is
    # weighted by its size to get a true per-sample average; averaging the
    # batch means directly would over-weight a short final batch.
    model.eval()
    val_loss_sum = 0.0
    val_count = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            batch_n = labels.size(0)
            val_loss_sum += criterion(outputs, labels).item() * batch_n
            val_count += batch_n
    # An empty validation set yields NaN rather than a ZeroDivisionError.
    val_loss = val_loss_sum / val_count if val_count else float('nan')
    val_losses.append(val_loss)

    if epoch % 10 == 0:
        print(f'Epoch {epoch}/{epochs}, Val Loss: {val_loss:.4f}')


Epoch 0/100, Val Loss: 3.2936
Epoch 10/100, Val Loss: 2.7092
Epoch 20/100, Val Loss: 2.4883
Epoch 30/100, Val Loss: 2.3751
Epoch 40/100, Val Loss: 2.3392
Epoch 50/100, Val Loss: 2.3050
Epoch 60/100, Val Loss: 2.2522
Epoch 70/100, Val Loss: 2.1836
Epoch 80/100, Val Loss: 2.1265
Epoch 90/100, Val Loss: 2.0773


In [30]:
# Evaluate on the held-out test set.
# eval() switches dropout off and makes batch norm use its running statistics;
# no_grad() skips gradient tracking during inference.
model.eval()
y_pred, y_true = [], []
with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        logits = model(batch_inputs)
        # The predicted class is the index of the largest logit in each row.
        predicted = torch.max(logits, dim=1).indices
        y_pred.extend(predicted.numpy())
        y_true.extend(batch_labels.numpy())

accuracy = accuracy_score(y_true, y_pred)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 74.07%
