In [140]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
%matplotlib inline

In [141]:
# Set random seed for reproducibility
torch.manual_seed(31)
np.random.seed(31)

In [142]:
# Load the CSV file
df = pd.read_csv('north_korea_missile_test_database.csv')

df = df.drop(columns = ["F1", "Date", "Date Entered/Updated", "Other Name", "Additional Information", "Source(s)"], axis = 1, errors = "ignore")

df["Missile Name"].value_counts()

Missile Name
Scud-C          27
KN-02           20
Nodong          15
Scud-B          10
Musudan          8
ER Scud          8
KN-23            8
Hwasong-12       6
Pukguksong-1     6
KN-25            5
KN-24            4
Unha-3           3
Scud-B MaRV      3
Hwasong-14       2
Unknown          2
Pukguksong-2     2
Unha             2
Taepodong-1      1
Scud-C MaRV      1
Hwasong-15       1
Pukguksong-3     1
Name: count, dtype: int64

In [143]:
# Clean data
df = df.replace(['Unknown', ''], np.nan)

# Clean numerical columns
for col in ['Apogee', 'Distance Travelled','Facility Latitude', 'Facility Longitude']:
    if col in df.columns:
        df[col] = df[col].astype(str).str.replace(' km', '', regex=False).str.replace(',', '', regex=False)
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Handle missing values
numerical_cols = ['Facility Latitude', 'Facility Longitude', 'Apogee', 'Distance Travelled']
categorical_cols = ['Launch Time (UTC)', 'Missile Type', 'Launch Agency/Authority', 
                   'Facility Name', 'Facility Location', 'Landing Location', 
                   'Confirmation Status', 'Test Outcome']

for col in numerical_cols:
    if col in df.columns:
        df[col] = df[col].fillna(df[col].mean())

for col in categorical_cols:
    if col in df.columns:
        df[col] = df[col].fillna('Unknown')


In [144]:
# Target
target = 'Missile Name'
y = df[target]
X = df.drop(columns=[target])


In [145]:
# Encode categoricals with LabelEncoder
label_encoders = {}
for col in categorical_cols:
    if col in X.columns:
        label = LabelEncoder()
        X[col] = label.fit_transform(X[col].astype(str))
        label_encoders[col] = label 

# Scale numerical features
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Encode target
label_target = LabelEncoder()
y_encoded = label_target.fit_transform(y)

In [146]:
# Split data: 60% train, 20% val, 20% test
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=31)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.25, random_state=31)

# Convert to tensors
X_train_tensor = torch.FloatTensor(X_train.values)
y_train_tensor = torch.LongTensor(y_train)
X_val_tensor = torch.FloatTensor(X_val.values)
y_val_tensor = torch.LongTensor(y_val)
X_test_tensor = torch.FloatTensor(X_test.values)
y_test_tensor = torch.LongTensor(y_test)


In [147]:
# Datasets
batch_size = 16  # Adjust based on dataset size
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
val_ds = TensorDataset(X_val_tensor, y_val_tensor)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)

# Dataloaders
train_loader = DataLoader(train_ds, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_ds, batch_size = batch_size)
test_loader = DataLoader(test_ds, batch_size = batch_size)


In [153]:
# Define the neural network
class MissileClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

input_size = X.shape[1]
num_classes = len(label_target.classes_)
model = MissileClassifier(input_size, num_classes)

In [154]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [162]:
# Training loop
epochs = 100  # Adjust as needed
for epoch in range(epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
    val_loss /= len(val_loader)
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Val Loss: {val_loss:.4f}')



Epoch 1/100, Val Loss: 3.2924
Epoch 11/100, Val Loss: 3.2135
Epoch 21/100, Val Loss: 3.3391
Epoch 31/100, Val Loss: 3.3959
Epoch 41/100, Val Loss: 3.3850
Epoch 51/100, Val Loss: 3.4547
Epoch 61/100, Val Loss: 3.3967
Epoch 71/100, Val Loss: 3.5881
Epoch 81/100, Val Loss: 3.4990
Epoch 91/100, Val Loss: 3.5764


In [163]:
# Test the model
model.eval()
y_pred = []
y_true = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.numpy())
        y_true.extend(labels.numpy())

accuracy = accuracy_score(y_true, y_pred)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 77.78%
