<a href="https://colab.research.google.com/github/LennyHenrydoesGitHub/OSINT/blob/main/model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import torch
import torch.nn as nn
from google.colab import files, drive
import pandas as pd
import random
drive.mount('/content/drive')

Mounted at /content/drive


### Make dictionary for details about conflicts

In [2]:
df_conflicts = pd.read_csv('/content/drive/My Drive/Data_sci/icb1v15.csv')
df_actors = pd.read_csv('/content/drive/My Drive/Data_sci/icb2v15.csv')
country_codes = pd.read_csv('/content/drive/My Drive/Data_sci/country codes 2.csv')

crisis_details = {}

for index, row in df_conflicts.iterrows():
    crisis_number = row['crisno']  # Crisis number
    crisis_name = row['crisname']  # Crisis name
    start_year = row['yrtrig']  # Start year
    end_year = row['yrterm']  # End year

    crisis_details[crisis_number] = {'crisis_name': crisis_name,
                                      'start_year': start_year,
                                      'end_year': end_year,
                                      'actors': set()}

for index, row in df_actors.iterrows():
    crisis_number = row['crisno']  # Crisis number
    r = row['actor']
    idx = np.where(country_codes == row['actor'])[0] # Actor
    actor = country_codes.iloc[idx[0],0]

    if crisis_number in crisis_details:
        crisis_details[crisis_number]['actors'].add(actor)

print(crisis_details[87])

{'crisis_name': 'OCCUPATION OF IRAN', 'start_year': 1941, 'end_year': 1942.0, 'actors': {'Iran'}}


### Make matrix for conflicts (i) and actors (j), for if a conflict has been started

In [109]:
actors = set()
for _, details in crisis_details.items():
    actors.update(details['actors'])

matrix_data = []
for _, details in crisis_details.items():
    crisis_year = details['start_year']
    crisis_actors = details['actors']
    row_data = {'Year': crisis_year}
    for actor in actors:
        row_data[actor] = 1 if actor in crisis_actors else 0
    matrix_data.append(row_data)

actor_crisis_matrix = pd.DataFrame(matrix_data)

columns = actor_crisis_matrix.columns.tolist()
columns.remove('Year')
columns = ['Year'] + columns

actor_crisis_matrix = actor_crisis_matrix[columns] # Row per conflict
combined_actor_crisis_matrix = actor_crisis_matrix.groupby('Year').sum().reset_index() # Row per year
actor_crisis_matrix = actor_crisis_matrix[actor_crisis_matrix['Year'] >= 1950]

print(actor_crisis_matrix.head())

     Year  Peru  Australia  Benin  Paraguay  Papua New Guinea  Cameroon  \
131  1950     0          0      0         0                 0         0   
132  1950     0          0      0         0                 0         0   
133  1951     0          0      0         0                 0         0   
134  1951     0          0      0         0                 0         0   
135  1951     0          0      0         0                 0         0   

     South Vietnam  Nigeria  Turkey  ...  Algeria  Malawi  Serbia  Bangladesh  \
131              0        0       0  ...        0       0       0           0   
132              0        0       0  ...        0       0       0           0   
133              0        0       0  ...        0       0       0           0   
134              0        0       0  ...        0       0       0           0   
135              0        0       0  ...        0       0       0           0   

     Nicaragua  Jordan  Ethiopia  Djibouti  Guinea  India  
13

### Prepare input data for RNN

In [4]:
df = pd.read_csv('/content/drive/My Drive/Data_sci/all_alphabetical_by_recipient.csv')

categorical_cols = ['Recipient', 'Supplier', 'Weapon designation']
encoder = OneHotEncoder(handle_unknown='ignore')
encoded_data = encoder.fit_transform(df[categorical_cols])

numeric_cols = ['Year of order', 'Number ordered', 'SIPRI TIV per unit', 'SIPRI TIV for total order', 'SIPRI TIV of delivered weapons']
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[numeric_cols])


def extract_arms_trade_data(recipient, target_year):
    data = df[(df['Recipient'] == recipient) & (df['Year of order'] >= target_year - 3) & (df['Year of order'] < target_year)]
    if data.empty:
      return None, None

    encoded_data = encoder.transform(data[categorical_cols])
    scaled_data = scaler.transform(data[numeric_cols])

    input_data = np.concatenate((encoded_data.toarray(), scaled_data), axis=1)

    year_rows = actor_crisis_matrix[actor_crisis_matrix['Year'] == target_year]
    if year_rows[recipient].sum() == 0:
        target = 0
    else:
        target = 1

    return input_data, target




(array([[ 0.        ,  1.        ,  0.        , ..., -0.25502206,
         -0.27955698, -0.27606523],
        [ 0.        ,  1.        ,  0.        , ..., -0.16847345,
         -0.24720897, -0.24119009],
        [ 0.        ,  1.        ,  0.        , ..., -0.26800436,
         -0.27820288, -0.27460534],
        [ 0.        ,  1.        ,  0.        , ..., -0.27124993,
         -0.13436705, -0.11953263],
        [ 0.        ,  1.        ,  0.        , ..., -0.26944683,
         -0.2201269 , -0.2119923 ]]),
 0)

### Generate random samples of data for RNN

In [139]:
def generate_random_samples(actor_matrix, n, ratio):
    random_samples = []

    num_ones = int(n * ratio)
    num_zeros = n - num_ones

    one_indices = actor_matrix[actor_matrix == 1].stack().index.tolist()
    zero_indices = actor_matrix[actor_matrix == 0].stack().index.tolist()

    sampled_one_indices = random.sample(one_indices, num_ones)
    sampled_zero_indices = random.sample(zero_indices, num_zeros)
    sampled_indices = sampled_one_indices + sampled_zero_indices
    random.shuffle(sampled_indices)

    for idx, country in sampled_indices:
        year = actor_matrix.loc[idx, 'Year']
        input_data, target = extract_arms_trade_data(country, year)

        if input_data is not None and target is not None:
            input_data = torch.tensor(input_data, dtype=torch.float32)
            target = torch.tensor(target, dtype=torch.float32)

            if not torch.isnan(input_data).any() and not torch.isnan(target):
                X_rnn = input_data
                y_rnn = target

                random_samples.append((X_rnn, y_rnn, country, year))

    return random_samples

n = 1000
ratio = 0.5  # share of data that is 1s
random_samples = generate_random_samples(actor_crisis_matrix, n, ratio)


### Define the RNN model

In [145]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

input_size = random_samples[0][0].shape[1]
output_size = 1
hidden_size = 64
num_layers = 1

model = RNNModel(input_size, hidden_size, output_size, num_layers)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


### Training, validation, and testing


In [146]:
num_epochs = 20
train_size = int(0.8 * len(random_samples))
val_size = int(0.1 * len(random_samples))
test_size = len(random_samples) - train_size - val_size

train_set, val_set, test_set = random_samples[:train_size], random_samples[train_size:train_size + val_size], random_samples[-test_size:]

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for X_rnn, y_rnn, _, _ in train_set:
        optimizer.zero_grad()
        X_rnn = X_rnn.unsqueeze(0)
        outputs = model(X_rnn)
        predictions = (outputs.squeeze() > 0.5).float()
        loss = criterion(outputs.squeeze(), y_rnn)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct_predictions += (predictions == y_rnn).sum().item()
        total_samples += 1

    # Validation
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    val_correct_predictions = 0
    val_total_samples = 0

    with torch.no_grad():
        for X_val, y_val, _, _ in val_set:
            X_val = X_val.unsqueeze(0)
            outputs_val = model(X_val)
            predictions_val = (outputs_val.squeeze() > 0.5).float()
            val_loss += criterion(outputs_val.squeeze(), y_val).item()
            val_correct_predictions += (predictions_val == y_val).sum().item()
            val_total_samples += 1

    epoch_loss = running_loss / len(train_set)
    val_accuracy = val_correct_predictions / val_total_samples

    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

# Testing
model.eval()  # Set model to evaluation mode
test_correct_predictions = 0
test_total_samples = 0

with torch.no_grad():
    for X_test, y_test, _, _ in test_set:
        X_test = X_test.unsqueeze(0)
        outputs_test = model(X_test)
        predictions_test = (outputs_test.squeeze() > 0.5).float()
        test_correct_predictions += (predictions_test == y_test).sum().item()
        test_total_samples += 1

test_accuracy = test_correct_predictions / test_total_samples
print(f'Testing Accuracy: {test_accuracy:.4f}')

Epoch [1/20], Training Loss: 0.6780, Validation Accuracy: 0.6000
Epoch [2/20], Training Loss: 0.5106, Validation Accuracy: 0.6143
Epoch [3/20], Training Loss: 0.2869, Validation Accuracy: 0.5571
Epoch [4/20], Training Loss: 0.1410, Validation Accuracy: 0.6286
Epoch [5/20], Training Loss: 0.0869, Validation Accuracy: 0.5714
Epoch [6/20], Training Loss: 0.0484, Validation Accuracy: 0.6429
Epoch [7/20], Training Loss: 0.0507, Validation Accuracy: 0.6143
Epoch [8/20], Training Loss: 0.0319, Validation Accuracy: 0.6429
Epoch [9/20], Training Loss: 0.0283, Validation Accuracy: 0.6143
Epoch [10/20], Training Loss: 0.0257, Validation Accuracy: 0.6429
Epoch [11/20], Training Loss: 0.0241, Validation Accuracy: 0.6286
Epoch [12/20], Training Loss: 0.0229, Validation Accuracy: 0.6429
Epoch [13/20], Training Loss: 0.0222, Validation Accuracy: 0.6714
Epoch [14/20], Training Loss: 0.0211, Validation Accuracy: 0.6143
Epoch [15/20], Training Loss: 0.0196, Validation Accuracy: 0.6571
Epoch [16/20], Trai