<a href="https://colab.research.google.com/github/LennyHenrydoesGitHub/OSINT/blob/main/model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import torch
import torch.nn as nn
from google.colab import files, drive
import pandas as pd
drive.mount('/content/drive')

Mounted at /content/drive


### Make dictionary for details about conflicts

In [51]:
df_conflicts = pd.read_csv('/content/drive/My Drive/Data_sci/icb1v15.csv')
df_actors = pd.read_csv('/content/drive/My Drive/Data_sci/icb2v15.csv')
country_codes = pd.read_csv('/content/drive/My Drive/Data_sci/country codes 2.csv')

crisis_details = {}

for index, row in df_conflicts.iterrows():
    crisis_number = row['crisno']  # Crisis number
    crisis_name = row['crisname']  # Crisis name
    start_year = row['yrtrig']  # Start year
    end_year = row['yrterm']  # End year

    crisis_details[crisis_number] = {'crisis_name': crisis_name,
                                      'start_year': start_year,
                                      'end_year': end_year,
                                      'actors': set()}

for index, row in df_actors.iterrows():
    crisis_number = row['crisno']  # Crisis number
    r = row['actor']
    idx = np.where(country_codes == row['actor'])[0] # Actor
    actor = country_codes.iloc[idx[0],0]

    if crisis_number in crisis_details:
        crisis_details[crisis_number]['actors'].add(actor)

print(crisis_details[27])

{'crisis_name': 'RUHR I', 'start_year': 1923, 'end_year': 1923.0, 'actors': {'Poland', 'Germany (Prussia)', 'Netherlands'}}


In [45]:

idx = np.where(country_codes == 'AAB')[0]
a = country_codes.iloc[idx[0],0]


### Make matrix for years (i) and actors (j), for if a conflict has been started

In [3]:
actors = set()
for _, details in crisis_details.items():
    actors.update(details['actors'])

matrix_data = []
for _, details in crisis_details.items():
    crisis_year = details['start_year']
    crisis_actors = details['actors']
    row_data = {'Year': crisis_year}
    for actor in actors:
        row_data[actor] = 1 if actor in crisis_actors else 0
    matrix_data.append(row_data)

actor_crisis_matrix = pd.DataFrame(matrix_data)

columns = actor_crisis_matrix.columns.tolist()
columns.remove('Year')
columns = ['Year'] + columns

actor_crisis_matrix = actor_crisis_matrix[columns] # Row per conflict
combined_actor_crisis_matrix = actor_crisis_matrix.groupby('Year').sum().reset_index() # Row per year

print(combined_actor_crisis_matrix.head())

   Year  IND  AZE  SEN  CAN  HIJ  HUN  IRQ  TUN  SLV  ...  DRV  TAZ  NTH  ZIM  \
0  1918    0    0    0    0    0    0    0    0    0  ...    0    0    0    0   
1  1919    0    0    0    0    0    1    0    0    0  ...    0    0    0    0   
2  1920    0    0    0    0    0    0    0    0    0  ...    0    0    0    0   
3  1921    0    0    0    0    0    2    0    0    0  ...    0    0    1    0   
4  1922    0    0    0    0    0    0    0    0    0  ...    0    0    0    0   

   BAH  DEN  TOG  CAM  LIT  VFR  
0    0    0    0    0    1    0  
1    0    0    0    0    0    0  
2    0    0    0    0    1    0  
3    0    0    0    0    0    0  
4    0    0    0    0    0    0  

[5 rows x 146 columns]


### Prepare input data for RNN

In [4]:

df = pd.read_csv('/content/drive/My Drive/Data_sci/all_alphabetical_by_recipient.csv')

# Perform one-hot encoding for categorical variables
categorical_cols = ['Recipient', 'Supplier', 'Weapon designation']
encoder = OneHotEncoder(handle_unknown='ignore')
encoded_data = encoder.fit_transform(df[categorical_cols])

# Normalize numeric variables
numeric_cols = ['Year of order', 'Number ordered', 'SIPRI TIV per unit', 'SIPRI TIV for total order', 'SIPRI TIV of delivered weapons']
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[numeric_cols])

# Concatenate encoded categorical data and scaled numeric data
input_data = np.concatenate((encoded_data.toarray(), scaled_data), axis=1)

# Define sequence length
sequence_length = 5

# Create input sequences
sequences = []
targets = []

for i in range(len(input_data) - sequence_length):
    sequence = input_data[i:i+sequence_length]
    target = 1 # Did a conflict occur this year (1 or 0)
    sequences.append(sequence)
    targets.append(target)

X = torch.tensor(sequences, dtype=torch.float32)
y = torch.tensor(targets, dtype=torch.float32)


  X = torch.tensor(sequences, dtype=torch.float32)


In [7]:
print(X.shape)
print(y.shape)

torch.Size([29053, 5, 4481])
torch.Size([29053])


### Define the RNN

In [None]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

input_size = X.shape[2]
output_size = 1
hidden_size = 64
num_layers = 1

model = RNNModel(input_size, hidden_size, output_size, num_layers)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


### Training

In [None]:
num_epochs = 10
for epoch in range(num_epochs):

    outputs = model(X)
    loss = criterion(outputs.squeeze(), y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')