In [None]:
# http://www.cs.umd.edu/class/spring2023/cmsc320-0101/files/tutorial.pdf

Introduction:
Communication during warfare has always been of the utmost importance, especially in military intelligence and counter-intelligence. As warfare has become more complex, so too has the information that must be passed around. With this increase in complexity, data processing and statistical techniques have become vital for making sense of the incoming data, so that resources can be used more efficiently and unnecessary losses, not only of equipment but also of life, can be prevented.

In [2]:
import requests
from numpy import NaN
import pandas as pd
import numpy as np
import re
import datetime
from urllib.request import urlopen
from bs4 import BeautifulSoup
import math
import matplotlib.pyplot as plt

Data Collection

Data Curation, Parsing, and Management

In [3]:
# HTTP headers for web requests: a desktop-browser User-Agent string plus a
# contact address in the From field.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
    'From': 'pleaseletmein@gmail.com',
}

# Disabled request: its URL targets a solar-flare table rather than the loss
# datasets loaded in this notebook, so `headers` is currently only used here.
#r = requests.get('https://www.spaceweatherlive.com/en/solar-activity/top-50-solar-flares.html', headers=headers)

In [4]:
# Russian equipment losses per day, one column per equipment category.
# Category descriptions and data source:
# https://www.kaggle.com/datasets/piterfm/2022-ukraine-russian-war
df_RU_equipment = (
    pd.read_csv("data/russia_losses_equipment.csv")
    # Parse the date strings into pandas datetimes
    .assign(date=lambda raw: pd.to_datetime(raw['date']))
    # Park the non-count columns in the index so diff() only touches the loss counts
    .set_index(['date', 'day', 'greatest losses direction'])
    # Row-to-row change (presumably the CSV holds running totals - TODO confirm).
    # Where the difference is NaN (the first row, or next to a missing value),
    # fall back to the value read from the file.
    .pipe(lambda totals: totals.diff().fillna(totals))
    # Whatever is still missing becomes 0 so every count can be stored as an integer
    .fillna(0)
    .astype(int)
    # Restore date / day / direction as ordinary columns
    .reset_index()
)
# Display the ten most recent rows
df_RU_equipment.tail(10)

Unnamed: 0,date,day,greatest losses direction,aircraft,helicopter,tank,APC,field artillery,MRL,military auto,fuel tank,drone,naval ship,anti-aircraft warfare,special equipment,mobile SRBM system,vehicles and fuel tanks,cruise missiles
399,2023-03-31,401,,0,0,5,3,4,0,0,0,9,0,0,3,0,3,0
400,2023-04-01,402,,0,0,1,4,8,1,0,0,0,0,1,2,0,7,0
401,2023-04-02,403,,0,0,2,5,4,0,0,0,1,0,0,0,0,9,0
402,2023-04-03,404,,0,0,1,7,7,0,0,0,13,0,1,2,0,16,0
403,2023-04-04,405,,0,0,8,6,3,1,0,0,15,0,0,0,0,9,0
404,2023-04-05,406,,0,0,2,6,10,4,0,0,6,0,0,1,0,11,0
405,2023-04-06,407,,0,1,2,8,7,0,0,0,4,0,1,3,0,1,0
406,2023-04-07,408,,0,0,2,3,8,1,0,0,4,0,0,2,0,13,0
407,2023-04-08,409,,1,0,3,4,5,0,0,0,7,0,1,0,0,12,0
408,2023-04-09,410,,0,0,0,4,13,0,0,0,14,0,0,5,0,3,0


In [5]:
# Russian personnel losses per day.
# The prisoners (POW) column stopped being updated after 2022-04-28.
# Category descriptions and data source:
# https://www.kaggle.com/datasets/piterfm/2022-ukraine-russian-war
df_RU_personnel = (
    pd.read_csv("data/russia_losses_personnel.csv")
    # Parse the date strings into pandas datetimes
    .assign(date=lambda raw: pd.to_datetime(raw['date']))
    # Park the non-count columns in the index so diff() only touches the loss counts
    .set_index(['date', 'day', 'personnel*'])
    # Row-to-row change (presumably the CSV holds running totals - TODO confirm).
    # Where the difference is NaN (the first row, or next to a missing value),
    # fall back to the value read from the file.
    .pipe(lambda totals: totals.diff().fillna(totals))
    # Whatever is still missing becomes 0 so every count can be stored as an integer
    .fillna(0)
    .astype(int)
    # Restore date / day / personnel* as ordinary columns
    .reset_index()
)
# Display the ten most recent rows
df_RU_personnel.tail(10)

Unnamed: 0,date,day,personnel*,personnel,POW
399,2023-03-31,401,about,460,0
400,2023-04-01,402,about,630,0
401,2023-04-02,403,about,560,0
402,2023-04-03,404,about,610,0
403,2023-04-04,405,about,530,0
404,2023-04-05,406,about,550,0
405,2023-04-06,407,about,390,0
406,2023-04-07,408,about,480,0
407,2023-04-08,409,about,570,0
408,2023-04-09,410,about,470,0


In [6]:
# Visually confirmed Ukrainian losses (each one backed by an image of the
# destruction), totalled over the war and broken down by equipment and model.
# Category descriptions and data source:
# https://www.kaggle.com/datasets/5481b37f75c2bc8587e527d6e4d6fd7a81ac46e329e1946c8f85210cbe662eb5
df_UA_total = pd.read_csv(filepath_or_buffer="data/losses_ukraine.csv")
# NOTE(review): the displayed table has an "Unnamed: 5" column that is blank in
# these rows - possibly a stray empty column in the CSV; confirm before using it.
df_UA_total.tail(n=10)

Unnamed: 0,equipment,model,sub_model,manufacturer,losses_total,Unnamed: 5,abandoned,abandoned and destroyed,captured,captured and destroyed,damaged,damaged and abandoned,damaged and captured,damaged beyond economical repair,destroyed,scuttled to prevent capture by Russia,sunk,sunk but raised by Russia
280,"Trucks, Vehicles and Jeeps",Armed pickup truck,with KPV HMG,Ukraine,1,,,,,,,,,,1.0,,,
281,"Trucks, Vehicles and Jeeps",Armed pickup truck,with M2 HMG,Ukraine,1,,,,,,,,,,1.0,,,
282,"Trucks, Vehicles and Jeeps",Armed pickup truck,with ZU-23 AA gun,Ukraine,1,,,,,,,,,,1.0,,,
283,"Trucks, Vehicles and Jeeps",Armed tactical buggy,with ATGM,Ukraine,1,,,,,,,,,,1.0,,,
284,"Trucks, Vehicles and Jeeps",Ford Ranger,,the United States,1,,,,,,,,1.0,,,,,
285,"Trucks, Vehicles and Jeeps",Peugeot P4,,France,2,,,,,,,,,,2.0,,,
286,"Trucks, Vehicles and Jeeps",Unknown tanker,,Ukraine,1,,,,,,,,,,1.0,,,
287,"Trucks, Vehicles and Jeeps",Unknown truck,,Ukraine,65,,,,4.0,,2.0,1.0,,,58.0,,,
288,"Trucks, Vehicles and Jeeps",Unknown truck,with ZU-23 AA gun,Ukraine,1,,,,,,,,,,1.0,,,
289,"Trucks, Vehicles and Jeeps",Unknown vehicle,,Ukraine,42,,,,,,,,,,42.0,,,


In [7]:
# Visually confirmed Russian losses (each one backed by an image of the
# destruction), totalled over the war and broken down by equipment and model.
# Category descriptions and data source:
# https://www.kaggle.com/datasets/5481b37f75c2bc8587e527d6e4d6fd7a81ac46e329e1946c8f85210cbe662eb5
df_RU_total = pd.read_csv(filepath_or_buffer="data/losses_russia.csv")
# Display the last ten rows
df_RU_total.tail(n=10)

Unnamed: 0,equipment,model,sub_model,manufacturer,losses_total,abandoned,abandoned and destroyed,captured,captured and destroyed,captured and stripped,damaged,damaged and abandoned,damaged and captured,damaged beyond economical repair,destroyed,destroyed in a non-combat related incident,sunk
328,"Trucks, Vehicles and Jeeps",LuAZ-969,,the Soviet Union,2,,,,,,,,1.0,,1.0,,
329,"Trucks, Vehicles and Jeeps",UAZ Patriot jeep,,Russia,3,,,1.0,,,,,,,2.0,,
330,"Trucks, Vehicles and Jeeps",UAZ-31514,,Russia,1,,,1.0,,,,,,,,,
331,"Trucks, Vehicles and Jeeps",UAZ-23632 pickup truck,,Russia,3,,,3.0,,,,,,,,,
332,"Trucks, Vehicles and Jeeps",UAZ-23632-148-64 armed pickup truck,,Russia,5,,,,,,,,,,5.0,,
333,"Trucks, Vehicles and Jeeps",UAZ-394511 ‘Esaul’,,Russia,5,,,,,,,,2.0,,3.0,,
334,"Trucks, Vehicles and Jeeps",UAZ-515195 'Esaul',,Russia,1,,,,,,,,,,1.0,,
335,"Trucks, Vehicles and Jeeps",Unknown fuel tanker,,Russia,4,,,,,,,,,,4.0,,
336,"Trucks, Vehicles and Jeeps",(Unknown) truck,,Russia,262,1.0,,10.0,1.0,,7.0,2.0,,,241.0,,
337,"Trucks, Vehicles and Jeeps",(Unknown) vehicle,,Russia,45,,,,,,6.0,,,,39.0,,


In [9]:
# Sanity check: confirm the loss counts ended up with an integer dtype.
# Reference: https://www.tutorialspoint.com/how-to-check-the-data-type-in-pandas-dataframe
print(df_RU_equipment.dtypes["aircraft"])

int32


In [18]:
# Summary statistics (mean, std, min, quartiles, max) of the daily Russian
# equipment losses, one column per equipment category.
df_RU_equipment_summary = (
    df_RU_equipment
    # Pick the numeric columns explicitly: a bare describe() also summarises
    # datetime columns on pandas >= 2.0, which would add `date` to the table
    # and change the row order.
    .select_dtypes(include="number")
    .describe()
    # Drop the "day" column - it is a running day counter, so its statistics
    # are meaningless here
    .drop(columns="day")
    # Drop the "count" row - we already know there is one entry per day
    .drop(index="count")
)

# NOTE(review): the displayed table shows a negative minimum for APC and a very
# large maximum for "vehicles and fuel tanks"; these look like artefacts of the
# upstream differencing step rather than real single-day losses - confirm
# before interpreting them.
# Return and display the summary statistics
df_RU_equipment_summary

Unnamed: 0,aircraft,helicopter,tank,APC,field artillery,MRL,military auto,fuel tank,drone,naval ship,anti-aircraft warfare,special equipment,mobile SRBM system,vehicles and fuel tanks,cruise missiles
mean,0.750611,0.713936,8.889976,17.173594,6.699267,1.303178,4.158924,0.185819,5.652812,0.04401,0.689487,0.755501,0.00978,13.696822,2.227384
std,1.760516,1.890074,8.267591,29.222636,6.193373,2.127323,15.693657,3.009738,6.677204,0.238499,1.169008,1.232276,0.139685,88.772987,9.664197
min,0.0,0.0,0.0,-14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,3.0,7.0,3.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,3.0,0.0
50%,0.0,0.0,7.0,12.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,8.0,0.0
75%,1.0,1.0,13.0,22.0,9.0,2.0,0.0,0.0,8.0,0.0,1.0,1.0,0.0,14.0,0.0
max,17.0,20.0,80.0,516.0,49.0,17.0,179.0,60.0,53.0,2.0,8.0,10.0,2.0,1796.0,84.0


In [20]:
# Summary statistics (mean, std, min, quartiles, max) of the daily Russian
# personnel losses.
df_RU_personnel_summary = (
    df_RU_personnel
    # Pick the numeric columns explicitly: a bare describe() also summarises
    # datetime columns on pandas >= 2.0, which would add `date` to the table
    # and change the row order.
    .select_dtypes(include="number")
    .describe()
    # Drop the "day" column - it is a running day counter, so its statistics
    # are meaningless here
    .drop(columns="day")
    # Drop the "count" row - we already know there is one entry per day
    .drop(index="count")
)

# Return and display the summary statistics
df_RU_personnel_summary

Unnamed: 0,personnel,POW
mean,435.574572,1.212714
std,322.98414,10.972838
min,0.0,0.0
25%,200.0,0.0
50%,370.0,0.0
75%,620.0,0.0
max,3160.0,200.0


Exploratory Data Analysis

In [None]:
# TODO
    # Could analyze correlations between Ukrainian vehicle losses and Russian vehicle losses, in order to see how effective the Ukrainian military is
        # Would likely want to aggregate data by vehicle type
    # Could compare Russian personnel and equipment losses to find trends

Hypothesis Testing and Machine Learning

In [None]:
# TODO

Interpretation