<a href="https://www.kaggle.com/code/sonyalawrence/russian-invasion-of-ukraine?scriptVersionId=93389285" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## Assessment of damage associated with Russia's invasion of Ukraine

##### Import all libraries needed for this project

In [None]:
import pandas as pd
import numpy as np
from IPython.core.display import display
from PIL import Image
import urllib.request
%matplotlib inline 
import matplotlib as mpl
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from math import log10, floor

print('Matplotlib version: ', mpl.__version__)
print(plt.style.available)
mpl.style.use(['ggplot'])

Read the CSV files into pandas DataFrames

In [None]:
# Both CSV files are read from the notebook's working directory.
PERSONNEL_CSV = 'russia_losses_personnel.csv'
EQUIPMENT_CSV = 'russia_losses_equipment.csv'

russian_personnel = pd.read_csv(PERSONNEL_CSV)
russian_equipment = pd.read_csv(EQUIPMENT_CSV)

### Let's start by getting a basic understanding of the DataFrame and cleaning the data

In [None]:
# Render the personnel DataFrame as the cell output (a bare last expression is displayed).
russian_personnel

In [None]:
# Drop the two personnel-count columns and index the rows by 'date'.
# Built as one chained expression so the frame is rebound rather than mutated in place.
russian_personnel = (
    russian_personnel
    .drop(columns=['personnel', 'personnel*'])
    .set_index('date')
)

# Column/dtype summary is printed; the first rows are shown as the cell output.
russian_personnel.info()
russian_personnel.head()

In [None]:
# Render the equipment DataFrame as the cell output (a bare last expression is displayed).
russian_equipment

In [None]:
# Replace missing values with 0 and index the rows by 'date', in one chained step.
russian_equipment = russian_equipment.fillna(0).set_index('date')

# Column/dtype summary is printed; the first rows are shown as the cell output.
russian_equipment.info()
russian_equipment.head()

In [None]:
# Cast the two listed columns to 64-bit integers.
# NOTE(review): presumably these are the only columns left as floats after the
# NaN fill - confirm against the .info() output.
int_columns = ['special equipment', 'mobile SRBM system']
russian_equipment = russian_equipment.astype(dict.fromkeys(int_columns, np.int64))
russian_equipment.info()


### Problem 1
##### Visually represent where the invasion is happening.

In [None]:
# Remote map image and the local filename it is saved under.
MAP_URL = "http://www.vidiani.com/maps/maps_of_europe/maps_of_ukraine/detailed_political_and_administrative_map_of_ukraine.jpg"
MAP_FILE = "ukraine_map.jpg"

# Download the map into the working directory (runs on every execution of the cell).
urllib.request.urlretrieve(MAP_URL, MAP_FILE)

# Open the saved file and resize it to 1200x700 pixels.
img = Image.open(MAP_FILE).resize((1200, 700))

# Render the image inline.
display(img)

### Problem 2

##### Show the total Prisoners of War per day recorded in the Personnel table.


In [None]:
# Build a standalone date/day/POW table. reset_index is applied non-inplace so
# pow_data is a fresh frame rather than a column subset of russian_personnel
# mutated in place (which leaves it flagged as a copy and makes later column
# assignments prone to SettingWithCopyWarning).
pow_data = russian_personnel[["day", "POW"]].reset_index('date')

# Area chart of the POW column against the 'day' column.
ax = pow_data.plot(kind='area', x='day', y='POW', color='brown', alpha=.85,
                   figsize=(12, 6), legend=False, linestyle="-.")
ax.set_title('Count of Prisoners of War to Date')
ax.set_xlabel('Day since Invasion')
ax.set_ylabel('Count')
plt.show()

### Problem 3
##### Visualize the daily change in number of POW and Total POW.

In [None]:
# Day-over-day change in the POW column, computed in one vectorised step.
# prepend=0 makes the first entry equal the first POW value itself, i.e. the
# change measured from a starting count of zero.
daily_increase = np.diff(pow_data['POW'].values, prepend=0)

# assign() returns a new frame, so no column is written onto a slice-derived frame.
pow_data = pow_data.assign(Daily_Increase_in_Prisoners_of_War=daily_increase)

# Table indexed by 'day' holding the running total and the daily change.
pow_data_ = (
    pow_data[['day', 'POW', 'Daily_Increase_in_Prisoners_of_War']]
    .set_index('day')
    .rename(columns={'POW': 'Daily_Total_Prisoners_of_War'})
)
pow_data_

In [None]:
# Draw every column of pow_data_ as a line against its 'day' index, then label
# the returned Axes directly.
ax = pow_data_.plot.line(figsize=(16, 6))
ax.set_xlabel('Days since Invasion')
ax.set_ylabel('Count of Prisoners')
ax.set_title("Total Daily Prisoner of War VS Daily Increase in Prisoners of War")
plt.show()

### Problem 4

##### Visualize the daily change in POW

In [None]:
print(pow_data_.describe())

daily_increase = pow_data_['Daily_Increase_in_Prisoners_of_War']

# One figure, two side-by-side panels. The size is set once here: passing
# different figsize values to each pandas plot call resizes the shared figure
# each time, so only the last value (20 x 6) ever took effect.
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(20, 6))

# Subplot 1: notched box plot of the daily increase
daily_increase.plot(kind='box', color='blue', ax=ax0, notch=True)
ax0.set_title('Daily Increase in Prisoners of War')
ax0.set_ylabel('Daily Count of Prisoners')

# Subplot 2: line plot of the daily increase against the 'day' index
daily_increase.plot(kind='line', ax=ax1, marker="|")
ax1.set_xlabel('Days since Invasion')
ax1.set_ylabel('Daily Count of Prisoners')
ax1.set_title("Daily Increase in Prisoners of War")

plt.show()

### Problem 5

##### What is the total daily loss for all equipment?


In [None]:
# Re-index the equipment table by 'day'. set_index discards the previous
# ('date') index, so no separate reset/drop step is needed.
equipment_by_day = russian_equipment.set_index('day')

# Row-wise sum across all equipment columns, appended as the last column.
# The sum is a Series, which is the natural value for a single new column.
daily_total_equip_loss = equipment_by_day.assign(
    DailyTotalLoss=equipment_by_day.sum(axis=1)
)

ax = daily_total_equip_loss[['DailyTotalLoss']].plot(
    linestyle='dashdot', marker="o", figsize=(18, 8), legend=False
)
ax.set_title("Total Russian Equipment Lost Daily")
ax.set_xlabel('Days since Invasion')
ax.set_ylabel('Equipment loss count')
plt.show()

### Problem 6
##### Create a word cloud visual to show the equipment types that make up the highest percentage of total equipment loss.

In [None]:
# Most recent loss data: the last row of the table. Its last column is the
# row total; every column before it is one equipment type.
latest_row = daily_total_equip_loss.iloc[-1]

# Take the total as a scalar. Calling int() on a (1, 1) array slice relies on
# array-to-scalar conversion, which recent NumPy releases deprecate.
total = int(latest_row.iloc[-1])

# One row per equipment type with its latest count; spaces in the names are
# replaced by underscores (same names the previous hand-written mapping produced).
total_loss = (
    latest_row.iloc[:-1]
    .rename('Count')
    .rename_axis('Equipment')
    .to_frame()
    .rename(index=lambda name: name.replace(' ', '_'))
)

# Share of the total contributed by each equipment type. Types with a zero
# count are left out because they have nothing to draw.
loss_share = total_loss.loc[total_loss['Count'] > 0, 'Count'] / total

# Hand the weights straight to WordCloud instead of repeating each name in a
# very long string: the string route is re-tokenised by WordCloud, which breaks
# names at the hyphen and applies its default bigram (collocation) detection.
word_cloud = WordCloud(background_color='white').generate_from_frequencies(
    loss_share.to_dict()
)

# Display cloud
plt.figure(figsize=(8, 8))
plt.imshow(word_cloud, interpolation='bessel')
plt.axis('off')
plt.show()


### Problem 7

##### How much equipment did Russia lose in the first 5 days of the invasion?

In [None]:
# Select by the 'day' index value rather than by row count, so the answer
# covers days up to and including day 5 whichever day the data starts on.
# NOTE(review): head(4) matched this only if the first recorded day is day 2 - confirm.
daily_total_equip_loss[daily_total_equip_loss.index <= 5]

### Problem 8

##### What is the most recent count of equipment loss?

In [None]:
# Show the last rows of the table (tail() defaults to five); the final row is the most recent day.
daily_total_equip_loss.tail()

### Problem 9

##### Is there any correlation between the POW and equipment loss?


In [None]:
def round_sig(x, sig=3):
    """Round ``x`` to ``sig`` significant figures.

    Parameters
    ----------
    x : int or float
        Value to round.
    sig : int, default 3
        Number of significant figures to keep.

    Returns
    -------
    The rounded value. Zero, NaN and +/-infinity are returned unchanged,
    because they have no order of magnitude to round against.
    """
    # log10(0) raises, and floor() of NaN or infinity raises, so these values
    # are passed through untouched. `x != x` is true only for NaN.
    if x == 0 or x != x or abs(x) == float('inf'):
        return x
    # Position of the leading digit relative to the decimal point.
    magnitude = int(floor(log10(abs(x))))
    return round(x, sig - magnitude - 1)

# Join the equipment and personnel tables (pd.merge with no `on` joins on the
# columns the two frames share), then remove 'day' so it is not correlated.
merge = pd.merge(russian_equipment, russian_personnel).drop(columns='day')

# Correlation of every remaining column with POW, strongest first.
merged = merge.corr()['POW'].sort_values(ascending=False)

ax = merged.plot.barh(title='Correlation between Prisoners of War (POW) & Equipment Loss',
                      figsize=(10, 8), alpha=.9, color='maroon')

# Write each bar's value (3 significant figures) just inside the end of the bar.
for position, correlation in enumerate(merged):
    rounded = round_sig(correlation)
    ax.annotate(format(rounded), xy=(rounded - .1, position - .1), color='yellow')

### Problem 10

##### What inferences can you make from the relationship between POW and Equipment loss?


In [None]:
# Summary statistics of the correlation Series, looked up by label. describe()
# returns a label-indexed Series, and integer keys on such a Series
# (e.g. [-3], [2], [3]) rely on a deprecated positional fallback.
corr_stats = merged.describe()
median_corr = round_sig(corr_stats['50%'])
std_corr = round_sig(corr_stats['std'])
min_corr = round_sig(corr_stats['min'])

# merged is sorted in descending order; position 1 and the last position are
# used as the ends of the reported range.
# NOTE(review): position 0 is presumably POW's correlation with itself - confirm.
strongest_name, strongest_corr = merged.keys()[1], round_sig(merged.values[1])
weakest_name, weakest_corr = merged.keys()[-1], round_sig(merged.values[-1])

# NOTE(review): positions 6 and 7 are hard-coded as the entries on either side
# of the median; that only holds for a specific number of columns - confirm.
below_median_name, above_median_name = merged.keys()[6], merged.keys()[7]

print(
    "There is a very strong, positive, direct corralation between Prisoners of War(POW) and overall Equipment Loss. "
    "As the POW increases, so does the overall equipment loss. "
    f"The strength in correlation, ranges from {strongest_corr} ({strongest_name}) to {weakest_corr} ({weakest_name}). "
    f"The median correlation is {median_corr} and it falls between {below_median_name} and {above_median_name}. "
    f"The standard deviation is {std_corr} which indicated that the there is not much variability in the data; all points are close to the mean. "
    f"It is then valid to conclude that as the invasion continues, and POW increases so will overall equipment loss especially the loss of {strongest_name} by at least {min_corr}."
)


<!-- It should be noted that the strongest correlation exists not with the equipment type that had the highest loss ({}) but with the type that had the ({}) highest loss. -->