In [None]:
import json
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.transforms import Bbox
from datetime import datetime
sns.set(style="whitegrid")
%matplotlib inline

In [None]:
def densplot(columns, xlabel, title, axo):
    """Overlay one seaborn distribution plot per entry of ``columns`` on ``axo``.

    Every entry is drawn with ``sns.distplot``; its KDE curve is labelled with
    the entry's position in ``columns``. Afterwards the axes get ``title`` as
    their title and ``xlabel`` (font size 12) as their x-axis label.
    """
    for position, values in enumerate(columns):
        kde_options = {"label": position}
        sns.distplot(values, ax=axo, kde_kws=kde_options)
    axo.set_title(title)
    axo.set_xlabel(xlabel, fontsize=12)
    
def scatplot(xelem, yelem, xlabel, ylabel, title, axo, polyfit=None, xlim=None, ylim=None):
    """Draw a scatter plot of ``yelem`` against ``xelem`` on ``axo``.

    Parameters
    ----------
    xelem, yelem : sequences passed straight to ``axo.scatter``.
    xlabel, ylabel, title : text for the axis labels (font size 12) and title.
    axo : the axes object to draw on.
    polyfit : when truthy, the degree of a polynomial fitted with
        ``np.polyfit`` and drawn in colour ``'C2'`` over the unique x values.
    xlim, ylim : when truthy, the upper axis limit; the lower limit is 0.
    """
    axo.scatter(xelem, yelem)

    if polyfit:
        # Evaluate the fitted polynomial on the sorted, de-duplicated x values.
        coefficients = np.polyfit(xelem, yelem, polyfit)
        fitted_curve = np.poly1d(coefficients)
        unique_x = np.unique(xelem)
        axo.plot(unique_x, fitted_curve(unique_x), 'C2')

    if xlim:
        axo.set_xlim(0, xlim)
    if ylim:
        axo.set_ylim(0, ylim)

    axo.set_title(title)
    axo.set_xlabel(xlabel, fontsize=12)
    axo.set_ylabel(ylabel, fontsize=12)
    
def full_extent(ax, pad=0.0):
    """Get the full extent of an axes, including axes labels, tick labels, and
    titles.

    Parameters
    ----------
    ax : the axes whose surrounding box is wanted.
    pad : extra fraction by which the box is enlarged; the union box is
        expanded by a factor of ``1.0 + pad`` in both width and height.

    Returns
    -------
    The union of the window extents of the axes, its title, both axis labels
    and all x/y tick labels, expanded as described above.
    """
    # For text objects, we need to draw the figure first, otherwise the extents
    # are undefined.
    ax.figure.canvas.draw()

    # Each artist is listed once: duplicates cannot change a bounding-box union.
    items = ax.get_xticklabels() + ax.get_yticklabels()
    items += [ax, ax.title, ax.xaxis.label, ax.yaxis.label]
    bbox = Bbox.union([item.get_window_extent() for item in items])

    return bbox.expanded(1.0 + pad, 1.0 + pad)

def get_year(date):
    """Return the year of a ``YYYY-MM-DD`` date string, or ``None`` if ``date`` is falsy."""
    if not date:
        return None
    parsed = datetime.strptime(date, "%Y-%m-%d")
    return parsed.year



In [None]:
# Relative path of the report's figures folder; presumably the destination
# for saved plots -- verify against the savefig calls.
FIGURES_FOLDER = "../report/figures/"

In [None]:
# Read the battle features file: one JSON object per line, one row per object.
# The context manager closes the file handle as soon as the rows are parsed.
with open("../datasets/battle-features-2.json") as battles_file:
    battles = pd.DataFrame([json.loads(line) for line in battles_file])

# Preview the columns used for the casualties-versus-result comparison.
interest_col = ["casualties_1", "casualties_2", "result_combatant_1", "result_combatant_2"]
battles[interest_col].head()

In [None]:
# Grand totals of casualties and strength over the three combatant columns.
casualty_columns = ('casualties_1', 'casualties_2', 'casualties_3')
strength_columns = ('strength_1', 'strength_2', 'strength_3')
print('total casualties: ', sum(sum(battles[name]) for name in casualty_columns))
print('total strength: ', sum(sum(battles[name]) for name in strength_columns))

In [None]:
# Narrow the data set in one chain:
#   1. both sides report casualties and strength above 1, and at least one
#      side has a non-empty result string;
#   2. each side's casualties are below its own strength;
#   3. the battle started after the year 1000.
battles = (
    battles
    .query("casualties_1 > 1 and casualties_2 > 1 and strength_1 > 1 and strength_2 > 1 and (result_combatant_1 != '' or result_combatant_2 != '')")
    .query("casualties_1 < strength_1 and casualties_2 < strength_2")
    # assign() returns a new frame, so the column is never written onto the
    # result of a query (which would raise SettingWithCopyWarning).
    .assign(year=lambda frame: frame["start_date"].apply(get_year))
    .query("year>1000")
    # Detach from the unfiltered frame so later cells can add columns safely.
    .copy()
)

In [None]:
# Alias of (not a copy of) the filtered battles frame.
casuresu = battles

# Row-wise flags: which side has the larger casualties value.
side_1_more_casualties = casuresu["casualties_1"] > casuresu["casualties_2"]
side_2_more_casualties = casuresu["casualties_2"] > casuresu["casualties_1"]

# For every keyword searched in the result strings, count
#   * rows where a side whose result contains the keyword also has the larger
#     casualties value, and
#   * rows where either side's result contains the keyword at all.
casualties_win_counts = {}
casualties_keyword_rows = {}
for typeStr in ("icto", "decisive", "strategic", "tactical"):
    side_1_match = casuresu["result_combatant_1"].str.contains(typeStr)
    side_2_match = casuresu["result_combatant_2"].str.contains(typeStr)
    casualties_win_counts[typeStr] = len(
        casuresu.loc[(side_1_match & side_1_more_casualties) | (side_2_match & side_2_more_casualties)]
    )
    casualties_keyword_rows[typeStr] = len(casuresu.loc[side_1_match | side_2_match])

casualtiesVictory = casualties_win_counts["icto"]
casualtiesDecisiveVictory = casualties_win_counts["decisive"]
casuresuDecisive = casualties_keyword_rows["decisive"]
casualtiesStrategicVictory = casualties_win_counts["strategic"]
casuresuStrategic = casualties_keyword_rows["strategic"]
casualtiesTacticalVictory = casualties_win_counts["tactical"]
casuresuTactical = casualties_keyword_rows["tactical"]

# The overall figure is relative to every row; the others are relative to the
# rows whose result mentions the respective keyword.
casualtiesVictorypercent = casualtiesVictory * 100 / len(casuresu)
casualtiesDecisiveVictorypercent = casualtiesDecisiveVictory * 100 / casuresuDecisive
casualtiesStrategicVictorypercent = casualtiesStrategicVictory * 100 / casuresuStrategic
casualtiesTacticalVictorypercent = casualtiesTacticalVictory * 100 / casuresuTactical

print(casualtiesVictory, casualtiesDecisiveVictory, casualtiesStrategicVictory, casualtiesTacticalVictory)
print(casualtiesVictorypercent, " ", casualtiesDecisiveVictorypercent, " ", casualtiesStrategicVictorypercent, " ", casualtiesTacticalVictorypercent)

In [None]:
# Alias of (not a copy of) the battles frame: the two columns added below
# therefore appear on `battles` as well.
casustrenwin = battles

# Casualties as a percentage of strength, per side.
for side in ("1", "2"):
    casustrenwin['casustren' + side] = casustrenwin["casualties_" + side] * 100 / casustrenwin["strength_" + side]

# Row-wise flags: which side has the higher casualties-per-strength percentage.
side_1_higher_rate = casustrenwin['casustren1'] > casustrenwin['casustren2']
side_2_higher_rate = casustrenwin['casustren2'] > casustrenwin['casustren1']

# For every keyword searched in the result strings, count
#   * rows where a side whose result contains the keyword also has the higher
#     casualty percentage, and
#   * rows where either side's result contains the keyword at all.
rate_win_counts = {}
rate_keyword_rows = {}
for typeStr in ("icto", "decisive", "strategic", "tactical"):
    side_1_match = casustrenwin["result_combatant_1"].str.contains(typeStr)
    side_2_match = casustrenwin["result_combatant_2"].str.contains(typeStr)
    rate_win_counts[typeStr] = len(
        casustrenwin.loc[(side_1_match & side_1_higher_rate) | (side_2_match & side_2_higher_rate)]
    )
    rate_keyword_rows[typeStr] = len(casustrenwin.loc[side_1_match | side_2_match])

casualtiesStrengthVictory = rate_win_counts["icto"]
casualtiesStrengthDecisiveVictory = rate_win_counts["decisive"]
casustrenwinDecisive = rate_keyword_rows["decisive"]
casualtiesStrengthStrategicVictory = rate_win_counts["strategic"]
casustrenwinStrategic = rate_keyword_rows["strategic"]
casualtiesStrengthTacticalVictory = rate_win_counts["tactical"]
casustrenwinTactical = rate_keyword_rows["tactical"]

# The overall figure is relative to every row; the others are relative to the
# rows whose result mentions the respective keyword.
casualtiesStrengthVictorypercent = casualtiesStrengthVictory * 100 / len(casustrenwin)
casualtiesStrengthDecisiveVictorypercent = casualtiesStrengthDecisiveVictory * 100 / casustrenwinDecisive
casualtiesStrengthStrategicVictorypercent = casualtiesStrengthStrategicVictory * 100 / casustrenwinStrategic
casualtiesStrengthTacticalVictorypercent = casualtiesStrengthTacticalVictory * 100 / casustrenwinTactical

print(casualtiesStrengthVictory, casualtiesStrengthDecisiveVictory, casualtiesStrengthStrategicVictory, casualtiesStrengthTacticalVictory)
print(casualtiesStrengthVictorypercent, " ", casualtiesStrengthDecisiveVictorypercent, " ", casualtiesStrengthStrategicVictorypercent, " ", casualtiesStrengthTacticalVictorypercent)

We follow a similar process for the relationship between strength and results.

In [None]:
# Preview the columns used for the strength-versus-result comparison.
interest_col = ["strength_1", "strength_2", "result_combatant_1", "result_combatant_2"]
battles[interest_col].head()

In [None]:
# Alias of (not a copy of) the filtered battles frame.
strenresu = battles

# Row-wise flags: which side has the smaller strength value.
side_1_weaker = strenresu["strength_1"] < strenresu["strength_2"]
side_2_weaker = strenresu["strength_2"] < strenresu["strength_1"]

# For every keyword searched in the result strings, count
#   * rows where a side whose result contains the keyword also has the smaller
#     strength value, and
#   * rows where either side's result contains the keyword at all.
strength_win_counts = {}
strength_keyword_rows = {}
for typeStr in ("icto", "decisive", "strategic", "tactical"):
    side_1_match = strenresu["result_combatant_1"].str.contains(typeStr)
    side_2_match = strenresu["result_combatant_2"].str.contains(typeStr)
    strength_win_counts[typeStr] = len(
        strenresu.loc[(side_1_match & side_1_weaker) | (side_2_match & side_2_weaker)]
    )
    strength_keyword_rows[typeStr] = len(strenresu.loc[side_1_match | side_2_match])

strengthVictory = strength_win_counts["icto"]
strengthDecisiveVictory = strength_win_counts["decisive"]
strenresuDecisive = strength_keyword_rows["decisive"]
strengthStrategicVictory = strength_win_counts["strategic"]
strenresuStrategic = strength_keyword_rows["strategic"]
strengthTacticalVictory = strength_win_counts["tactical"]
strenresuTactical = strength_keyword_rows["tactical"]

# The overall figure is relative to every row; the others are relative to the
# rows whose result mentions the respective keyword.
strengthVictorypercent = strengthVictory * 100 / len(strenresu)
strengthDecisiveVictorypercent = strengthDecisiveVictory * 100 / strenresuDecisive
strengthStrategicVictorypercent = strengthStrategicVictory * 100 / strenresuStrategic
strengthTacticalVictorypercent = strengthTacticalVictory * 100 / strenresuTactical

# First line: the four win counts, in the same order as the other comparison
# cells. Second line: the group sizes used as denominators. The original
# single line mixed two group sizes with two win counts.
print(strengthVictory, strengthDecisiveVictory, strengthStrategicVictory, strengthTacticalVictory)
print(len(strenresu), strenresuDecisive, strenresuStrategic, strenresuTactical)
print(strengthVictorypercent, " ", strengthDecisiveVictorypercent, " ", strengthStrategicVictorypercent, " ", strengthTacticalVictorypercent)

**We observe that the number of casualties seems to be more important for the outcome of the battle, since the opponent with more soldiers wins in only 53% of the cases.**

In [None]:
#NOT used to plot
# Simple three-bar version of the comparison. Each bar is the complement
# (100 - value) of an overall percentage computed in the comparison cells.
# The inputs use the names those cells actually define, so the cell also runs
# on a freshly restarted kernel.
strengthBased = 100-strengthVictorypercent
casualtiesBased = 100-casualtiesVictorypercent
strengthAndCasualtiesBased = 100-casualtiesStrengthVictorypercent
fig, ax1 = plt.subplots(1, 1, figsize=(10,5))
y = [strengthBased, casualtiesBased, strengthAndCasualtiesBased]
x = ['strength', 'casualties', 'casualties per strength (%)']
sns.barplot(y=y, x=x, color="blue", ax = ax1)
ax1.set_ylim(0,100)
ax1.set_xlabel('Feature', fontsize=20)
ax1.set_ylabel('Percent of victories', fontsize=20)
ax1.tick_params(labelsize=15)
# Crop the saved file to the axes plus its labels; savefig expects inches,
# hence the conversion through the inverted dpi transform.
test1 = full_extent(ax1).transformed(fig.dpi_scale_trans.inverted())
fig.savefig(FIGURES_FOLDER+'VictoryAdvantage.eps', bbox_inches=test1)

In [None]:
# Complement (100 - value) of every percentage computed in the three
# comparison cells, grouped by feature. Within each group the order is:
# overall, decisive, strategic, tactical.
strength_complements = [
    100 - percent
    for percent in (
        strengthVictorypercent,
        strengthDecisiveVictorypercent,
        strengthStrategicVictorypercent,
        strengthTacticalVictorypercent,
    )
]
casualties_complements = [
    100 - percent
    for percent in (
        casualtiesVictorypercent,
        casualtiesDecisiveVictorypercent,
        casualtiesStrategicVictorypercent,
        casualtiesTacticalVictorypercent,
    )
]
casualties_strength_complements = [
    100 - percent
    for percent in (
        casualtiesStrengthVictorypercent,
        casualtiesStrengthDecisiveVictorypercent,
        casualtiesStrengthStrategicVictorypercent,
        casualtiesStrengthTacticalVictorypercent,
    )
]

# Keep the individual names available alongside the grouped lists.
strengthBased, strengthBasedDecisive, strengthBasedStrategic, strengthBasedTactical = strength_complements
casualtiesBased, casualtiesBasedDecisive, casualtiesBasedStrategic, casualtiesBasedTactical = casualties_complements
casualtiesStrengthBased, casualtiesStrengthBasedDecisive, casualtiesStrengthBasedStrategic, casualtiesStrengthBasedTactical = casualties_strength_complements

# Flat list of the twelve bar heights, one feature group after another.
y = strength_complements + casualties_complements + casualties_strength_complements


In [None]:
# Grouped bar chart: three feature groups with four bars each, taken from `y`.
fig, ax = plt.subplots(figsize=(10, 5))
width = 0.25
colors = ['lightcoral', 'darkseagreen', '#5975A4', 'g']

# The x position advances by a full unit at the start of each group of four
# and by one bar width plus a 0.01 gap inside a group.
shift = 0
for i, v in enumerate(y):
    slot = i % 4
    shift = shift + (1 if slot == 0 else (width + 0.01))
    down = ax.bar(shift, v, width, color=colors[slot])

# One major tick per group of bars.
maj_ticks = [1.4, 3.2, 5]
ax.set_xticks(maj_ticks)
ax.set_xticklabels(["Strength", "Casualties", "Casualties per strength (%)"], fontsize=15)

# The second tick_params call applies to both axes and is the one that ends
# up setting the tick label size.
ax.tick_params(axis='x', which='major', labelsize=20)
ax.set_ylim(0, 100)
ax.set_ylabel('Victory (%)', fontsize=20)
ax.tick_params(labelsize=15)

ax.legend(['Any', 'Decisive', 'Strategic', 'Tactical'], loc='upper left', fontsize=15, frameon=True)
fig.tight_layout()
# Export is disabled; to enable it:
# fig.savefig(FIGURES_FOLDER + 'VictoryAdvantage.eps')