In [None]:
import json
import sys
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import operator
sns.set(style="whitegrid")
%matplotlib inline
from datetime import datetime

In [None]:
# Load the battle records; the file holds one JSON object per line.
# The context manager guarantees the file handle is closed once parsing ends,
# and blank lines (e.g. a trailing newline) are skipped instead of crashing json.loads.
with open("../datasets/battle-features-1.json", encoding="utf-8") as battles_file:
    battles = pd.DataFrame([json.loads(line) for line in battles_file if line.strip()])

In [None]:
# Directory prefix for saved figures. It is concatenated directly with a file
# name (FOLDER + 'name.eps'), so the trailing slash is required.
FOLDER = "../report/figures/"

In [None]:
# Work on a copy of the raw frame in which every 0 is treated as a missing value.
# `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
df = pd.DataFrame(battles).replace(0, np.nan)
# Widen the column display so long cell contents are not truncated.
pd.options.display.max_colwidth = 200
# Preview the first two combatant-list columns.
df[['combatant_list_1', 'combatant_list_2']].head()

In [None]:
def find_subtext(df, txt):
    """Find, for every row that contains a pattern, the first column matching it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to search. Only string cells can match; any other value
        (numbers, lists, missing values) counts as "no match".
    txt : str
        Pattern passed to ``Series.str.contains`` (a regular expression by default).

    Returns
    -------
    pandas.Series
        Indexed by the labels of the rows with at least one matching cell;
        each value is the label of the first column that matched in that row.
    """
    # Flatten to one cell per (row, column) so columns of different types can
    # be searched in a single pass. na=False maps non-string cells to False
    # instead of NaN, which keeps the mask a plain boolean frame.
    contains = df.stack().str.contains(txt, na=False).unstack(fill_value=False)
    # `axis` is passed by keyword: pandas 2.0 made it keyword-only for any().
    matching_rows = contains[contains.any(axis=1)]
    # On booleans, idxmax returns the first column that holds True.
    return matching_rows.idxmax(axis=1)

def get_duration(battle):
    """Return the length of a battle in days.

    Parameters
    ----------
    battle : object
        Anything exposing ``start_date`` and ``end_date`` attributes (in this
        notebook, a row produced by ``DataFrame.iterrows``). Dates are
        ``"%Y-%m-%d"`` strings.

    Returns
    -------
    int
        ``1`` when the end date is missing, otherwise the number of days
        between the start date and the end date.

    Raises
    ------
    ValueError
        If a date that is present does not match ``"%Y-%m-%d"``.
    """
    end = battle.end_date
    # A missing end date normally arrives as the "None" placeholder string;
    # a real None, NaN or empty string is treated the same way instead of
    # crashing inside strptime.
    if not isinstance(end, str) or end in ("", "None"):
        return 1
    date_format = "%Y-%m-%d"
    elapsed = datetime.strptime(end, date_format) - datetime.strptime(battle.start_date, date_format)
    # NOTE(review): a battle that starts and ends on the same day yields 0 here,
    # while a missing end date yields 1 — confirm this asymmetry is intended.
    return elapsed.days


def get_year(date):
    """Extract the calendar year from a ``YYYY-MM-DD`` date string.

    A falsy ``date`` (``None`` or an empty string, for example) yields ``None``.
    """
    if not date:
        return None
    parsed = datetime.strptime(date, "%Y-%m-%d")
    return parsed.year

In [None]:
# Keep only battles with a known start date. The mask is taken from `df`
# itself (not the raw `battles` frame) so it always lines up with the rows
# being filtered, including when this cell is re-run. .copy() detaches the
# result so the assignments below do not write into a slice.
df = df[df.start_date.notnull()].copy()
# Replace every remaining missing value with the "None" placeholder string,
# which get_duration() recognises as "no end date".
df = df.fillna(value="None")

df["year"] = df["start_date"].apply(get_year)

# Restrict the analysis to battles that started between 1000 and 2018 inclusive.
df = df[(df.year >= 1000) & (df.year <= 2018)].copy()

df["duration"] = [get_duration(battle) for _, battle in df.iterrows()]

In [None]:
# Flag the battles in which any text field mentions the United States or the
# Confederacy; `isUSA` holds the name of the first matching column (NaN if none).
usa_pattern = '(United States|USA|Confederate|CSA)'
df_USAFights = df.assign(isUSA=find_subtext(df, usa_pattern))
df_USAFights = df_USAFights[df_USAFights.start_date.notnull() & df_USAFights.isUSA.notnull()]
df_USAFights = df_USAFights.reset_index(drop=True)

# Each row already marks its start year. A battle lasting more than a year
# must also mark every following year it spans (start+1, start+2, ...).
# The extra years are collected first and concatenated once, rather than
# growing the frame row by row (DataFrame.append no longer exists in pandas 2).
extra_years = []
for start_year, days in zip(df_USAFights['year'], df_USAFights['duration']):
    if days > 365:
        spanned_years = int(days / 365) + 1
        extra_years.extend(int(start_year) + offset for offset in range(1, spanned_years))
if extra_years:
    # The added rows carry only a `year`; every other column is NaN for them.
    df_USAFights = pd.concat(
        [df_USAFights, pd.DataFrame({'year': extra_years})],
        ignore_index=True,
    )

# One vertical tick per battle-year on a single horizontal timeline.
X = df_USAFights["year"]
fig, ax = plt.subplots(figsize=(20, 3))
ax.scatter(X, [1] * len(X), marker='|', linewidth=5, s=1000)

# Only the x axis carries information, so hide the y axis and the frame around it.
ax.yaxis.set_visible(False)
for side in ('right', 'left', 'top'):
    ax.spines[side].set_visible(False)
ax.xaxis.set_ticks_position('bottom')
ax.set_xlabel('Year', fontsize=25)
ax.tick_params(labelsize=25)
ax.get_yaxis().set_ticklabels([])
ax.set_xlim(1750, 2018)

# Save before showing: some backends release the figure once it is shown.
fig.savefig(FOLDER + 'USAFighting.eps', bbox_inches='tight')
plt.show()

In [None]:
# Report how many distinct years appear in X, i.e. years with at least one battle.
# The closing parenthesis of len() belongs right after value_counts(); with the
# message inside len() the call raised a TypeError.
print("The USA spent ", len(X.value_counts()), " years in which they were involved in at least one battle")

In [None]:
def _normalize_combatant(name):
    """Strip flag-image residue from a combatant name and merge known aliases."""
    # Dots are escaped so only the literal ".svg" / ".png" extensions are removed;
    # strip() drops the whitespace left behind, e.g. "Flag of France.svg" -> "France".
    cleaned = re.sub(r'(Flag of|\.svg|\.png)', '', name).strip()
    if cleaned in ('USA', 'Confederate States', 'CSA (Confederacy)'):
        return 'United States'
    if cleaned == 'Kingdom of France':
        return 'France'
    return cleaned


def _years_fighting(frame, after_year=None):
    """Sum battle durations per combatant and return them in years (days / 365).

    When `after_year` is given, only battles whose `year` is strictly greater
    than it are counted.
    """
    days_by_combatant = dict()
    for _, battle in frame.iterrows():
        if after_year is not None and not battle['year'] > after_year:
            continue
        duration = int(battle['duration'])
        for column in ('combatant_list_1', 'combatant_list_2', 'combatant_list_3'):
            combatants = battle[column]
            # A missing list is stored as the "None" placeholder string; iterating
            # it would count the letters N, o, n, e as combatants, so skip it.
            if not isinstance(combatants, (list, tuple)):
                continue
            for raw_name in combatants:
                if not isinstance(raw_name, str):
                    continue
                combatant = _normalize_combatant(raw_name)
                days_by_combatant[combatant] = days_by_combatant.get(combatant, 0) + duration
    return {combatant: int(days) / 365 for combatant, days in days_by_combatant.items()}


def _plot_ranking(names, years, xlabel, filename):
    """Draw a horizontal bar ranking and save it under FOLDER; return (figure, axes)."""
    figure, axis = plt.subplots(figsize=(3, 2.5))
    sns.barplot(y=list(names), x=list(years), color="blue", ax=axis)
    axis.set_xlabel(xlabel, fontsize=15)
    axis.tick_params(labelsize=12)
    # Saved next to the other report figures rather than in the working directory.
    figure.savefig(FOLDER + filename, bbox_inches='tight')
    return figure, axis


# Years of fighting per combatant: over the whole period, and for battles
# whose start year is greater than 1775.
count = _years_fighting(df)
countModern = _years_fighting(df, after_year=1775)

sorted_count = sorted(count.items(), key=operator.itemgetter(1), reverse=True)
sorted_countModern = sorted(countModern.items(), key=operator.itemgetter(1), reverse=True)

# Top ten combatants over the whole period.
key, value = zip(*sorted_count[0:10])
fig, ax1 = _plot_ranking(key, value, "Years of Battles", 'YearsFightingRanking.eps')

# Top ten combatants for the modern period.
keyModern, valueModern = zip(*sorted_countModern[0:10])
figModern, ax1 = _plot_ranking(
    keyModern, valueModern, "Years of Battles after 1776", 'YearsFightingRankingModern.eps'
)

In [None]:
# Execute the feature-extraction script inside this kernel, passing two dataset
# paths as command-line arguments.
# NOTE(review): presumably the first path is the input and the second the
# output — confirm against features_extraction.py.
%run ../processing/features_extraction.py '../datasets/battle-fields-1.json' '../datasets/battle-features-2.json'