# Goal
### General Goal
Obtain interesting insights into the data (specifically the units) of Dominions 4
### Specific Goals
- Determine common base units, e.g. human

# Dataset
## BaseU
|Attribute|dtype |Usage|Description|
|---------|------|-----|-----------|
|id       |uint  | |Unique identifier, some values are skipped|
|name     |string| |Textual identifier|

In [202]:
# Share of BaseU rows that have a non-null 'wpn1', printed as a percentage.
# The percent format spec scales and rounds in one step; the previous
# round(x, 4) * 100 could print float artefacts such as 56.779999999999994%.
# NOTE(review): `data` is not defined in any cell shown here - confirm where it comes from.
print("{:.2%}".format(data['baseu']['wpn1'].count() / data['baseu'].index.size))

99.14%


In [None]:
# Placeholder frame: the original `columns=` had no value, which is a syntax
# error and stops the notebook from running. TODO: fill in the intended column names.
pd.DataFrame(columns=[])

## Setup
Run this every time the kernel starts.

In [10]:
# Standard imports
import os
from pathlib import Path
import sys
import math
import random
import shutil

# Package imports
import pandas as pd
import numpy as np
from IPython.display import display
import h5py
import plotly
from pandas.api.types import CategoricalDtype

In [216]:
# Important variables
class dom4data:
    """Class-level holder for the Dominions 4 inspector tables.

    All state is stored on the class itself and every method is a static
    method, so the intended usage is ``dom4data.load()`` followed by reads of
    ``dom4data.BaseU``; no instance is required.
    """

    # Directory that contains the tab-separated "<table>.csv" files.
    raw_data_path = Path('../../repos/dom4inspector/gamedata/')
    # True once load() has populated BaseU; reset to False by unload().
    loaded = False

    # The BaseU table as a DataFrame after load(); the integer 0 is the
    # "nothing loaded" placeholder used before load() and after unload().
    BaseU = 0

    # Columns read as booleans: any non-empty cell means the flag is set.
    _FLAG_COLUMNS = (
        'holy', 'inquisitor', 'inanimate', 'undead', 'magicbeing',
        'stonebeing', 'animal', 'coldblood', 'female',
        'forestsurvival', 'mountainsurvival', 'wastesurvival',
        'swampsurvival', 'cavesurvival',
        'aquatic', 'amphibian', 'pooramphibian', 'float', 'flying',
        'stormimmune', 'teleport', 'immobile', 'noriverpass',
    )

    @staticmethod
    def is_loaded():
        """Return True when load() has completed since the last unload()."""
        return dom4data.loaded

    @staticmethod
    def unload():
        """Drop the loaded table and mark the holder as not loaded."""
        dom4data.BaseU = 0
        dom4data.loaded = False

    @staticmethod
    def load():
        """Read BaseU.csv from raw_data_path into dom4data.BaseU.

        Any previously loaded table is discarded first. Raises whatever
        pandas.read_csv raises (e.g. FileNotFoundError) if the file cannot be
        read; in that case the holder stays in the unloaded state.
        """
        if dom4data.loaded:
            dom4data.unload()

        def resolve(name):
            # Table name -> path of its CSV file inside raw_data_path.
            return dom4data.raw_data_path / (name + ".csv")

        def flag_converter(x):
            # Flag cells are either empty (unset) or non-empty (set).
            return x != ""

        dtypes = {
            'startage': float, 'allrange': str, 'startitem': str,
            'size': 'category',
        }
        # Equipment references
        dtypes.update({'wpn%d' % slot: 'category' for slot in range(1, 8)})
        dtypes.update({'armor%d' % slot: 'category' for slot in range(1, 5)})

        dom4data.BaseU = pd.read_csv(
            resolve("BaseU"),
            # Plain ASCII hyphens: the previous literal contained an en dash
            # (U+2013) and only resolved through codec-name normalisation.
            encoding='ISO-8859-1',
            sep='\t',
            dtype=dtypes,
            converters={column: flag_converter
                        for column in dom4data._FLAG_COLUMNS},
            # This literal appears in the startage column and cannot be
            # parsed as a float, so it is read as missing.
            na_values={'startage': ["120 ? 320"]},
        )
        # Previously never set, so is_loaded() stayed False after a
        # successful load and the reload guard above could never trigger.
        dom4data.loaded = True



In [217]:
# Read BaseU.csv from dom4data.raw_data_path into dom4data.BaseU.
dom4data.load()


In [78]:
# Columns of interest, grouped as: identity, equipment, stats, leadership.
subset_attributes = (
    ['name']
    + ['wpn1', 'wpn2', 'armor1', 'armor2']
    + ['size', 'hp', 'prot', 'str', 'att', 'def', 'prec', 'enc']
    + ['leader', 'undeadleader', 'magicleader']
)
# Summary statistics for the selected columns, rounded to two decimals.
dom4data.BaseU[subset_attributes].describe().round(2)

Unnamed: 0,hp,prot,str,att,def,prec,enc,leader,undeadleader,magicleader
count,2919.0,2919.0,2919.0,2919.0,2919.0,2919.0,2919.0,2919.0,2604.0,2615.0
mean,27.84,2.98,13.34,10.88,10.44,9.6,2.46,46.78,9.07,4.49
std,36.8,4.79,4.76,2.29,2.6,2.49,1.37,35.65,25.98,15.13
min,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,10.0,0.0,10.0,10.0,9.0,8.0,2.0,40.0,0.0,0.0
50%,14.0,0.0,12.0,11.0,10.0,10.0,3.0,40.0,0.0,0.0
75%,27.0,5.0,15.0,12.0,12.0,11.0,3.0,40.0,0.0,0.0
max,500.0,35.0,45.0,25.0,25.0,25.0,20.0,160.0,160.0,160.0


In [159]:
def value_check(table, column):
    """Return the non-null entries of data[table][column]."""
    frame = data[table]
    present = frame[column].notnull()
    return frame[present][column]


def flag_check(table, column):
    """Return True when the column's non-null entries hold exactly one distinct value."""
    distinct_values = set(value_check(table, column))
    return len(distinct_values) == 1


# One True/False line per candidate flag column, in this fixed order.
for flag_name in ('aquatic', 'amphibian', 'pooramphibian', 'float', 'flying',
                  'stormimmune', 'teleport', 'immobile', 'noriverpass',
                  'coldblood'):
    print(flag_check('baseu', flag_name))
print("")
print(value_check('baseu', 'coldblood'))
data['baseu']['mind']

False
False
False
False
False
False
False
False
False
False

0       False
1       False
2        True
3       False
4       False
5       False
6       False
7       False
8       False
9       False
10      False
11      False
12      False
13      False
14      False
15      False
16      False
17      False
18      False
19      False
20      False
21      False
22      False
23      False
24      False
25      False
26      False
27      False
28      False
29      False
        ...  
2889    False
2890    False
2891    False
2892    False
2893    False
2894    False
2895    False
2896    False
2897    False
2898    False
2899    False
2900    False
2901    False
2902    False
2903    False
2904    False
2905    False
2906    False
2907    False
2908    False
2909    False
2910    False
2911    False
2912    False
2913    False
2914    False
2915    False
2916    False
2917    False
2918    False
Name: coldblood, Length: 2919, dtype: bool


0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
5      NaN
6      NaN
7      NaN
8      NaN
9      NaN
10     NaN
11     NaN
12     NaN
13     NaN
14     NaN
15     NaN
16     NaN
17     NaN
18     NaN
19     NaN
20     NaN
21     NaN
22     NaN
23     NaN
24     NaN
25     NaN
26     NaN
27     NaN
28     NaN
29     NaN
        ..
2889   NaN
2890   NaN
2891   NaN
2892   NaN
2893   NaN
2894   NaN
2895   NaN
2896   NaN
2897   NaN
2898   NaN
2899   NaN
2900   NaN
2901   NaN
2902   NaN
2903   NaN
2904   NaN
2905   NaN
2906   NaN
2907   NaN
2908   NaN
2909   NaN
2910   NaN
2911   NaN
2912   NaN
2913   NaN
2914   NaN
2915   NaN
2916   NaN
2917   NaN
2918   NaN
Name: mind, Length: 2919, dtype: float64

## Unit Clustering

In [None]:
# Current cluster centres; filled by cluster() on first use and read by pred1().
clusters = []
# Scratch list of cluster centres; rebuilt by recalculate_clusters().
new_clusters = []

def create_cluster():
    """Return a fresh cluster centre with both 'size' and 'str' set to 0."""
    return dict.fromkeys(('size', 'str'), 0)
    
def create_random_cluster():
    """Return a cluster centre with a random 'size' in [0, 7) and 'str' in [0, 50)."""
    # Draw order matters for reproducibility under a fixed seed: size first, then str.
    size_draw = random.randrange(7)
    str_draw = random.randrange(50)
    return {'size': size_draw, 'str': str_draw}

In [None]:
def pred1(row_label):
    """Return, as a string, the index of the cluster whose 'size' is closest to the row's.

    Only the 'size' column takes part in the comparison. Ties go to the
    lowest cluster index, and '0' is returned when there are fewer than two
    clusters.

    row_label: index label of a row in dm.data['BaseU'].
    """
    row = dm.data['BaseU'].loc[row_label]
    size = row['size']

    def gap(index):
        # Absolute size difference between the row and cluster `index`.
        return abs(clusters[index]['size'] - size)

    nearest = 0
    for index in range(1, len(clusters)):
        # Strict '<' keeps the earliest cluster on ties.
        if gap(index) < gap(nearest):
            nearest = index
    return str(nearest)



In [None]:

def cluster(k,predicate=pred1):
    """Group the BaseU rows by the label that `predicate` assigns to each row.

    k: number of random cluster centres to create; only used when the shared
       `clusters` list is still empty.
    predicate: callable passed to DataFrame.groupby (defaults to pred1).

    Returns the resulting groupby object.
    """
    if not dm.is_open():
        dm.open()

    # Seed the shared list in place on first use; later calls reuse it as is.
    if not clusters:
        clusters.extend(create_random_cluster() for _ in range(k))

    return dm.data['BaseU'].groupby(predicate)


In [None]:
# Group the BaseU rows by nearest cluster; 6 random centres are created only
# if `clusters` is still empty at this point.
test = cluster(6)

In [None]:
# Show which row labels ended up under each cluster label.
print(test.groups)

In [None]:
def calculate_cluster(row):
    """Placeholder: ignores `row` and always returns 0."""
    return 0

In [None]:
def recalculate_clusters(groups):
    """Reset `new_clusters` and return the per-group means of 'size' and 'str'.

    groups: a pandas groupby object over a frame that has 'size' and 'str'
            columns (e.g. the value returned by cluster()).

    Side effect: rebinds the global `new_clusters` to one zeroed cluster per
    entry in `clusters`. The aggregated means are returned and are not written
    into `new_clusters`.
    """
    global new_clusters

    new_clusters = [create_cluster() for _ in clusters]

    # String aggregator names select pandas' own groupby mean; passing the
    # np.mean callable here is deprecated in recent pandas releases.
    return groups.aggregate({'size': 'mean', 'str': 'mean'})
    

In [None]:
# Current grouping of row labels per cluster label.
print(test.groups)
# Reset new_clusters and compute the per-group means of 'size' and 'str'.
ga = recalculate_clusters(test)
# new_clusters holds zeroed placeholders only; the means are in `ga`.
print(new_clusters)
print(ga)

In [None]:
# First row of the per-group means returned by recalculate_clusters().
ga.iloc[0]

In [None]:
# Smallest value in the 'str' column.
# NOTE(review): no cell shown here binds a top-level `BaseU`; it presumably
# refers to the loaded BaseU table (dom4data.BaseU) - confirm.
BaseU['str'].min()

## Box Plots showing relation between size and str

In [None]:
# Select columns with a list: a set has no defined order and recent pandas
# versions reject sets as indexers.
box_groups = BaseU[['size', 'str']].groupby('size')
# figsize must be an ordered (width, height) pair, so use a tuple, not a set.
box_groups.boxplot(subplots=False, figsize=(16, 15))
# Row labels per size group, taken from the 'str' column selection.
box_groups['str'].groups

In [None]:
# Select columns with a list: a set has no defined order and recent pandas
# versions reject sets as indexers.
BaseU[['size', 'str']]