# Building Populations and Factions

In [40]:
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import sys, os
import altair as alt

# Put the app directory (relative to the notebook's working directory) first on
# sys.path so the app's own packages can be imported from this notebook.
sys.path.insert(0, "../../app")


%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [2]:
# Imported here rather than in the first cell because it relies on the
# sys.path entry for the app directory having been inserted already.
import creators.homeworld as homeworld

# Bare expression so the notebook displays the Faction class.
homeworld.Faction

creators.homeworld.Faction

Example data from the input form:

In [3]:
# Example payload shaped like a submission from the homeworld input form.
# Every value is a string; numeric fields are converted where they are used
# (e.g. int(data["starting_pop"]), float(data["conformity"])).
data = dict(
    # Planetary system
    planet_name="Earth",
    num_planets="6",
    num_moons="24",
    home_has_moons="on",
    # Starting population and its baseline traits
    starting_pop="7",
    conformity="0.3",
    literacy="0.7",
    aggression="0.5",
    constitution="0.5",
    # Form / submitter metadata
    name="form",
    objid="4864559553238",
    username="Billmanh",
    objtype="form",
    id="4864559553238",
)

In [4]:
# Create a Species and build its attributes from the form payload.
# NOTE(review): what build_attr derives from `data` is defined in
# creators.homeworld and is not visible from this notebook.
species = homeworld.Species()
species.build_attr(data)
# Last expression of the cell: display the species' data.
species.get_data()

{'name': 'Roudai',
 'objid': '1859555884197',
 'label': 'species',
 'consumes': 'organic',
 'effuses': 'organic waste',
 'viral_resilience': 0.7,
 'habitat_resilience': 0.2}

In [5]:
# The form sends the starting population count as a string, so convert it once.
n_starting_pops = int(data["starting_pop"])

# One Pop of the generated species per starting population slot.
pops = [homeworld.Pop(species) for _ in range(n_starting_pops)]
pops

[<pop: pop; 7750803014250; unnamed>,
 <pop: pop; 5702609366813; unnamed>,
 <pop: pop; 3149795765312; unnamed>,
 <pop: pop; 1222543194179; unnamed>,
 <pop: pop; 6103740850185; unnamed>,
 <pop: pop; 8641355839452; unnamed>,
 <pop: pop; 2672601626527; unnamed>]

In [6]:
# Collect each pop's data record, then tabulate them: one row per pop.
pop_records = [pop.get_data() for pop in pops]
pops_df = pd.DataFrame(pop_records)
pops_df

Unnamed: 0,name,objid,label,conformity,literacy,aggression,constitution,health,isInFaction,industry,wealth,factionLoyalty,isIdle
0,unnamed,7750803014250,pop,0.401,0.702,0.952,0.844,0.5,,0.898,0.8,0.539,True
1,unnamed,5702609366813,pop,0.283,0.88,0.491,0.462,0.5,,0.4765,0.67825,0.279,True
2,unnamed,3149795765312,pop,0.2,0.505,0.543,0.206,0.5,,0.3745,0.43975,0.123,True
3,unnamed,1222543194179,pop,0.203,0.892,0.594,0.383,0.5,,0.4885,0.69025,0.177,True
4,unnamed,6103740850185,pop,0.296,0.772,0.308,0.432,0.5,,0.37,0.571,0.334,True
5,unnamed,8641355839452,pop,0.566,0.888,0.323,0.142,0.5,,0.2325,0.56025,0.598,True
6,unnamed,2672601626527,pop,0.562,0.524,0.184,0.493,0.5,,0.3385,0.43125,0.522,True


Populations are still unnamed at this point because a population's name includes the name of its faction, so each one is only given a name once it has been assigned to a faction.

In [7]:
# Number of factions to split the starting population into, computed by the
# project helper from homeworld.n_steps and the form's conformity value.
n_factions = homeworld.get_n_factions(homeworld.n_steps, float(data["conformity"]))

# Take the feature columns in the order given by homeworld.starting_attributes
# (not in pops_df's column order) so that the columns of
# kmeans.cluster_centers_ line up with that list when it is used as labels.
feature_columns = [c for c in homeworld.starting_attributes if c in pops_df.columns]

# KMeans chooses its initial centroids at random; without a fixed random_state
# the same pops can be split into different factions on every run.
KMEANS_SEED = 42
kmeans = KMeans(n_clusters=n_factions, random_state=KMEANS_SEED).fit(
    pops_df[feature_columns]
)

In [8]:
# One Faction per k-means cluster, constructed with the cluster index.
factions = list(map(homeworld.Faction, range(kmeans.n_clusters)))
factions

[<faction: no type; 0058026195137; Bilgenfranhor>,
 <faction: no type; 0018958629315; Dudro>,
 <faction: no type; 5810243650957; Gi>]

In [9]:
# Tag each pop with the index of the k-means cluster it fell into.
# This is only the cluster number; it is not yet resolved to a faction ID.
for pop, cluster_label in zip(pops, kmeans.labels_):
    pop.set_faction(cluster_label)

In [12]:
# Name every pop after its faction and register the pop with that faction.
for pop in pops:
    # Resolve the Faction object whose number matches the pop's faction number.
    matching_factions = [f for f in factions if f.faction_no == pop.factionNo]
    faction = matching_factions[0]

    # Pops with an empty name get a generated one before the faction naming.
    if pop.name == "":
        pop.name = pop.make_name(2, 2)

    pop.set_pop_name(faction)
    faction.assign_pop_to_faction(pop)

# Show the second pop's record as a spot check of the result.
[member.get_data() for member in pops][1]

{'name': 'Gi Dad',
 'objid': '5702609366813',
 'label': 'pop',
 'conformity': 0.283,
 'literacy': 0.88,
 'aggression': 0.491,
 'constitution': 0.462,
 'health': 0.5,
 'isInFaction': '5810243650957',
 'industry': 0.47650000000000003,
 'wealth': 0.67825,
 'factionLoyalty': 0.279,
 'isIdle': 'True'}

In [35]:
# Tabulate the k-means cluster centers, one row per faction cluster.
# NOTE(review): the column labels assume the features were fitted in the same
# order as homeworld.starting_attributes - confirm against the fitting cell.
cluster_centers_df = pd.DataFrame(
    kmeans.cluster_centers_,
    columns=homeworld.starting_attributes,
)
cluster_centers_df

Unnamed: 0,conformity,literacy,aggression,constitution
0,0.564,0.706,0.2535,0.3175
1,0.401,0.702,0.952,0.844
2,0.2455,0.76225,0.484,0.37075


In [37]:
# Project the cluster centers onto their first two principal components.
cluster_centers = kmeans.cluster_centers_
pca = PCA(n_components=2)
pca.fit(cluster_centers)
X_r = pca.transform(cluster_centers)

Per the scikit-learn documentation for `PCA`:

`explained_variance_ratio_`: Percentage of variance explained by each of the selected components.

In [38]:
# Share of the variance explained by each of the two fitted components.
pca.explained_variance_ratio_

array([0.88409421, 0.11590579])

In [39]:
# The cluster centers after the PCA transform: one row per center, one column per component.
X_r

array([[-0.37570541, -0.13471467],
       [ 0.51006509, -0.05045245],
       [-0.13435968,  0.18516713]])

In [45]:
# X_r holds the cluster centers projected onto two principal components; the
# first component is labelled "long" and the second "lat".
source = pd.DataFrame(X_r, columns=["long", "lat"])

# Plot longitude on the horizontal axis and latitude on the vertical axis, the
# conventional map orientation, and title the chart so it stands on its own.
alt.Chart(source).mark_circle(size=60).encode(
    x="long",
    y="lat",
).properties(title="Faction cluster centers (PCA projection)")