# Building Populations and Factions

In [4]:
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import sys, os
import altair as alt

# mapping to the modules that make the app
sys.path.insert(0, "../..")
sys.path.insert(0, "../../app")


%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [5]:
from app import creators

In [9]:
from app.creators import homeworld
from app.objects import species, population

Example data from the input form:

In [7]:
# Example payload shaped like the world-generation input form.
# Key order is kept exactly as in the form example.
data = dict(
    # Form / account identification fields.
    label="form",
    name="worldgenform",
    objid="0000000000001",
    owner="user.username",
    username="user.username",
    accountid="0000000000001",
    # Population attribute settings (all at the 0.5 midpoint in this example).
    conformity=0.5,
    constitution=0.5,
    literacy=0.5,
    aggression=0.5,
    # Counts requested by the form.
    num_planets=4,
    num_moons=10,
    starting_pop=7,
    # Remaining numeric settings from the form.
    organics=0.5,
    minerals=0.5,
)

# Species

In [10]:
# `species` is imported earlier as a module, and this cell rebinds the same
# name to a Species instance. Re-import the module under an alias so the cell
# can be re-run without failing on the already-rebound name, while later cells
# still see `species` as the instance.
from app.objects import species as species_module

species = species_module.Species()
# Populate the species from the example form data, then display its record.
species.build_attr(data)
species.get_data()

{'name': 'Giobilyganj',
 'objid': '7664494834698',
 'label': 'species',
 'consumes': ['Organic'],
 'effuses': ['Organic waste', 'Plastics'],
 'viral_resilience': 0.7,
 'habitat_resilience': 0.2}

In [11]:
# Create one Pop of this species per starting population requested by the form.
starting_pop_count = int(data["starting_pop"])
pops = [population.Pop(species) for _ in range(starting_pop_count)]
pops

[<pop: pop; 5737175283133; unnamed>,
 <pop: pop; 3792350943299; unnamed>,
 <pop: pop; 0445402194292; unnamed>,
 <pop: pop; 9911658268206; unnamed>,
 <pop: pop; 5232694087667; unnamed>,
 <pop: pop; 0835631676356; unnamed>,
 <pop: pop; 0493247425748; unnamed>]

In [12]:
# Collect each pop's data record and tabulate them, one row per pop.
pop_records = [pop.get_data() for pop in pops]
pops_df = pd.DataFrame(pop_records)
pops_df

Unnamed: 0,name,objid,label,conformity,literacy,aggression,constitution,health,isInFaction,industry,wealth,factionLoyalty,isIdle
0,unnamed,5737175283133,pop,0.198,0.501,0.686,0.621,0.5,,0.6535,0.57725,0.177,True
1,unnamed,3792350943299,pop,0.538,0.448,0.528,0.525,0.5,,0.5265,0.48725,0.547,True
2,unnamed,445402194292,pop,0.601,0.465,0.451,0.376,0.5,,0.4135,0.43925,0.749,True
3,unnamed,9911658268206,pop,0.534,0.42,0.583,0.537,0.5,,0.56,0.49,0.411,True
4,unnamed,5232694087667,pop,0.584,0.524,0.273,0.371,0.5,,0.322,0.423,0.537,True
5,unnamed,835631676356,pop,0.553,0.418,0.396,0.758,0.5,,0.577,0.4975,0.59,True
6,unnamed,493247425748,pop,0.847,0.567,0.47,0.476,0.5,,0.473,0.52,0.835,True


Populations are still unnamed at this point because a population's name incorporates the name of its faction, so each population is named only after its faction has been created.

In [13]:
# The number of factions is computed from the form's conformity value.
n_factions = homeworld.get_n_factions(homeworld.n_steps, float(data["conformity"]))

# Cluster only on the starting attributes that are present in pops_df,
# keeping pops_df's column order.
cluster_features = [c for c in pops_df.columns if c in homeworld.starting_attributes]

# KMeans initialisation is random; pin the seed so that the same set of pops
# always yields the same cluster labels (and therefore the same factions).
kmeans = KMeans(n_clusters=n_factions, random_state=42).fit(
    pops_df[cluster_features]
)

In [15]:
# Build one Faction per k-means cluster, each constructed with its cluster index.
factions = list(map(population.Faction, range(kmeans.n_clusters)))
factions

[<faction: no type; 8068514917477; Garguiredyrydong>,
 <faction: no type; 0673942153782; No>]

In [16]:
# Give every pop the cluster label k-means assigned to its row. This is only
# the faction number; it is not yet matched to a faction ID.
for pop, cluster_label in zip(pops, kmeans.labels_):
    pop.set_faction(cluster_label)

In [17]:
# Set the name of each population to comply with the faction it is in, and
# register the population with that faction.
for p in pops:
    # First faction whose number matches the pop's assigned faction number.
    # next() stops at the first hit instead of building a throwaway list.
    faction = next((f for f in factions if f.faction_no == p.factionNo), None)
    if faction is None:
        raise LookupError(
            f"no faction with faction_no={p.factionNo!r} found for pop {p!r}"
        )
    if p.name == "":
        p.name = p.make_name(2, 2)
    p.set_pop_name(faction)
    faction.assign_pop_to_faction(p)

# Spot-check a single pop's record after naming.
pops[1].get_data()

{'name': 'Garguiredyrydong Mel',
 'objid': '3792350943299',
 'label': 'pop',
 'conformity': 0.538,
 'literacy': 0.448,
 'aggression': 0.528,
 'constitution': 0.525,
 'health': 0.5,
 'isInFaction': '8068514917477',
 'industry': 0.5265,
 'wealth': 0.48724999999999996,
 'factionLoyalty': 0.547,
 'isIdle': 'true'}

In [18]:
# Label the cluster centres with the columns KMeans was fitted on, in the same
# order (pops_df column order filtered by starting_attributes). Using
# homeworld.starting_attributes directly would mislabel the centres if its
# order differs from pops_df's, or fail if it lists a column pops_df lacks.
fitted_columns = [c for c in pops_df.columns if c in homeworld.starting_attributes]
pd.DataFrame(kmeans.cluster_centers_, columns=fitted_columns)

Unnamed: 0,conformity,literacy,aggression,constitution
0,0.6095,0.473667,0.450167,0.507167
1,0.198,0.501,0.686,0.621


In [19]:
# Project the k-means cluster centres onto two principal components:
# fit the PCA on the centres, then transform those same centres.
pca = PCA(n_components=2)
pca.fit(kmeans.cluster_centers_)
X_r = pca.transform(kmeans.cluster_centers_)

Per the docs at sklearn:

`explained_variance_ratio_`: Percentage of variance explained by each of the selected components.

In [20]:
# Fraction of the variance explained by each of the two fitted PCA components.
pca.explained_variance_ratio_

array([1.00000000e+00, 8.05621492e-32])

In [21]:
# The cluster centres after the PCA transform (one column per PCA component).
X_r

array([[-2.44261515e-01,  5.55111512e-17],
       [ 2.44261515e-01,  6.93889390e-17]])

In [22]:
# Name the two PCA components like map coordinates: the first component is
# "long", the second is "lat".
source = pd.DataFrame(X_r, columns=["long", "lat"])

# Plot longitude on the horizontal axis and latitude on the vertical axis.
# The original encoding had them swapped, which put the dominant (first) PCA
# component on the y-axis.
alt.Chart(source).mark_circle(size=60).encode(
    x="long",
    y="lat",
)