In [3]:
#!wget https://raw.githubusercontent.com/SimoneRitt/IACModel_CCM/main/IAC_plotting.py
import IAC_plotting

import pandas as pd
from sklearn import datasets

# Original Example: Jets and Sharks

In [6]:
#!wget -P data https://raw.githubusercontent.com/SimoneRitt/IACModel_CCM/main/data/jets_sharks.csv

In [8]:
# Read the Jets and Sharks table from the local data/ directory (path is relative
# to the notebook's working directory) and display it.
test1 = pd.read_csv('data/jets_sharks.csv')
test1

Unnamed: 0,Name,Gang,Age,Edu,Mar,Occupation
0,Art,Jets,40's,J.H.,Sing.,Pusher
1,Al,Jets,30's,J.H.,Mar.,Burglar
2,Sam,Jets,20's,COL.,Sing.,Bookie
3,Clyde,Jets,40's,J.H.,Sing.,Bookie
4,Mike,Jets,30's,J.H.,Sing.,Bookie
5,Jim,Jets,20's,J.H.,Div.,Burglar
6,Greg,Jets,20's,H.S.,Mar.,Pusher
7,John,Jets,20's,J.H.,Mar.,Burglar
8,Doug,Jets,30's,H.S.,Sing.,Bookie
9,Lance,Jets,20's,J.H.,Mar.,Burglar


In [10]:
# Draw the IAC network for the Jets and Sharks table using the project's plotting helper.
IAC_plotting.plot(test1)

# New Example: Iris Dataset

## Creating DataFrame

For effective visualization, each column of the DataFrame should have relatively few unique values, as these values will become the nodes of each pool. Continuous values should therefore be binned first to avoid an excess of nodes.

In [14]:
# Load the iris bunch and assemble a single frame: the four measurement columns
# followed by a 'target_names' column holding the species name of each row.
iris = datasets.load_iris()

# Translate each integer class code into its species name.
species = pd.Series(iris['target'], name='target_names').apply(
    func=lambda x: iris['target_names'][x]
)
measurements = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])

test2 = pd.concat([measurements, species], axis='columns')
test2

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target_names
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [16]:
# Report the spread (max - min) of every numeric column to help choose a bin width.
# Columns are selected by numeric dtype instead of comparing against `object`:
# newer pandas versions may store text in a dedicated string dtype, which is not
# `object`, and subtracting strings would raise.
for c in test2.select_dtypes(include="number").columns:
    print(f"Range of {c} = {test2[c].max() - test2[c].min()}")

Range of sepal length (cm) = 3.6000000000000005
Range of sepal width (cm) = 2.4000000000000004
Range of petal length (cm) = 5.9
Range of petal width (cm) = 2.4


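Because the ranges are relatively small, we can bin each value into a unit-width bucket $[x, x+1)$, where $x$ is the value rounded down to the nearest integer; each bucket then becomes a node.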

In [19]:
import math 

def create_buckets(x, width=1):
    """Return the label of the half-open bucket of size ``width`` that contains ``x``.

    The lower bound is ``x`` rounded down to the nearest multiple of ``width``,
    so with the default ``width=1`` a value such as 5.1 maps to ``"[5, 6)"``.

    Parameters
    ----------
    x : int or float
        Value to bin. Non-finite values (NaN, infinities) are not supported and
        make ``math.floor`` raise, so handle missing data before calling.
    width : int, optional
        Size of each bucket; must be positive. Defaults to 1, which gives
        unit-width buckets.

    Returns
    -------
    str
        Bucket label formatted as ``"[lower, upper)"``.

    Raises
    ------
    ValueError
        If ``width`` is not positive.
    """
    if width <= 0:
        raise ValueError("width must be positive")
    # Floor-divide first so the bound snaps down to a multiple of `width`;
    # math.floor turns the (possibly float) quotient into a plain int for the label.
    lower_bound = math.floor(x // width) * width
    return f"[{lower_bound}, {lower_bound + width})"

# Replace every numeric column with its bucket label so each column has only a
# handful of distinct values. Columns are picked by numeric dtype instead of
# comparing against `object`: newer pandas versions may store text in a dedicated
# string dtype, and passing species names to math.floor would raise TypeError.
# After this loop the bucketed columns hold strings, so re-running the cell is a no-op.
for c in test2.select_dtypes(include="number").columns:
    test2[c] = test2[c].apply(create_buckets)

test2

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target_names
0,"[5, 6)","[3, 4)","[1, 2)","[0, 1)",setosa
1,"[4, 5)","[3, 4)","[1, 2)","[0, 1)",setosa
2,"[4, 5)","[3, 4)","[1, 2)","[0, 1)",setosa
3,"[4, 5)","[3, 4)","[1, 2)","[0, 1)",setosa
4,"[5, 6)","[3, 4)","[1, 2)","[0, 1)",setosa
...,...,...,...,...,...
145,"[6, 7)","[3, 4)","[5, 6)","[2, 3)",virginica
146,"[6, 7)","[2, 3)","[5, 6)","[1, 2)",virginica
147,"[6, 7)","[3, 4)","[5, 6)","[2, 3)",virginica
148,"[6, 7)","[3, 4)","[5, 6)","[2, 3)",virginica


In [21]:
# Draw the IAC network for the bucketed iris table.
# NOTE(review): `hidden_state` presumably names the column treated as the hidden
# pool — confirm against IAC_plotting.plot.
IAC_plotting.plot(test2, hidden_state='target_names')