In [1]:
import requests
from io import StringIO
import pandas as pd
import json
from os.path import exists
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

import statsmodels.api as sm
import lime
import lime.lime_tabular
import shap
!pip install xgboost
import xgboost as xgb
from xgboost import XGBClassifier
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler



### Read all files in folder 'data'  
#### or only the first n files, by replacing "for path in paths" with "for path in paths[:n]"

In [2]:
import os

# Load every JSON file found in the 'data' folder into all_data (one parsed
# document per file).
#
# - Only '*.json' entries are read, so stray entries such as
#   '.ipynb_checkpoints' or '.DS_Store' do not crash open()/json.load().
# - os.listdir() returns entries in arbitrary order; sorting keeps the index N
#   of all_data[N] stable between runs and machines.
# - `paths` still holds bare file names (no folder prefix), as before.
paths = sorted(name for name in os.listdir('data/') if name.lower().endswith('.json'))
all_data = []

for path in paths:
    # Explicit UTF-8: without it open() falls back to the locale encoding,
    # which is platform dependent.
    with open(os.path.join('data', path), encoding='utf-8') as json_file:
        all_data.append(json.load(json_file))


### Read a set of files specified in 'paths'

In [3]:
# Load only the files listed in `paths` (paths are relative to the notebook).
# Running this cell replaces whatever `paths` / `all_data` held before.
paths = ['data/g0m8.json', 'data/g10m61.json']
all_data = []

for path in paths:
    # Explicit UTF-8: without it open() falls back to the locale encoding,
    # which is platform dependent.
    with open(path, encoding='utf-8') as json_file:
        all_data.append(json.load(json_file))

## Layout

In [4]:
# Flatten every loaded JSON document into DataFrames.
#
# Results (index N always refers to the Nth entry of all_data):
#   all_df[N]       - nested node table, 1 row = 1 node
#   all_edges[N]    - the 'Edges' entry of the file ([] if the file has none)
#   all_features[N] - [move_df, cluster_multi_df, column_df, gamestate_df]
all_df = []
all_edges = []
all_features = []

# The table layouts are identical for every file, so they are defined once.
columns = ['Layer', 'NodeID', 'Move', 'Clusters', 'Columns', 'GameState', 'PlayArea']
move_columns = ['Layer', 'nodeID', 'mctsScore', 'numRemovedCells', 'numRemovedColumns', 'color',
                'columnRange', 'connectionsDestroyed', 'location', 'connectionsCreated']
cluster_columns = ['Layer', 'nodeID', 'mctsScore', 'numCells', 'color', 'shape', 'middleLocation', 'width', 'height']
column_columns = ['Layer', 'nodeID', 'mctsScore', 'shape', 'numColors', 'colors', 'height']
gamestate_columns = ['Layer', 'nodeID', 'mctsScore', 'score', 'board']

for data in all_data:
    # Append exactly one edge entry per file so all_edges stays index-aligned
    # with all_df / all_features even if a file carries no 'Edges' key.
    all_edges.append(data.get('Edges', []))

    # Every other top-level key is a layer that maps node IDs to node records.
    vals = [
        [layer, node_id, node['Move'], node['Clusters'], node['Columns'], node['GameState'], node['PlayArea']]
        for layer, nodes in data.items() if layer != 'Edges'
        for node_id, node in nodes.items()
    ]
    df = pd.DataFrame(vals, columns=columns)
    all_df.append(df)

    move_df_list = []
    column_df_list = []
    gamestate_df_list = []
    cluster_multi_df = []  # one cluster DataFrame per node, in node order

    for _, inst in df.iterrows():
        layer = inst['Layer']
        is_root = layer == '0'  # layer keys are JSON object keys, i.e. strings
        # Layer-0 rows never read inst['Move'], so they carry no MCTS score.
        mcts_score = None if is_root else inst['Move']['mctsScore']

        # Move features: 1 row per node.
        if is_root:
            # NOTE(review): this placeholder stores the integer 0 as Layer
            # (all other rows store the string key) and no nodeID. Kept
            # unchanged so downstream analysis sees the same table - confirm
            # whether that is intended.
            move_df_list.append([0] + [None] * 9)
        else:
            move = inst['Move']
            move_df_list.append([layer] + [move[key] for key in move_columns[1:]])

        # Cluster features: 1 DataFrame per node, 1 row per cluster.
        cluster_df_list = [
            [layer, cluster['nodeID'], mcts_score, cluster['numCells'], cluster['color'], cluster['shape'],
             cluster['middleLocation'], cluster['width'], cluster['height']]
            for cluster in inst['Clusters']
        ]
        cluster_df = pd.DataFrame(cluster_df_list, columns=cluster_columns)
        cluster_multi_df.append(cluster_df)

        # Column features: 1 row per board column of the node.
        column_df_list.extend(
            [layer, col['nodeID'], mcts_score, col['shape'], col['numColors'], col['colors'], col['height']]
            for col in inst['Columns']
        )

        # GameState features: 1 row per node.
        game_state = inst['GameState']
        gamestate_df_list.append([layer, game_state['nodeID'], mcts_score, game_state['score'], game_state['board']])

    move_df = pd.DataFrame(move_df_list, columns=move_columns)
    column_df = pd.DataFrame(column_df_list, columns=column_columns)
    gamestate_df = pd.DataFrame(gamestate_df_list, columns=gamestate_columns)

    all_features.append([move_df, cluster_multi_df, column_df, gamestate_df])



## Format Explanation

1) Each all_* variable is a list with one entry per .json file, in the order the files were read. Indices are 0-based (e.g. all_features[7] = features of the 8th file read)

2) 
For analyzing the Nth file:

dataframe = all_df[N]

features = all_features[N]

edges = all_edges[N]

3) 

-dataframe: full json file, contains everything, but very nested

-edges: list of size-2 lists of nodeIDs indicating a connection between the 2 nodes

### Features:

-features[0] = DataFrame of Move Features, 1 DataFrame for ALL Nodes, 1 row = 1 node

-features[1] = List of DataFrames, 1 DataFrame for 1 Node, 1 row = 1 cluster

features[1][2] = DataFrame of all clusters for one node (here, the node at index 2)

P.S. If you want to concatenate all clusters for ALL nodes into one DataFrame, do:
#### a = pd.concat(features[1], axis = 0)
(Note that the per-node DataFrames live in features[1], not in features itself.)

-features[2] = DataFrame of Columns, 1 DataFrame for ALL Nodes, 1 row = 1 column of a node

-features[3] = DataFrame of GameState Features, 1 DataFrame for ALL Nodes, 1 row = 1 node

## DataFrames: 

In [5]:
# Preview the tables built from the most recently processed file.
last_moves, last_clusters, last_columns, last_gamestates = all_features[-1]

display(
    last_moves.head(),        # move features, 1 row per node
    last_clusters[1].head(),  # clusters of the node at index 1
    last_columns.tail(),      # board columns (last rows)
    last_gamestates.head(),   # game-state features, 1 row per node
    all_df[-1].head(),        # nested source table
)

Unnamed: 0,Layer,nodeID,mctsScore,numRemovedCells,numRemovedColumns,color,columnRange,connectionsDestroyed,location,connectionsCreated
0,0,,,,,,,,,
1,1,13584.0,15.0,4.0,1.0,2.0,"[2, 3, 14]",0.0,182.0,2.0
2,1,13576.0,0.0,2.0,1.0,2.0,"[9, 10, 14]",0.0,219.0,0.0
3,1,13577.0,15.0,2.0,1.0,4.0,"[5, 6, 14]",0.0,215.0,1.0
4,2,13585.0,15.0,2.0,1.0,2.0,"[8, 9, 14]",0.0,218.0,0.0


Unnamed: 0,Layer,nodeID,mctsScore,numCells,color,shape,middleLocation,width,height
0,1,13584,15,1,2,[105],"[0.5, 7.5]",1,1
1,1,13584,15,1,4,[120],"[0.5, 8.5]",1,1
2,1,13584,15,1,3,[135],"[0.5, 9.5]",1,1
3,1,13584,15,1,3,[140],"[5.5, 9.5]",1,1
4,1,13584,15,1,0,[150],"[0.5, 10.5]",1,1


Unnamed: 0,Layer,nodeID,mctsScore,shape,numColors,colors,height
205,6,13589,15.0,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",0,[],0
206,6,13589,15.0,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",0,[],0
207,6,13589,15.0,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",0,[],0
208,6,13589,15.0,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",0,[],0
209,6,13589,15.0,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -...",0,[],0


Unnamed: 0,Layer,nodeID,mctsScore,score,board
0,0,13575,,15,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -..."
1,1,13584,15.0,15,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -..."
2,1,13576,0.0,0,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -..."
3,1,13577,15.0,15,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -..."
4,2,13585,15.0,15,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -..."


Unnamed: 0,Layer,NodeID,Move,Clusters,Columns,GameState,PlayArea
0,0,13575,root node,"[{'numCells': 1, 'color': 2, 'shape': [105], '...","[{'shape': [3, 2, 4, 3, 0, 3, 4, 2, -1, -1, -1...","{'score': 15, 'nodeID': 13575, 'board': [-1, -...",board is full
1,1,13584,"{'numRemovedCells': 4, 'numRemovedColumns': 1,...","[{'numCells': 1, 'color': 2, 'shape': [105], '...","[{'shape': [3, 2, 4, 3, 0, 3, 4, 2, -1, -1, -1...","{'score': 15, 'nodeID': 13584, 'board': [-1, -...",board is full
2,1,13576,"{'numRemovedCells': 2, 'numRemovedColumns': 1,...","[{'numCells': 1, 'color': 2, 'shape': [105], '...","[{'shape': [3, 2, 4, 3, 0, 3, 4, 2, -1, -1, -1...","{'score': 0, 'nodeID': 13576, 'board': [-1, -1...",board is full
3,1,13577,"{'numRemovedCells': 2, 'numRemovedColumns': 1,...","[{'numCells': 1, 'color': 2, 'shape': [105], '...","[{'shape': [3, 2, 4, 3, 0, 3, 4, 2, -1, -1, -1...","{'score': 15, 'nodeID': 13577, 'board': [-1, -...",board is full
4,2,13585,"{'numRemovedCells': 2, 'numRemovedColumns': 1,...","[{'numCells': 1, 'color': 2, 'shape': [105], '...","[{'shape': [3, 2, 4, 3, 0, 3, 4, 2, -1, -1, -1...","{'score': 15, 'nodeID': 13585, 'board': [-1, -...",board is full


# ANALYSIS