In [1]:
import os
import sys
# Put the parent directory on the import search path (only once), presumably
# so the project-local `freeman` package imported below can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import networkx as nx
import freeman as fm

In [2]:
# Load the graph stored in 'marvel.gml' and use each node's 'name'
# attribute as its label.
g = fm.load('marvel.gml')
g.label_nodes('name')

# Same visual settings for every node and every edge.
node_style = {'size': 15, 'labpos': 'hover'}
edge_style = {'color': (0, 0, 0, 0.5)}
g.set_all_nodes(**node_style)
g.set_all_edges(**edge_style)

In [3]:
# Betweenness centrality of every node, returned as a {node: value} dict.
# `normalized=True` is the networkx default, spelled out here for the reader.
bc = nx.betweenness_centrality(g, normalized=True)

In [4]:
import pandas as pd

# One row per graph node: its id, the 'name' and 'amount' node attributes,
# and its betweenness centrality.
node_ids = list(g.nodes)
data = pd.DataFrame({
    'id': node_ids,
    'Name': [g.nodes[n]['name'] for n in node_ids],
    'ConnAmnt': [g.nodes[n]['amount'] for n in node_ids],
    # Look each centrality up by node id rather than trusting that
    # `bc.values()` iterates in the same order as `g.nodes`.
    'Intermediation (r)': [bc[n] for n in node_ids],
})

In [5]:
# Character statistics table; the first CSV column becomes the index.
char_infos = pd.read_csv('../dataset/characters_stats.csv', index_col=0)

# Inner join on 'Name': only characters present in both the graph table
# and the statistics table are kept.
semi_df = data.merge(char_infos, on='Name', how='inner')

In [6]:
import numpy as np

# Clean 'ConnAmnt' in a single chained expression. Building a new frame with
# `assign` (instead of setting a column on a boolean-filtered frame) avoids
# pandas' SettingWithCopyWarning.
semi_df = (
    semi_df
    .replace('nan', np.nan)                # literal 'nan' strings -> real NaN
    .dropna(subset=['ConnAmnt'])           # drop rows with no ConnAmnt value
    .assign(ConnAmnt=lambda df: pd.to_numeric(df['ConnAmnt']))
)

In [7]:
# Keep one row per character, drop neutral characters, and encode the
# alignment as a number (good -> 0, bad -> 1).
#
# The encoding is applied to the 'Alignment' column only: a frame-wide
# `replace` would also rewrite any 'good'/'bad' string found in other
# columns. Any label other than 'good'/'bad' becomes NaN under `map`.
semi_df = (
    semi_df
    .drop_duplicates(subset=['Name'], keep='first')
    .loc[lambda df: df['Alignment'] != 'neutral']
    .assign(Alignment=lambda df: df['Alignment'].map({'good': 0, 'bad': 1}))
)

In [8]:
# Add '<stat> (%)' columns holding each stat divided by 'Total'.
# Note: the values are plain ratios; they are not multiplied by 100.
total = semi_df['Total']
for stat in ('Intelligence', 'Power', 'Strength', 'Combat'):
    semi_df[f'{stat} (%)'] = semi_df[stat] / total

In [9]:
import statsmodels.api as sm

# Ordinary least squares: explain betweenness centrality with the
# character attributes listed in `predictors`.
# An earlier specification (now disabled) also used 'Strength (%)' and
# 'Power (%)' as predictors.
#
# NOTE(review): no intercept column is added (there is no sm.add_constant
# call), which is why the summary reports "uncentered" R-squared. Confirm
# that a regression through the origin is intended.
predictors = ['Intelligence', 'Strength', 'Power', 'Alignment']
model = sm.OLS(semi_df['Intermediation (r)'], semi_df[predictors])
result = model.fit()
result.summary()


0,1,2,3
Dep. Variable:,Intermediation (r),R-squared (uncentered):,0.338
Model:,OLS,Adj. R-squared (uncentered):,0.322
Method:,Least Squares,F-statistic:,20.71
Date:,"Fri, 20 Nov 2020",Prob (F-statistic):,8.36e-14
Time:,17:18:56,Log-Likelihood:,622.8
No. Observations:,166,AIC:,-1238.0
Df Residuals:,162,BIC:,-1225.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intelligence,8.6e-05,1.95e-05,4.417,0.000,4.75e-05,0.000
Strength,5.846e-06,1.6e-05,0.365,0.715,-2.57e-05,3.74e-05
Power,-1.954e-05,1.99e-05,-0.980,0.329,-5.89e-05,1.99e-05
Alignment,-0.0029,0.001,-2.872,0.005,-0.005,-0.001

0,1,2,3
Omnibus:,131.727,Durbin-Watson:,1.207
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1243.587
Skew:,2.974,Prob(JB):,9.09e-271
Kurtosis:,15.017,Cond. No.,216.0


In [10]:
# Load 'marvelWeights.gml' (this rebinds `g`, replacing the graph loaded
# earlier) and apply the same labels and styling as before.
g = fm.load('marvelWeights.gml')
g.label_nodes('name')
g.set_all_nodes(size=15, labpos='hover')
g.set_all_edges(color=(0, 0, 0, 0.5))

# Node id -> 'intelligence' attribute as a float, used to scale node sizes.
intelligence = {node: float(g.nodes[node]['intelligence']) for node in g.nodes}
g.scale_nodes_size(intelligence)
#g.draw()

In [11]:
# Print the fitted coefficients, their standard errors and p-values,
# one section after another.
print(
    f"Coef:\n{result.params}\n",
    f"std err:\n{result.bse}\n",
    f"p-values:\n{result.pvalues}\n",
    sep="\n",
)

Coef:
Intelligence    0.000086
Strength        0.000006
Power          -0.000020
Alignment      -0.002945
dtype: float64

std err:
Intelligence    0.000019
Strength        0.000016
Power           0.000020
Alignment       0.001025
dtype: float64

p-values:
Intelligence    0.000018
Strength        0.715298
Power           0.328781
Alignment       0.004627
dtype: float64



In [12]:
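# Disabled duplicate of cell In [10]: the same graph loading and node scaling,
# ending with a g.draw() call. Kept commented out for reference only.
# g = fm.load('marvelWeights.gml')
# g.label_nodes('name')
# g.set_all_nodes(size=15, labpos='hover')
# g.set_all_edges(color=(0, 0, 0, 0.5))

# intelligence = {}

# for i in g.nodes:
#     intelligence[i] = float(g.nodes[i]['intelligence'])

# g.scale_nodes_size(intelligence)
# g.draw()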