In [1]:
import os
import sys
# Put the parent directory on the import path (once), presumably so that the
# project-local `freeman` package imported below can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import networkx as nx
import freeman as fm

In [2]:
# Load the graph from the GML file and configure how it is rendered.
g = fm.load('marvel.gml')
g.label_nodes('name')

# Visual settings applied uniformly to every node and every edge.
node_style = {'size': 15, 'labpos': 'hover'}
edge_rgba = (0, 0, 0, 0.5)
g.set_all_nodes(**node_style)
g.set_all_edges(color=edge_rgba)

g.draw()

In [3]:
# Betweenness centrality of every node in the graph. The result is consumed
# below as a mapping (via `bc.values()`), one score per node.
bc = nx.betweenness_centrality(g)

In [4]:
import pandas as pd

# Build one row per graph node: its id, the 'name' and 'amount' node
# attributes, and its betweenness score.
# Every column is derived from the same node list, and the score is looked up
# by node id, so rows stay aligned even if `bc` iterates in a different order
# than `g.nodes`.
node_ids = list(g.nodes)
data = pd.DataFrame({
    'id': node_ids,
    'Name': [g.nodes[node]['name'] for node in node_ids],
    'ConnAmnt': [g.nodes[node]['amount'] for node in node_ids],
    'Intermediation (r)': [bc[node] for node in node_ids],
})

In [5]:
# Per-character stats table; the first CSV column is used as the index.
char_infos = pd.read_csv('../dataset/characters_stats.csv', index_col=0)

# Inner join on the character name: only characters present in both the graph
# table and the stats table are kept.
semi_df = data.merge(char_infos, on=['Name'], how='inner')

In [6]:
import numpy as np

# Turn literal 'nan' strings into real missing values so they can be filtered.
semi_df = semi_df.replace('nan', np.nan)

# Keep only rows that have a connection amount and convert that column to a
# numeric dtype. `.assign` builds a new frame from the filtered rows, which
# avoids assigning into a filtered slice (the pattern that triggers pandas'
# SettingWithCopyWarning).
semi_df = (
    semi_df[semi_df['ConnAmnt'].notna()]
    .assign(ConnAmnt=lambda df: pd.to_numeric(df['ConnAmnt']))
)

In [7]:
# The merge can yield several rows for one character name; keep the first
# occurrence of each name and discard the rest.
semi_df = semi_df.drop_duplicates(subset='Name', keep='first')

In [8]:
# Many characters in this dataset have a Total that is made up of
# Intelligence  Strength    Speed    Durability   Power   Combat  Total
#     1            1          1         1           0       1       5
# Drop every row whose Total equals 5.
total_is_not_five = semi_df['Total'].ne(5)
semi_df = semi_df.loc[total_is_not_five]

In [9]:
import statsmodels.api as sm

# Ordinary least squares: betweenness score regressed on three stat columns.
# NOTE(review): no constant column is added to the regressors, so the model is
# fitted without an intercept (the summary reports "uncentered" R-squared).
# Confirm that regression through the origin is intended.
predictors = ['Intelligence', 'Strength', 'Power']
model = sm.OLS(endog=semi_df['Intermediation (r)'], exog=semi_df[predictors])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Intermediation (r),R-squared (uncentered):,0.412
Model:,OLS,Adj. R-squared (uncentered):,0.396
Method:,Least Squares,F-statistic:,25.9
Date:,"Mon, 23 Nov 2020",Prob (F-statistic):,8.95e-13
Time:,23:00:01,Log-Likelihood:,412.17
No. Observations:,114,AIC:,-818.3
Df Residuals:,111,BIC:,-810.1
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intelligence,0.0001,2.72e-05,4.370,0.000,6.49e-05,0.000
Strength,-1.189e-06,2.11e-05,-0.056,0.955,-4.3e-05,4.06e-05
Power,-3.875e-05,2.62e-05,-1.479,0.142,-9.07e-05,1.32e-05

0,1,2,3
Omnibus:,92.202,Durbin-Watson:,1.013
Prob(Omnibus):,0.0,Jarque-Bera (JB):,712.858
Skew:,2.766,Prob(JB):,1.6e-155
Kurtosis:,13.931,Cond. No.,5.64


In [10]:
# Print the fitted coefficients, their standard errors and their p-values,
# each under its own heading and followed by a blank line.
for heading, values in (
    ("Coef", result.params),
    ("std err", result.bse),
    ("p-values", result.pvalues),
):
    print(f"{heading}:\n{values}\n")

Coef:
Intelligence    0.000119
Strength       -0.000001
Power          -0.000039
dtype: float64

std err:
Intelligence    0.000027
Strength        0.000021
Power           0.000026
dtype: float64

p-values:
Intelligence    0.000028
Strength        0.955165
Power           0.141960
dtype: float64



In [11]:
# Number of rows in the cleaned table (matches the regression's observation count).
semi_df.shape[0]

114

In [12]:
# Show the 25 rows with the highest betweenness score, highest first.
ranked = semi_df.sort_values(by='Intermediation (r)', ascending=False)
ranked.head(25)

Unnamed: 0,id,Name,ConnAmnt,Intermediation (r),Alignment,Intelligence,Strength,Speed,Durability,Power,Combat,Total
9,1009610,Spider-Man,121.0,0.043216,good,88,55,60,74,58,85,420
8,1009718,Wolverine,122.0,0.036894,good,55,32,38,100,44,100,369
20,1009351,Hulk,112.0,0.02135,good,88,100,47,100,41,85,461
10,1009368,Iron Man,112.0,0.02097,good,100,85,58,85,100,64,492
4,1009175,Beast,117.0,0.019794,good,88,48,35,56,35,84,346
34,1009662,Thing,108.0,0.016904,good,75,84,21,90,26,80,376
0,1009220,Captain America,109.0,0.016422,good,63,19,35,56,46,100,319
6,1009257,Cyclops,110.0,0.014915,good,75,10,23,42,76,80,306
2,1009471,Nick Fury,97.0,0.014421,good,75,11,23,42,25,100,276
5,1009243,Colossus,113.0,0.014256,good,63,83,33,100,46,80,405
