In [1]:
import os
import sys
# Put the parent of the current working directory on the import path
# (presumably so the project-local `freeman` package can be imported).
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

import networkx as nx
import freeman as fm

In [2]:
# Load the graph stored in 'marvel.gml' through freeman.
g = fm.load('marvel.gml')
# NOTE(review): presumably makes each node's 'name' attribute its display label —
# confirm against the freeman documentation.
g.label_nodes('name')
# Drawing defaults applied to every node and edge.
# NOTE(review): the 4-tuple looks like RGBA (half-transparent black) — confirm.
g.set_all_nodes(size=15, labpos='hover')
g.set_all_edges(color=(0, 0, 0, 0.5))

In [3]:
# Betweenness centrality of every node, using networkx's default arguments.
# `bc` is a dict keyed by node id; it supplies the 'Intermediation (r)' column.
bc = nx.betweenness_centrality(g)

In [4]:
import pandas as pd

# One row per graph node: its id, the 'name' and 'amount' node attributes, and
# its betweenness centrality.
# Every column is built from the same `node_ids` list, and the centrality is
# looked up by key (`bc[n]`) rather than taken from `bc.values()`, so the rows
# stay aligned even if the dict's iteration order ever differs from `g.nodes`.
node_ids = list(g.nodes)
data = pd.DataFrame({
    'id': node_ids,
    'Name': [g.nodes[n]['name'] for n in node_ids],
    'ConnAmnt': [g.nodes[n]['amount'] for n in node_ids],
    'Intermediation (r)': [bc[n] for n in node_ids],
})

In [5]:
# Character statistics table; the first CSV column is used as the index.
char_infos = pd.read_csv('../dataset/characters_stats.csv', index_col=0)

# Inner join on 'Name': only characters present in both the graph-derived
# table and the statistics table are kept.
semi_df = data.merge(char_infos, how='inner', on=['Name'])

In [6]:
import numpy as np

# Clean the connection-count column in a single derivation:
#   1. turn the literal string 'nan' (anywhere in the frame) into a real NaN,
#   2. drop rows whose 'ConnAmnt' is missing,
#   3. convert 'ConnAmnt' to a numeric dtype.
# `assign` builds a new frame instead of writing a column into a boolean-filtered
# one, which avoids pandas' SettingWithCopyWarning / chained-assignment pitfall.
semi_df = (
    semi_df
    .replace('nan', np.nan)
    .dropna(subset=['ConnAmnt'])
    .assign(ConnAmnt=lambda frame: pd.to_numeric(frame['ConnAmnt']))
)

In [7]:
# Keep a single row per character name (the first occurrence wins).
semi_df = semi_df.drop_duplicates(subset=['Name'], keep='first')

# Discard characters whose alignment is 'neutral'.
semi_df = semi_df[semi_df['Alignment'] != 'neutral']

# Encode alignment numerically: 'good' -> 0, 'bad' -> 1.
# The replacement is restricted to the 'Alignment' column; a frame-wide replace
# would also rewrite any other cell (e.g. a character name) equal to 'good' or 'bad'.
semi_df = semi_df.assign(
    Alignment=semi_df['Alignment'].replace({'good': 0, 'bad': 1})
)

In [8]:
# Share of each selected stat in the character's 'Total'.
# NOTE(review): despite the "(%)" suffix these columns hold fractions
# (stat / Total), not values multiplied by 100.
for stat in ('Intelligence', 'Power', 'Strength', 'Combat'):
    semi_df[f'{stat} (%)'] = semi_df[stat].div(semi_df['Total'])

In [9]:
# Many characters in this data have a Total that is made up of
# Intelligence  Strength    Speed    Durability   Power   Combat  Total
#     1            1          1         1           0       1       5
# Rows whose Total equals 5 are therefore removed.
has_real_stats = semi_df['Total'].ne(5)
semi_df = semi_df.loc[has_real_stats]

In [10]:
import statsmodels.api as sm

# Regress betweenness centrality on intelligence, strength, power and alignment.
# An earlier specification (no longer used) also included the 'Strength (%)'
# and 'Power (%)' columns.
# NOTE(review): no constant column is passed, so the model has no intercept
# (the summary reports an *uncentered* R-squared) — confirm this is intended.
predictors = ['Intelligence', 'Strength', 'Power', 'Alignment']
model = sm.OLS(endog=semi_df['Intermediation (r)'], exog=semi_df[predictors])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Intermediation (r),R-squared (uncentered):,0.351
Model:,OLS,Adj. R-squared (uncentered):,0.332
Method:,Least Squares,F-statistic:,19.05
Date:,"Wed, 25 Nov 2020",Prob (F-statistic):,1.52e-12
Time:,16:04:10,Log-Likelihood:,536.16
No. Observations:,145,AIC:,-1064.0
Df Residuals:,141,BIC:,-1052.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intelligence,8.858e-05,2.07e-05,4.285,0.000,4.77e-05,0.000
Strength,7.15e-06,1.69e-05,0.422,0.674,-2.64e-05,4.07e-05
Power,-1.996e-05,2.11e-05,-0.946,0.346,-6.17e-05,2.18e-05
Alignment,-0.0037,0.001,-3.092,0.002,-0.006,-0.001

0,1,2,3
Omnibus:,114.093,Durbin-Watson:,1.223
Prob(Omnibus):,0.0,Jarque-Bera (JB):,912.7
Skew:,2.9,Prob(JB):,6.45e-199
Kurtosis:,13.837,Cond. No.,240.0


In [11]:
# Show the fitted coefficients, their standard errors and p-values as plain text.
coefficients = result.params
standard_errors = result.bse
p_values = result.pvalues

print(f"Coef:\n{coefficients}\n")
print(f"std err:\n{standard_errors}\n")
print(f"p-values:\n{p_values}\n")

Coef:
Intelligence    0.000089
Strength        0.000007
Power          -0.000020
Alignment      -0.003723
dtype: float64

std err:
Intelligence    0.000021
Strength        0.000017
Power           0.000021
Alignment       0.001204
dtype: float64

p-values:
Intelligence    0.000034
Strength        0.673722
Power           0.345783
Alignment       0.002398
dtype: float64



In [12]:
# Let pandas display every row of semi_df (row count plus one) without truncation.
pd.set_option('display.max_rows', len(semi_df) + 1)

In [13]:
# 'Total_ws' is the total score with Strength removed (Total - Strength).
# `assign` returns a new frame rather than writing a column into a frame that
# came from boolean filtering, which avoids pandas' SettingWithCopyWarning.
semi_df = semi_df.assign(Total_ws=semi_df['Total'] - semi_df['Strength'])

# Sub-table holding only the three columns whose correlation is inspected next.
new = semi_df[['Strength', 'Durability', 'Total_ws']]

In [14]:
# Pairwise correlation matrix (pandas default method) of Strength, Durability
# and Total_ws.
new.corr()

Unnamed: 0,Strength,Durability,Total_ws
Strength,1.0,0.628778,0.565524
Durability,0.628778,1.0,0.751434
Total_ws,0.565524,0.751434,1.0


In [23]:
# The ten characters with the smallest betweenness centrality
# (sort_values sorts in ascending order by default).
semi_df.sort_values('Intermediation (r)').head(10)

Unnamed: 0,id,Name,ConnAmnt,Intermediation (r),Alignment,Intelligence,Strength,Speed,Durability,Power,Combat,Total,Intelligence (%),Power (%),Strength (%),Combat (%),Total_ws
156,1011213,Feral,45.0,2.4e-05,0,38,28,45,28,20,70,229,0.165939,0.087336,0.122271,0.305677,201
170,1009160,Arclight,58.0,3.1e-05,1,38,63,23,42,52,70,288,0.131944,0.180556,0.21875,0.243056,225
171,1011092,Leech,45.0,3.2e-05,0,25,5,12,14,62,14,132,0.189394,0.469697,0.037879,0.106061,127
150,1011304,Thundra,45.0,3.7e-05,0,38,81,32,64,26,54,295,0.128814,0.088136,0.274576,0.183051,214
167,1009322,Goblin Queen,61.0,3.7e-05,1,75,10,23,28,65,56,257,0.291829,0.252918,0.038911,0.217899,247
141,1009606,Snowbird,58.0,3.8e-05,0,50,32,27,42,63,52,266,0.18797,0.236842,0.120301,0.195489,234
115,1009360,Hydro-Man,45.0,4.8e-05,1,38,13,25,80,66,50,272,0.139706,0.242647,0.047794,0.183824,259
102,1009555,Sage,62.0,4.8e-05,0,75,10,12,14,28,56,195,0.384615,0.14359,0.051282,0.287179,185
70,1009303,Fin Fang Foom,64.0,5.4e-05,0,50,81,23,100,68,70,392,0.127551,0.173469,0.206633,0.178571,311
139,1010820,Thor Girl,34.0,6.2e-05,0,75,83,70,84,100,70,482,0.155602,0.207469,0.172199,0.145228,399


In [27]:
# Same regression with 'Power' left out: Intelligence, Strength and Alignment.
# NOTE(review): no constant column is passed, so the fit has no intercept.
predictors = ['Intelligence', 'Strength', 'Alignment']
model = sm.OLS(endog=semi_df['Intermediation (r)'], exog=semi_df[predictors])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Intermediation (r),R-squared (uncentered):,0.347
Model:,OLS,Adj. R-squared (uncentered):,0.333
Method:,Least Squares,F-statistic:,25.12
Date:,"Wed, 25 Nov 2020",Prob (F-statistic):,4.25e-13
Time:,16:12:47,Log-Likelihood:,535.71
No. Observations:,145,AIC:,-1065.0
Df Residuals:,142,BIC:,-1056.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intelligence,7.314e-05,1.27e-05,5.764,0.000,4.81e-05,9.82e-05
Strength,3.251e-06,1.64e-05,0.198,0.843,-2.92e-05,3.57e-05
Alignment,-0.0037,0.001,-3.042,0.003,-0.006,-0.001

0,1,2,3
Omnibus:,115.41,Durbin-Watson:,1.204
Prob(Omnibus):,0.0,Jarque-Bera (JB):,938.203
Skew:,2.939,Prob(JB):,1.87e-204
Kurtosis:,13.988,Cond. No.,191.0


In [28]:
# Same regression with 'Intelligence' left out: Power, Strength and Alignment.
# NOTE(review): no constant column is passed, so the fit has no intercept.
predictors = ['Power', 'Strength', 'Alignment']
model = sm.OLS(endog=semi_df['Intermediation (r)'], exog=semi_df[predictors])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Intermediation (r),R-squared (uncentered):,0.266
Model:,OLS,Adj. R-squared (uncentered):,0.251
Method:,Least Squares,F-statistic:,17.18
Date:,"Wed, 25 Nov 2020",Prob (F-statistic):,1.42e-09
Time:,16:12:47,Log-Likelihood:,527.29
No. Observations:,145,AIC:,-1049.0
Df Residuals:,142,BIC:,-1040.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Power,5.14e-05,1.37e-05,3.744,0.000,2.43e-05,7.85e-05
Strength,2.435e-05,1.74e-05,1.396,0.165,-1.01e-05,5.88e-05
Alignment,-0.0026,0.001,-2.101,0.037,-0.005,-0.000

0,1,2,3
Omnibus:,106.555,Durbin-Watson:,1.177
Prob(Omnibus):,0.0,Jarque-Bera (JB):,732.209
Skew:,2.714,Prob(JB):,1.0099999999999999e-159
Kurtosis:,12.577,Cond. No.,180.0


In [29]:
# Same regression with 'Strength' left out: Intelligence, Power and Alignment.
# NOTE(review): no constant column is passed, so the fit has no intercept.
predictors = ['Intelligence', 'Power', 'Alignment']
model = sm.OLS(endog=semi_df['Intermediation (r)'], exog=semi_df[predictors])
result = model.fit()
result.summary()

0,1,2,3
Dep. Variable:,Intermediation (r),R-squared (uncentered):,0.35
Model:,OLS,Adj. R-squared (uncentered):,0.336
Method:,Least Squares,F-statistic:,25.49
Date:,"Wed, 25 Nov 2020",Prob (F-statistic):,2.98e-13
Time:,16:12:47,Log-Likelihood:,536.07
No. Observations:,145,AIC:,-1066.0
Df Residuals:,142,BIC:,-1057.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intelligence,9.064e-05,2e-05,4.526,0.000,5.11e-05,0.000
Power,-1.78e-05,2.04e-05,-0.872,0.385,-5.81e-05,2.25e-05
Alignment,-0.0037,0.001,-3.072,0.003,-0.006,-0.001

0,1,2,3
Omnibus:,114.689,Durbin-Watson:,1.221
Prob(Omnibus):,0.0,Jarque-Bera (JB):,924.441
Skew:,2.917,Prob(JB):,1.82e-201
Kurtosis:,13.907,Cond. No.,215.0
