# Get Images for Each Character

In [2]:
## Found at https://stackoverflow.com/questions/8032642/how-to-obtain-image-size-using-standard-python-class-without-using-external-lib

import struct
import imghdr

def get_image_size(fname):
    '''Determine the image type of fhandle and return its size.
    from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        if imghdr.what(fname) == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', head[16:24])
        elif imghdr.what(fname) == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif imghdr.what(fname) == 'jpeg':
            try:
                fhandle.seek(0) # Read 0xff next
                size = 2
                ftype = 0
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception: #IGNORE:W0703
                return
        else:
            return
        return width, height

In [3]:
import os
image_sizes = []

character_image_files = os.listdir('/home/jordan/saltybetdata/scraping_scripts/character_images/')

for character_image_file in character_image_files:
    image_sizes.append((character_image_file.split('.gif')[0], get_image_size('/home/jordan/saltybetdata/scraping_scripts/character_images/' + character_image_file)))

In [4]:
import numpy as np
import seaborn as sns
sns.set()

image_matrix = np.array([[int(x[0]), x[1][0], x[1][1]] for x in image_sizes if x[1] != None])

In [5]:
import pandas as pd

character_image_df = pd.DataFrame(image_matrix, columns=['CharacterId', 'Width', 'Height'], dtype=int)
character_image_df = character_image_df.set_index('CharacterId')

chararacter_image_df = character_image_df[(~character_image_df['Width'].isnull())&(~character_image_df['Height'].isnull())]

# Getting Character Information and Joining

In [6]:
character_info = []

with open('/home/jordan/saltybetdata/scraping_scripts/character_information.txt') as input_file:
    for line in input_file:
        if '|||\n' in line:
            pass
        elif 'by\n' in line:
            character_info.append([int(line.split('|||')[0]), line.split('|||')[1].split(' by\n')[0], ''])
        else:
            character_info.append([int(line.split('|||')[0]), line.split('|||')[1].split(' by ')[0], ' by '.join(line.split(' by ')[1:]).replace('\n', '')])

In [7]:
character_info_df = pd.DataFrame(character_info, columns=['CharacterId', 'Name', 'Author'])
character_info_df = character_info_df.set_index('CharacterId')

In [8]:
# Join these two together

joined_dataset = character_info_df.join(character_image_df, on='CharacterId', rsuffix='_image')
joined_dataset.head()

Unnamed: 0_level_0,Name,Author,Width,Height
CharacterId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Mr. bison,"Vk, updated by terry kuo, adapted by tenebrous",122.0,99.0
2,Bullseye,Doom & o ilusionista,86.0,113.0
3,Elecman,O ilusionista & akitosama,94.0,80.0
4,Ooze-o,O ilusionista,93.0,125.0
5,Shin kazuma,O ilusionista,66.0,94.0


In [9]:
len(joined_dataset)

9664

In [11]:
# 153 characters missing information
print(len(joined_dataset[joined_dataset['Height'].isnull()]))
joined_dataset = joined_dataset[~joined_dataset['Width'].isnull()]


153


In [12]:
joined_dataset[joined_dataset['Width'].isnull()]

Unnamed: 0_level_0,Name,Author,Width,Height
CharacterId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


# Get Match Data

In [13]:
match_results = []

for match_file in os.listdir('/home/jordan/saltybetdata/scraping_scripts/match_results/'):
    with open('/home/jordan/saltybetdata/scraping_scripts/match_results/' + match_file) as input_file:
        match_result = input_file.read().split('\n')[0:2]
        if len(match_result) == 2:
            try:
                first_competitor = match_result[0].split(' vs ')[0]
                second_competitor = match_result[0].split(' vs ')[1].split(' at ')[0]
                winner = match_result[1][8:]
                if first_competitor == winner:
                    winner_id = 'Red'
                elif second_competitor == winner:
                    winner_id = 'Blue'
                else:
                    winner_id = 'N/A'
                    
                match_id = match_file.split('_')[2].split('.t')[0]
                match_results.append([int(match_id), first_competitor, second_competitor, winner_id])
            except Exception as e:
                print(e)
                print(match_result)

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

In [14]:
match_df = pd.DataFrame(match_results, columns=['MatchId', 'Red', 'Blue', 'WinnerId'])

In [15]:
# Total number of matches
len(match_df) 

946172

In [16]:
# Remove exhibition teams
match_df = match_df[~(match_df['Red'].str.contains('Team')|match_df['Blue'].str.contains('Team'))]

In [17]:
len(match_df)

861280

In [18]:
# Remove ties
match_df = match_df[match_df['WinnerId'] != 'N/A']

In [19]:
len(match_df)

856230

In [20]:
# Remove self fights
print(len(match_df[match_df['Blue'] == match_df['Red']]))
match_df = match_df[match_df['Blue'] != match_df['Red']]

76


In [21]:
# Total number of matches which align with a known character
match_df = match_df[(match_df['Red'].isin(joined_dataset['Name']))&(match_df['Blue'].isin(joined_dataset['Name']))]
len(match_df)

810422

In [22]:
match_df = match_df[match_df['Blue'].isin(joined_dataset['Name'])&(match_df['Red'].isin(joined_dataset['Name']))]

In [23]:
match_df['RedWidth'] = None
match_df['RedHeight'] = None
match_df['RedId'] = None
match_df['BlueWidth'] = None
match_df['BlueHeight'] = None
match_df['BlueId'] = None

In [212]:
joined_dataset.to_csv('/home/jordan/character_information.csv')

In [213]:
match_df.to_csv('/home/jordan/match_data.csv')

In [24]:
match_df[['MatchId', 'Red', 'Blue', 'WinnerId']].to_csv('/home/jordan/match_data.csv')

In [25]:
temp_dict = match_df.to_dict('index')

In [26]:
character_dict = joined_dataset.reset_index().set_index('Name').to_dict('index')

In [27]:
for item in temp_dict:
    try:
        temp_dict[item]['RedId'] = character_dict[temp_dict[item]['Red']]['CharacterId']
        temp_dict[item]['RedWidth'] = character_dict[temp_dict[item]['Red']]['Width']
        temp_dict[item]['RedHeight'] = character_dict[temp_dict[item]['Red']]['Height']
        temp_dict[item]['BlueId'] = character_dict[temp_dict[item]['Blue']]['CharacterId']
        temp_dict[item]['BlueWidth'] = character_dict[temp_dict[item]['Blue']]['Width']
        temp_dict[item]['BlueHeight'] = character_dict[temp_dict[item]['Blue']]['Height']
        if item % 10000 == 0:
            print(item)
    except:
        print(temp_dict[item]['Red'], temp_dict[item]['Blue'])

0
10000
20000
30000
40000
60000
70000
80000
90000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000


In [218]:
pd.DataFrame(temp_dict).transpose().to_csv('/home/jordan/joined_match_data.csv')

# Constructing Win Matrices

In [28]:
complete_match_data = pd.DataFrame(temp_dict).transpose()

In [29]:
len(complete_match_data)

810422

In [30]:
len(list(set(complete_match_data['Blue'].values).union(set(complete_match_data['Red']))))

9494

In [31]:
# Average number of matches per character
x = complete_match_data['Blue'].value_counts() + complete_match_data['Red'].value_counts()
x.mean()
x.median()

156.0

In [None]:
# Check if strongly connected

In [32]:
# Creating Test and Train Datasets

## Train Dataset

msk = np.random.rand(len(complete_match_data)) < 0.9

test_match_data = complete_match_data[~msk]
complete_match_data = complete_match_data[msk]

In [34]:
test_match_data

Unnamed: 0,Blue,BlueHeight,BlueId,BlueWidth,MatchId,Red,RedHeight,RedId,RedWidth,WinnerId
10,Chaos satellite,4,5316,4,328467,Gn-pioneer,216,5543,162,Red
35,Mech-hisui,103,1718,44,4432,Shiki tohno,111,1722,55,Blue
44,Bruce shotoman,98,8833,66,315861,Shiki vp,128,1829,48,Red
56,Byakuya,204,319,262,679141,Takuma945,105,6197,60,Red
65,Kraken,84,1220,60,58760,Cyborg superman,156,511,153,Red
70,Takano_miyo,105,2149,41,730327,Bloody flandre,112,5289,119,Blue
96,Fire quacker,128,8461,121,626091,Ohga,160,5146,85,Blue
116,Char's zaku,95,368,71,843746,Shaia hishizaki,164,12312,75,Blue
140,Robo-segalow,131,4360,73,359412,Satori EX,90,6080,52,Red
152,Iron_miyako,84,5632,52,368714,Tin's fernandeath,190,682,175,Red


In [232]:
natural_indexed_dict = joined_dataset.reset_index().reset_index().set_index('Name').to_dict('index')
natural_indexed_dict

{'Mr. bison': {'index': 0,
  'CharacterId': 1,
  'Author': 'Vk, updated by terry kuo, adapted by tenebrous',
  'Width': 122.0,
  'Height': 99.0},
 'Bullseye': {'index': 1,
  'CharacterId': 2,
  'Author': 'Doom & o ilusionista',
  'Width': 86.0,
  'Height': 113.0},
 'Elecman': {'index': 2,
  'CharacterId': 3,
  'Author': 'O ilusionista & akitosama',
  'Width': 94.0,
  'Height': 80.0},
 'Ooze-o': {'index': 3,
  'CharacterId': 4,
  'Author': 'O ilusionista',
  'Width': 93.0,
  'Height': 125.0},
 'Shin kazuma': {'index': 4,
  'CharacterId': 5,
  'Author': 'O ilusionista',
  'Width': 66.0,
  'Height': 94.0},
 'Mexican typhoon': {'index': 5,
  'CharacterId': 6,
  'Author': 'O ilusionista',
  'Width': 124.0,
  'Height': 131.0},
 'Zangief maskered': {'index': 6,
  'CharacterId': 7,
  'Author': 'O ilusionista',
  'Width': 138.0,
  'Height': 115.0},
 'Predalien': {'index': 7,
  'CharacterId': 8,
  'Author': 'Josipknezovicz',
  'Width': 237.0,
  'Height': 130.0},
 'Eva-00': {'index': 8,
  'Charac

In [233]:
win_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
for match in complete_match_data.iterrows():
    if match[0] % 10000 == 0:
        print(match[0])
    winner_index = natural_indexed_dict[match[1][match[1]['WinnerId']]]['index']
    if match[1]['WinnerId'] == 'Red':
        loser_index = natural_indexed_dict[match[1]['Blue']]['index']
    else:
        loser_index = natural_indexed_dict[match[1]['Red']]['index']
    win_matrix[winner_index, loser_index] = win_matrix[winner_index, loser_index] + 1

0
10000
20000
30000
40000
60000
70000
80000
90000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000


In [236]:
adjacency_dict = {}
for row_index in range(len(win_matrix)):
    adjacency_dict[row_index] = []
    if row_index % 1000 == 0:
        print(row_index)
    for col_index in range(len(win_matrix)):
        if win_matrix[row_index, col_index] > 0:
            adjacency_dict[row_index].append(col_index)
    
print(adjacency_dict)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [248]:
from tarjan import tarjan
tarjan_output = tarjan(adjacency_dict)
removal_list = []
for item in tarjan_output:
    if len(item) == 1:
        removal_list.append(item[0])

In [1]:
removal_list

NameError: name 'removal_list' is not defined

In [104]:
np.savetxt("/home/jordan/basic_win_matrix.csv", win_matrix, delimiter=",")

In [105]:
hitbox_dict = joined_dataset.reset_index().to_dict('index')
hitbox_dict

{0: {'CharacterId': 1,
  'Name': 'Mr. bison',
  'Author': 'Vk, updated by terry kuo, adapted by tenebrous',
  'Width': 122.0,
  'Height': 99.0},
 1: {'CharacterId': 2,
  'Name': 'Bullseye',
  'Author': 'Doom & o ilusionista',
  'Width': 86.0,
  'Height': 113.0},
 2: {'CharacterId': 3,
  'Name': 'Elecman',
  'Author': 'O ilusionista & akitosama',
  'Width': 94.0,
  'Height': 80.0},
 3: {'CharacterId': 4,
  'Name': 'Ooze-o',
  'Author': 'O ilusionista',
  'Width': 93.0,
  'Height': 125.0},
 4: {'CharacterId': 5,
  'Name': 'Shin kazuma',
  'Author': 'O ilusionista',
  'Width': 66.0,
  'Height': 94.0},
 5: {'CharacterId': 6,
  'Name': 'Mexican typhoon',
  'Author': 'O ilusionista',
  'Width': 124.0,
  'Height': 131.0},
 6: {'CharacterId': 7,
  'Name': 'Zangief maskered',
  'Author': 'O ilusionista',
  'Width': 138.0,
  'Height': 115.0},
 7: {'CharacterId': 8,
  'Name': 'Predalien',
  'Author': 'Josipknezovicz',
  'Width': 237.0,
  'Height': 130.0},
 8: {'CharacterId': 9,
  'Name': 'Eva-00'

In [108]:
height_advantage_win_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
height_advantage_loss_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
for match in complete_match_data.iterrows():
    if match[0] % 10000 == 0:
        print(match[0])
    winner_index = natural_indexed_dict[match[1][match[1]['WinnerId']]]['index']
    if match[1]['WinnerId'] == 'Red':
        loser_index = natural_indexed_dict[match[1]['Blue']]['index']
    else:
        loser_index = natural_indexed_dict[match[1]['Red']]['index']
    if hitbox_dict[winner_index]['Height'] > hitbox_dict[loser_index]['Height']:
        height_advantage_win_matrix[winner_index, loser_index] = height_advantage_win_matrix[winner_index, loser_index] + 1
    else:
        height_advantage_loss_matrix[winner_index, loser_index] = height_advantage_loss_matrix[winner_index, loser_index] + 1
        
np.savetxt("/home/jordan/height_advantage_win_matrix.csv", height_advantage_win_matrix, delimiter=",")
np.savetxt("/home/jordan/height_advantage_loss_matrix.csv", height_advantage_loss_matrix, delimiter=",")

0
10000
20000
30000
40000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000


In [107]:
width_advantage_win_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
width_advantage_loss_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
for match in complete_match_data.iterrows():
    if match[0] % 10000 == 0:
        print(match[0])
    winner_index = natural_indexed_dict[match[1][match[1]['WinnerId']]]['index']
    if match[1]['WinnerId'] == 'Red':
        loser_index = natural_indexed_dict[match[1]['Blue']]['index']
    else:
        loser_index = natural_indexed_dict[match[1]['Red']]['index']
    if hitbox_dict[winner_index]['Width'] > hitbox_dict[loser_index]['Width']:
        width_advantage_win_matrix[winner_index, loser_index] = width_advantage_win_matrix[winner_index, loser_index] + 1
    else:
        width_advantage_loss_matrix[winner_index, loser_index] = width_advantage_loss_matrix[winner_index, loser_index] + 1
        
np.savetxt("/home/jordan/width_advantage_win_matrix.csv", width_advantage_win_matrix, delimiter=",")
np.savetxt("/home/jordan/width_advantage_loss_matrix.csv", width_advantage_loss_matrix, delimiter=",")

0
10000
20000
30000
40000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000
