# Get Images for Each Character

In [1]:
## Found at https://stackoverflow.com/questions/8032642/how-to-obtain-image-size-using-standard-python-class-without-using-external-lib

import struct
import imghdr

def get_image_size(fname):
    '''Determine the image type of fhandle and return its size.
    from draco'''
    with open(fname, 'rb') as fhandle:
        head = fhandle.read(24)
        if len(head) != 24:
            return
        if imghdr.what(fname) == 'png':
            check = struct.unpack('>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return
            width, height = struct.unpack('>ii', head[16:24])
        elif imghdr.what(fname) == 'gif':
            width, height = struct.unpack('<HH', head[6:10])
        elif imghdr.what(fname) == 'jpeg':
            try:
                fhandle.seek(0) # Read 0xff next
                size = 2
                ftype = 0
                while not 0xc0 <= ftype <= 0xcf:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except Exception: #IGNORE:W0703
                return
        else:
            return
        return width, height

In [2]:
import os
image_sizes = []

character_image_files = os.listdir('/home/jordan/saltybetdata/scraping_scripts/character_images/')

for character_image_file in character_image_files:
    image_sizes.append((character_image_file.split('.gif')[0], get_image_size('/home/jordan/saltybetdata/scraping_scripts/character_images/' + character_image_file)))

In [3]:
import numpy as np
import seaborn as sns
sns.set()

image_matrix = np.array([[int(x[0]), x[1][0], x[1][1]] for x in image_sizes if x[1] != None])

In [4]:
import pandas as pd

character_image_df = pd.DataFrame(image_matrix, columns=['CharacterId', 'Width', 'Height'], dtype=int)
character_image_df = character_image_df.set_index('CharacterId')

chararacter_image_df = character_image_df[(~character_image_df['Width'].isnull())&(~character_image_df['Height'].isnull())]

# Getting Character Information and Joining

In [9]:
character_info = []

with open('/home/jordan/saltybetdata/scraping_scripts/character_information.txt') as input_file:
    for line in input_file:
        if '|||\n' in line:
            pass
        elif 'by\n' in line:
            character_info.append([int(line.split('|||')[0]), line.split('|||')[1].split(' by\n')[0], ''])
        else:
            character_info.append([int(line.split('|||')[0]), line.split('|||')[1].split(' by ')[0], ' by '.join(line.split(' by ')[1:]).replace('\n', '')])

In [10]:
character_info_df = pd.DataFrame(character_info, columns=['CharacterId', 'Name', 'Author'])
character_info_df = character_info_df.set_index('CharacterId')

In [11]:
# Join these two together

joined_dataset = character_info_df.join(character_image_df, on='CharacterId', rsuffix='_image')
joined_dataset.head()

Unnamed: 0_level_0,Name,Author,Width,Height
CharacterId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Mr. bison,"Vk, updated by terry kuo, adapted by tenebrous",122.0,99.0
2,Bullseye,Doom & o ilusionista,86.0,113.0
3,Elecman,O ilusionista & akitosama,94.0,80.0
4,Ooze-o,O ilusionista,93.0,125.0
5,Shin kazuma,O ilusionista,66.0,94.0


# Get Match Data

In [12]:
match_results = []

for match_file in os.listdir('/home/jordan/saltybetdata/scraping_scripts/match_results/'):
    with open('/home/jordan/saltybetdata/scraping_scripts/match_results/' + match_file) as input_file:
        match_result = input_file.read().split('\n')[0:2]
        if len(match_result) == 2:
            try:
                first_competitor = match_result[0].split(' vs ')[0]
                second_competitor = match_result[0].split(' vs ')[1].split(' at ')[0]
                winner = match_result[1][8:]
                if first_competitor == winner:
                    winner_id = 'Red'
                elif second_competitor == winner:
                    winner_id = 'Blue'
                else:
                    winner_id = 'N/A'
                    
                match_id = match_file.split('_')[2].split('.t')[0]
                match_results.append([int(match_id), first_competitor, second_competitor, winner_id])
            except Exception as e:
                print(e)
                print(match_result)

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of range
['vs at', 'Winner: N/A']
list index out of ra

In [13]:
match_df = pd.DataFrame(match_results, columns=['MatchId', 'Red', 'Blue', 'WinnerId'])

In [14]:
# Total number of matches
len(match_df) 

946172

In [15]:
# Total number of matches which align with a known character
match_df = match_df[(match_df['Red'].isin(joined_dataset['Name']))&(match_df['Blue'].isin(joined_dataset['Name']))]
len(match_df)

844499

In [25]:
match_df['RedWidth'] = None
match_df['RedHeight'] = None
match_df['RedId'] = None
match_df['BlueWidth'] = None
match_df['BlueHeight'] = None
match_df['BlueId'] = None

In [27]:
for character in joined_dataset.iterrows():
    print(character[0])
    match_df[match_df['Red'] == character[1]['Name']]['RedId'] = character[0]

1
2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


3
4
5
6
7
8
9
10
11
12
13
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
116
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
145
146
147
148
149
150
151
152
154
155
156
157
158
159
160
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
261
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
29

1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1971
1972
1973
1974
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2155


5247
5248
5249
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5369
5370
5371
5372
5373
5374
5375
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5421
5422
5423
5424
5425
5426
5427
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5448
5449
5450
5451
5452
5453
5454
5455


6949
6950
6951
6952
6953
6954
6955
6956
6957
6958
6959
6960
6961
6962
6964
6965
6966
6967
6968
6969
6970
6971
6972
6973
6974
6975
6976
6977
6978
6979
6980
6981
6982
6983
6984
6985
6986
6987
7988
7989
7990
7991
7993
7994
7996
7997
7998
7999
8001
8002
8003
8005
8006
8007
8008
8009
8010
8011
8012
8013
8014
8015
8016
8017
8018
8019
8020
8021
8022
8023
8024
8025
8026
8027
8028
8029
8030
8031
8032
8033
8034
8035
8036
8037
8038
8039
8041
8042
8044
8045
8046
8047
8048
8049
8050
8051
8052
8053
8054
8055
8056
8057
8058
8060
8061
8062
8063
8064
8065
8066
8067
8068
8069
8070
8071
8072
8073
8074
8075
8076
8077
8078
8079
8081
8082
8083
8084
8085
8086
8087
8088
8089
8090
8091
8092
8093
8094
8095
8096
8097
8098
8099
8100
8101
8102
8103
8104
8105
8106
8107
8108
8109
8110
8111
8112
8113
8114
8115
8116
8117
8118
8119
8120
8121
8122
8123
8124
8125
8127
8128
8129
8130
8131
8132
8133
8134
8135
8136
8137
8138
8140
8141
8142
8144
8146
8147
8148
8150
8151
8152
8153
8154
8155
8156
8157
8158
8159
8160
8161
8162


10077
10078
10079
10080
10081
10082
10083
10084
10085
10086
10087
10088
10089
10090
10091
10092
10093
10094
10095
10096
10097
10098
10099
10100
10101
10102
10103
10104
10105
10106
10107
10108
10109
10110
10111
10112
10113
10114
10115
10116
10117
10118
10119
10120
10122
10123
10124
10125
10126
10127
10128
10129
10130
10131
10132
10133
10134
10135
10136
10137
10138
10139
10140
10141
10142
10143
10144
10145
10146
10147
10148
10149
10150
10151
10152
10153
10154
10155
10157
10158
10159
10160
10161
10162
10163
10164
10165
10166
10167
10168
10169
10170
10171
10172
10173
10175
10176
10177
10178
10179
10180
10181
10182
10183
10184
10185
10186
10187
10188
10189
10190
10191
10192
10194
10195
10196
10198
10199
10200
10201
10202
10203
10204
10205
10206
10207
10208
10209
10210
10211
10212
10213
10214
10215
10217
10218
10219
10221
10222
10223
10224
10225
10226
10227
10228
10229
10230
10231
10232
10233
10234
10235
10236
10237
10238
10239
10240
10241
10242
10243
10244
10245
10246
10247
10248
10249
1025

11974
11975
11976
11977
11978
11979
11980
11981
11982
11983
11984
11985
11987
11988
11989
11990
11991
11992
11993
11994
11995
11996
11997
11998
11999
12000
12001
12002
12003
12004
12005
12006
12007
12008
12009
12010
12011
12012
12013
12014
12015
12016
12017
12018
12019
12020
12021
12022
12023
12024
12025
12026
12027
12028
12029
12030
12031
12032
12033
12034
12035
12036
12037
12038
12039
12040
12041
12042
12043
12044
12045
12046
12047
12049
12050
12051
12052
12053
12054
12055
12056
12057
12058
12059
12060
12061
12062
12063
12064
12065
12066
12067
12068
12069
12070
12071
12072
12073
12074
12075
12076
12077
12078
12079
12080
12081
12082
12083
12084
12085
12086
12087
12088
12089
12090
12091
12092
12093
12094
12095
12096
12097
12098
12099
12100
12101
12102
12103
12104
12105
12106
12107
12108
12109
12110
12111
12112
12113
12114
12115
12116
12117
12118
12119
12120
12121
12122
12123
12124
12125
12126
12127
12128
12129
12130
12131
12132
12133
12134
12135
12136
12137
12138
12139
12140
12141
1214

13346
13347
13348
13349
13350
13351
13352
13353
13354
13355
13356
13357
13358
13359
13360
13361
13362
13363
13364
13365
13366
13367
13368
13369
13370
13371
13372
13373
13374
13375
13376
13377
13378
13379
13380
13381
13382
13383
13384
13385
13386
13387
13388
13389
13390
13391
13392
13393
13394
13395
13396
13397
13398
13399
13400
13401
13402
13403
13404
13405
13406
13407
13408
13409
13410
13411
13412
13413
13414
13415
13416
13417
13418
13419
13420
13421
13422
13423
13424
13425
13426
13427
13428
13429
13430
13431
13432
13433
13434
13435
13436
13437
13438
13439
13440
13441
13442
13443
13444
13445
13446
13447
13448
13449
13450
13451
13452
13453
13454
13455
13456
13457
13458
13459
13460
13461
13462
13463
13464
13465
13466
13467
13468
13469
13470
13471
13472
13473
13474
13475
13476
13477
13478
13479
13480
13481
13482
13483
13484
13485
13486
13487
13488
13489
13490
13491
13492
13493
13494
13495
13496
13497
13498
13499
13500
13501
13502
13503
13504
13505
13506
13507
13508
13509
13510
13511
1351

In [28]:
for character in joined_dataset.iterrows():
    print(character[0])
    match_df[match_df['Blue'] == character[1]['Name']]['BlueId'] = character[0]

1
2
3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


4
5
6
7
8
9
10
11
12
13
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
116
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
145
146
147
148
149
150
151
152
154
155
156
157
158
159
160
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
261
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294


1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1971
1972
1973
1974
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2155
2156
2157


5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
5274
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5369
5370
5371
5372
5373
5374
5375
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5421
5422
5423
5424
5425
5426
5427
5429
5430
5431
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
5443
5444
5445
5446
5448
5449
5450
5451
5452
5453
5454
5455
5456
5457
5458
5459


6953
6954
6955
6956
6957
6958
6959
6960
6961
6962
6964
6965
6966
6967
6968
6969
6970
6971
6972
6973
6974
6975
6976
6977
6978
6979
6980
6981
6982
6983
6984
6985
6986
6987
7988
7989
7990
7991
7993
7994
7996
7997
7998
7999
8001
8002
8003
8005
8006
8007
8008
8009
8010
8011
8012
8013
8014
8015
8016
8017
8018
8019
8020
8021
8022
8023
8024
8025
8026
8027
8028
8029
8030
8031
8032
8033
8034
8035
8036
8037
8038
8039
8041
8042
8044
8045
8046
8047
8048
8049
8050
8051
8052
8053
8054
8055
8056
8057
8058
8060
8061
8062
8063
8064
8065
8066
8067
8068
8069
8070
8071
8072
8073
8074
8075
8076
8077
8078
8079
8081
8082
8083
8084
8085
8086
8087
8088
8089
8090
8091
8092
8093
8094
8095
8096
8097
8098
8099
8100
8101
8102
8103
8104
8105
8106
8107
8108
8109
8110
8111
8112
8113
8114
8115
8116
8117
8118
8119
8120
8121
8122
8123
8124
8125
8127
8128
8129
8130
8131
8132
8133
8134
8135
8136
8137
8138
8140
8141
8142
8144
8146
8147
8148
8150
8151
8152
8153
8154
8155
8156
8157
8158
8159
8160
8161
8162
8163
8164
8165
8166


10079
10080
10081
10082
10083
10084
10085
10086
10087
10088
10089
10090
10091
10092
10093
10094
10095
10096
10097
10098
10099
10100
10101
10102
10103
10104
10105
10106
10107
10108
10109
10110
10111
10112
10113
10114
10115
10116
10117
10118
10119
10120
10122
10123
10124
10125
10126
10127
10128
10129
10130
10131
10132
10133
10134
10135
10136
10137
10138
10139
10140
10141
10142
10143
10144
10145
10146
10147
10148
10149
10150
10151
10152
10153
10154
10155
10157
10158
10159
10160
10161
10162
10163
10164
10165
10166
10167
10168
10169
10170
10171
10172
10173
10175
10176
10177
10178
10179
10180
10181
10182
10183
10184
10185
10186
10187
10188
10189
10190
10191
10192
10194
10195
10196
10198
10199
10200
10201
10202
10203
10204
10205
10206
10207
10208
10209
10210
10211
10212
10213
10214
10215
10217
10218
10219
10221
10222
10223
10224
10225
10226
10227
10228
10229
10230
10231
10232
10233
10234
10235
10236
10237
10238
10239
10240
10241
10242
10243
10244
10245
10246
10247
10248
10249
10250
10251
1025

11976
11977
11978
11979
11980
11981
11982
11983
11984
11985
11987
11988
11989
11990
11991
11992
11993
11994
11995
11996
11997
11998
11999
12000
12001
12002
12003
12004
12005
12006
12007
12008
12009
12010
12011
12012
12013
12014
12015
12016
12017
12018
12019
12020
12021
12022
12023
12024
12025
12026
12027
12028
12029
12030
12031
12032
12033
12034
12035
12036
12037
12038
12039
12040
12041
12042
12043
12044
12045
12046
12047
12049
12050
12051
12052
12053
12054
12055
12056
12057
12058
12059
12060
12061
12062
12063
12064
12065
12066
12067
12068
12069
12070
12071
12072
12073
12074
12075
12076
12077
12078
12079
12080
12081
12082
12083
12084
12085
12086
12087
12088
12089
12090
12091
12092
12093
12094
12095
12096
12097
12098
12099
12100
12101
12102
12103
12104
12105
12106
12107
12108
12109
12110
12111
12112
12113
12114
12115
12116
12117
12118
12119
12120
12121
12122
12123
12124
12125
12126
12127
12128
12129
12130
12131
12132
12133
12134
12135
12136
12137
12138
12139
12140
12141
12142
12143
1214

13347
13348
13349
13350
13351
13352
13353
13354
13355
13356
13357
13358
13359
13360
13361
13362
13363
13364
13365
13366
13367
13368
13369
13370
13371
13372
13373
13374
13375
13376
13377
13378
13379
13380
13381
13382
13383
13384
13385
13386
13387
13388
13389
13390
13391
13392
13393
13394
13395
13396
13397
13398
13399
13400
13401
13402
13403
13404
13405
13406
13407
13408
13409
13410
13411
13412
13413
13414
13415
13416
13417
13418
13419
13420
13421
13422
13423
13424
13425
13426
13427
13428
13429
13430
13431
13432
13433
13434
13435
13436
13437
13438
13439
13440
13441
13442
13443
13444
13445
13446
13447
13448
13449
13450
13451
13452
13453
13454
13455
13456
13457
13458
13459
13460
13461
13462
13463
13464
13465
13466
13467
13468
13469
13470
13471
13472
13473
13474
13475
13476
13477
13478
13479
13480
13481
13482
13483
13484
13485
13486
13487
13488
13489
13490
13491
13492
13493
13494
13495
13496
13497
13498
13499
13500
13501
13502
13503
13504
13505
13506
13507
13508
13509
13510
13511
13512
1351

In [30]:
joined_dataset.to_csv('/home/jordan/character_information.csv')

In [34]:
match_df.to_csv('/home/jordan/match_data.csv')

In [46]:
match_df[['MatchId', 'Red', 'Blue', 'WinnerId']].to_csv('/home/jordan/match_data.csv')

In [38]:
temp_dict = match_df.to_dict('index')

In [53]:
character_dict = joined_dataset.reset_index().set_index('Name').to_dict('index')

In [56]:
for item in temp_dict:
    temp_dict[item]['RedId'] = character_dict[temp_dict[item]['Red']]['CharacterId']
    temp_dict[item]['RedWidth'] = character_dict[temp_dict[item]['Red']]['Width']
    temp_dict[item]['RedHeight'] = character_dict[temp_dict[item]['Red']]['Height']
    temp_dict[item]['BlueId'] = character_dict[temp_dict[item]['Blue']]['CharacterId']
    temp_dict[item]['BlueWidth'] = character_dict[temp_dict[item]['Blue']]['Width']
    temp_dict[item]['BlueHeight'] = character_dict[temp_dict[item]['Blue']]['Height']
    if item % 10000 == 0:
        print(item)

0
10000
20000
30000
40000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000


In [60]:
pd.DataFrame(temp_dict).transpose().to_csv('/home/jordan/joined_match_data.csv')

# Constructing Win Matrices

In [61]:
complete_match_data = pd.DataFrame(temp_dict).transpose()

In [92]:
natural_indexed_dict = joined_dataset.reset_index().reset_index().set_index('Name').to_dict('index')
natural_indexed_dict

{'Mr. bison': {'index': 0,
  'CharacterId': 1,
  'Author': 'Vk, updated by terry kuo, adapted by tenebrous',
  'Width': 122.0,
  'Height': 99.0},
 'Bullseye': {'index': 1,
  'CharacterId': 2,
  'Author': 'Doom & o ilusionista',
  'Width': 86.0,
  'Height': 113.0},
 'Elecman': {'index': 2,
  'CharacterId': 3,
  'Author': 'O ilusionista & akitosama',
  'Width': 94.0,
  'Height': 80.0},
 'Ooze-o': {'index': 3,
  'CharacterId': 4,
  'Author': 'O ilusionista',
  'Width': 93.0,
  'Height': 125.0},
 'Shin kazuma': {'index': 4,
  'CharacterId': 5,
  'Author': 'O ilusionista',
  'Width': 66.0,
  'Height': 94.0},
 'Mexican typhoon': {'index': 5,
  'CharacterId': 6,
  'Author': 'O ilusionista',
  'Width': 124.0,
  'Height': 131.0},
 'Zangief maskered': {'index': 6,
  'CharacterId': 7,
  'Author': 'O ilusionista',
  'Width': 138.0,
  'Height': 115.0},
 'Predalien': {'index': 7,
  'CharacterId': 8,
  'Author': 'Josipknezovicz',
  'Width': 237.0,
  'Height': 130.0},
 'Eva-00': {'index': 8,
  'Charac

In [74]:
complete_match_data = complete_match_data[complete_match_data['WinnerId'] != 'N/A']

In [100]:
win_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
for match in complete_match_data.iterrows():
    if match[0] % 10000 == 0:
        print(match[0])
    winner_index = natural_indexed_dict[match[1][match[1]['WinnerId']]]['index']
    if match[1]['WinnerId'] == 'Red':
        loser_index = natural_indexed_dict[match[1]['Blue']]['index']
    else:
        loser_index = natural_indexed_dict[match[1]['Red']]['index']
    win_matrix[winner_index, loser_index] = win_matrix[winner_index, loser_index] + 1

0
10000
20000
30000
40000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000


In [104]:
np.savetxt("/home/jordan/basic_win_matrix.csv", win_matrix, delimiter=",")

In [105]:
hitbox_dict = joined_dataset.reset_index().to_dict('index')
hitbox_dict

{0: {'CharacterId': 1,
  'Name': 'Mr. bison',
  'Author': 'Vk, updated by terry kuo, adapted by tenebrous',
  'Width': 122.0,
  'Height': 99.0},
 1: {'CharacterId': 2,
  'Name': 'Bullseye',
  'Author': 'Doom & o ilusionista',
  'Width': 86.0,
  'Height': 113.0},
 2: {'CharacterId': 3,
  'Name': 'Elecman',
  'Author': 'O ilusionista & akitosama',
  'Width': 94.0,
  'Height': 80.0},
 3: {'CharacterId': 4,
  'Name': 'Ooze-o',
  'Author': 'O ilusionista',
  'Width': 93.0,
  'Height': 125.0},
 4: {'CharacterId': 5,
  'Name': 'Shin kazuma',
  'Author': 'O ilusionista',
  'Width': 66.0,
  'Height': 94.0},
 5: {'CharacterId': 6,
  'Name': 'Mexican typhoon',
  'Author': 'O ilusionista',
  'Width': 124.0,
  'Height': 131.0},
 6: {'CharacterId': 7,
  'Name': 'Zangief maskered',
  'Author': 'O ilusionista',
  'Width': 138.0,
  'Height': 115.0},
 7: {'CharacterId': 8,
  'Name': 'Predalien',
  'Author': 'Josipknezovicz',
  'Width': 237.0,
  'Height': 130.0},
 8: {'CharacterId': 9,
  'Name': 'Eva-00'

In [108]:
height_advantage_win_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
height_advantage_loss_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
for match in complete_match_data.iterrows():
    if match[0] % 10000 == 0:
        print(match[0])
    winner_index = natural_indexed_dict[match[1][match[1]['WinnerId']]]['index']
    if match[1]['WinnerId'] == 'Red':
        loser_index = natural_indexed_dict[match[1]['Blue']]['index']
    else:
        loser_index = natural_indexed_dict[match[1]['Red']]['index']
    if hitbox_dict[winner_index]['Height'] > hitbox_dict[loser_index]['Height']:
        height_advantage_win_matrix[winner_index, loser_index] = height_advantage_win_matrix[winner_index, loser_index] + 1
    else:
        height_advantage_loss_matrix[winner_index, loser_index] = height_advantage_loss_matrix[winner_index, loser_index] + 1
        
np.savetxt("/home/jordan/height_advantage_win_matrix.csv", height_advantage_win_matrix, delimiter=",")
np.savetxt("/home/jordan/height_advantage_loss_matrix.csv", height_advantage_loss_matrix, delimiter=",")

0
10000
20000
30000
40000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000


In [107]:
width_advantage_win_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
width_advantage_loss_matrix = np.zeros((len(joined_dataset), len(joined_dataset)))
for match in complete_match_data.iterrows():
    if match[0] % 10000 == 0:
        print(match[0])
    winner_index = natural_indexed_dict[match[1][match[1]['WinnerId']]]['index']
    if match[1]['WinnerId'] == 'Red':
        loser_index = natural_indexed_dict[match[1]['Blue']]['index']
    else:
        loser_index = natural_indexed_dict[match[1]['Red']]['index']
    if hitbox_dict[winner_index]['Width'] > hitbox_dict[loser_index]['Width']:
        width_advantage_win_matrix[winner_index, loser_index] = width_advantage_win_matrix[winner_index, loser_index] + 1
    else:
        width_advantage_loss_matrix[winner_index, loser_index] = width_advantage_loss_matrix[winner_index, loser_index] + 1
        
np.savetxt("/home/jordan/width_advantage_win_matrix.csv", width_advantage_win_matrix, delimiter=",")
np.savetxt("/home/jordan/width_advantage_loss_matrix.csv", width_advantage_loss_matrix, delimiter=",")

0
10000
20000
30000
40000
60000
70000
80000
90000
100000
110000
120000
130000
140000
150000
160000
170000
180000
190000
200000
220000
230000
240000
250000
260000
270000
280000
290000
300000
320000
330000
340000
350000
360000
380000
390000
400000
410000
420000
430000
440000
450000
460000
470000
490000
500000
510000
520000
530000
540000
550000
560000
570000
580000
590000
600000
610000
620000
630000
640000
650000
660000
670000
680000
690000
710000
720000
740000
750000
760000
770000
780000
790000
800000
810000
820000
840000
850000
860000
870000
880000
890000
910000
920000
930000
940000
