# 1. Libraries and setup

In [79]:
import pandas as pd
import seaborn as sns
import re

In [80]:
# Load the per-set match results that the rest of the notebook cleans
df = pd.read_csv(filepath_or_buffer="EUACSets.csv")

In [81]:
# Preview the last rows of the loaded match data
df.tail()

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement
2182,Grimwood96,EnteMCF,Grimwood96,2-1,EnteMCF,5,45,05/04/20,2,1,15,13
2183,Cosalina,SAVE_ARMS_222,Cosalina,2-0,SAVE_ARMS_222,4,45,05/04/20,5,7,12,13
2184,replicant___,Udon_Pasta,replicant___,2-0,Udon_Pasta,3,45,05/04/20,4,2,13,9
2185,Penzo,Raffa_,Raffa_,0-2,Penzo,2,45,05/04/20,8,9,9,5
2186,Ripha,Suricat,Ripha,2-0,Suricat,1,45,05/04/20,1,3,16,13


In [82]:
# Load the player lookup table (one row per account, with its Start / Challonge IDs)
playerdf = pd.read_csv(filepath_or_buffer="PlayerDetails.csv")

In [83]:
# Preview the first rows of the player table
playerdf.head()

Unnamed: 0,Start ID,Player,Challonge ID
0,1152276.0,FR | Maxou0708,
1,1118825.0,Rapha_MTH,
2,1133810.0,Sabaca,
3,1152521.0,TCM | Raffa,
4,1152258.0,FrankTank,


In [84]:
# Column dtypes and non-null counts; columns with fewer non-null entries than rows contain missing data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2187 entries, 0 to 2186
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Player1       2184 non-null   object
 1   Player2       2185 non-null   object
 2   Winner        2184 non-null   object
 3   Score         2184 non-null   object
 4   Loser         2185 non-null   object
 5   MatchNo       2187 non-null   int64 
 6   EUAC          2187 non-null   object
 7   Date          2187 non-null   object
 8   P1 Seed       2187 non-null   int64 
 9   P1 Placement  2187 non-null   int64 
 10  P2 Seed       2187 non-null   int64 
 11  P2 Placement  2187 non-null   int64 
dtypes: int64(5), object(7)
memory usage: 205.2+ KB


In [85]:
# Summary statistics for the numeric columns
df.describe()

Unnamed: 0,MatchNo,P1 Seed,P1 Placement,P2 Seed,P2 Placement
count,2187.0,2187.0,2187.0,2187.0,2187.0
mean,11.574303,4.479195,4.236854,6.924554,5.653406
std,7.840901,3.373773,2.900556,3.992133,3.36532
min,1.0,1.0,1.0,1.0,1.0
25%,5.0,2.0,2.0,4.0,3.0
50%,10.0,4.0,4.0,6.0,5.0
75%,16.0,6.0,5.0,9.0,9.0
max,49.0,25.0,17.0,25.0,25.0


In [86]:
# List the column names of the match data
df.columns

Index(['Player1', 'Player2', 'Winner', 'Score', 'Loser', 'MatchNo', 'EUAC',
       'Date', 'P1 Seed', 'P1 Placement', 'P2 Seed', 'P2 Placement'],
      dtype='object')

In [87]:
# (number of rows, number of columns)
df.shape

(2187, 12)

# 2. Missing Data

## 2.1 Null data

In [88]:
# Number of missing values in each column
df.isna().sum()

Player1         3
Player2         2
Winner          3
Score           3
Loser           2
MatchNo         0
EUAC            0
Date            0
P1 Seed         0
P1 Placement    0
P2 Seed         0
P2 Placement    0
dtype: int64

In [89]:
# Show every row that has at least one missing value
has_missing_value = df.isna().any(axis="columns")
df.loc[has_missing_value]

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement
890,Iceman92,Cosalina,Iceman92,,Cosalina,10,47,03/05/20,4,5,10,9
929,Grimwood96,Cosalina,Grimwood96,,Cosalina,4,49,31/05/20,3,5,6,9
933,Cosalina,Iloria,Iloria,,Cosalina,8,49,31/05/20,6,9,9,5
1686,Mr_Adal,,,1-2,Mr_Adal,2,91,24/04/22,2,5,7,3
1689,,Iceman92,Iceman92,0-2,,5,91,24/04/22,7,3,3,2
1692,,YoshiBowser,,2-1,YoshiBowser,8,91,24/04/22,7,3,5,5
1694,,Giusesbica004,,2-0,Giusesbica004,10,91,24/04/22,7,3,4,4
1695,Iceman92,,Iceman92,3-1,,11,91,24/04/22,3,2,7,3


Checking these instances manually shows that I did not account for cases where a player is DQ'd by an admin and therefore no score is reported. These missing scores will be replaced with "0--1" to mark a DQ. <br>
Our missing player is Gachimummy from EUAC91. They did not have a Challonge account and the admins manually filled their scores for them. <br>
<br>
Unsure if I'll ever use the code from that notebook again but good to know if I ever do

In [90]:
# A missing score is recorded with the DQ marker "0--1"
missing_value_fills = {"Score": "0--1"}

# Only one player is missing, so every blank name column gets that player's name
missing_value_fills.update(
    dict.fromkeys(["Player1", "Player2", "Winner", "Loser"], "Gachimummy")
)

df = df.fillna(value=missing_value_fills)

In [91]:
# Re-count missing values per column to confirm the fill worked
df.isna().sum()

Player1         0
Player2         0
Winner          0
Score           0
Loser           0
MatchNo         0
EUAC            0
Date            0
P1 Seed         0
P1 Placement    0
P2 Seed         0
P2 Placement    0
dtype: int64

All missing data has been filled in

## 2.2 Incorrect Scores

In [92]:
# Enumerate every distinct score string to spot malformed or joke entries
df["Score"].unique()

array(['2-0', '0--1', '2-1', '3-1', '3-0', '0-2', '86-101', '1-2', '2-3',
       '1-3', '3-2', '0-3', '0-2,2-0,1-2', '1-0,1-0', '2-1,2-0',
       '2-1,1-2,1-2', '2-0,2-0', '0-2,2-0,2-1', '0-1,0-1', '1-0,1-0,0-0',
       '2-0,1-2,2-0', '1-0,0-1,1-0', '0-1,1-0,1-0', '0-1,0-1,0-1',
       '0-2,2-1,2-0', '0-1,0-1,0-0', '1-0,0-1,0-1', '0-1,0-1,1-0,1-0,0-1',
       '1-0,1-0,1-0', '-1-0', '0-0', '2--10', '1-0', '-5-2', '12-0',
       '10-0', '96-0', '42-0', '1-1', '420--69', '0-92', '69-67',
       '1337-1335'], dtype=object)

As mentioned in the previous notebook, users can input scores themselves. And sometimes they like to joke around. <br>
Another thing that was overlooked was that the Challonge API returns all scores for a match. Example: An admin changing the score due to the match being misreported.

In [93]:
# How often each score string occurs
df["Score"].value_counts()

2-0                    871
2-1                    307
0-2                    300
1-2                    197
3-1                     93
3-0                     80
3-2                     63
2-3                     59
0-3                     55
1-3                     51
0--1                    24
-1-0                    13
0-0                     12
1-0,1-0                 10
2-0,2-0                  8
1-0,1-0,0-0              5
1-0                      5
0-1,1-0,1-0              3
1-0,0-1,1-0              3
0-1,0-1                  3
2-1,2-0                  2
2-0,1-2,2-0              2
86-101                   1
69-67                    1
0-92                     1
420--69                  1
1-1                      1
42-0                     1
96-0                     1
10-0                     1
12-0                     1
-5-2                     1
2--10                    1
0-2,2-0,1-2              1
1-0,1-0,1-0              1
0-1,0-1,1-0,1-0,0-1      1
1-0,0-1,0-1              1
0

In [94]:
# A match can carry several comma-separated score entries (e.g. "2-1,2-0").
# Treat the most recent (last) entry as the true score. The vectorised .str
# accessor does this without a Python-level loop or leftover loop variables.
df["Score"] = df["Score"].str.split(",").str[-1]

In [95]:
# Distinct scores once only the last comma-separated entry is kept
df["Score"].unique()

array(['2-0', '0--1', '2-1', '3-1', '3-0', '0-2', '86-101', '1-2', '2-3',
       '1-3', '3-2', '0-3', '1-0', '0-1', '0-0', '-1-0', '2--10', '-5-2',
       '12-0', '10-0', '96-0', '42-0', '1-1', '420--69', '0-92', '69-67',
       '1337-1335'], dtype=object)

In [96]:
# One lookup table for all hand-picked score corrections. No replacement value
# is itself a key, so applying the table in one pass gives the same result as
# applying the substitutions one after another.
score_fixes = {
    # Write scores from the winner's perspective. Example: "0-2" becomes "2-0"
    "0-2": "2-0",
    "1-2": "2-1",
    "2-3": "3-2",
    "1-3": "3-1",
    "0-3": "3-0",
    # Entries treated as DQ's
    "0-0": "0--1",
    "-1-0": "0--1",
    "1-0": "0--1",
    "0-1": "0--1",
    # Remaining odd entries mapped onto a regular score
    "1-1": "2-1",
    "2--10": "2-0",
    "-5-2": "2-0",
}
df["Score"] = df["Score"].replace(score_fixes)

In [97]:
# Distinct scores left after the manual replacements
df["Score"].unique()

array(['2-0', '0--1', '2-1', '3-1', '3-0', '86-101', '3-2', '12-0',
       '10-0', '96-0', '42-0', '420--69', '0-92', '69-67', '1337-1335'],
      dtype=object)

In [98]:
# Replace scores with a 2-0 if misreported with a joke response. It's the most common score
def _tidy_score(raw_score):
    """Return "2-0" when either side of a single-dash score exceeds 3, else the score unchanged.

    Strings that do not contain exactly one "-" (such as the DQ marker "0--1")
    are passed through untouched.
    """
    if raw_score.count("-") != 1:
        return raw_score
    games = raw_score.split("-")
    is_joke = any(int(side) > 3 for side in games)
    return "2-0" if is_joke else raw_score


# Written to a scratch column so it can be compared against Score before replacing it
df["Test"] = [_tidy_score(entry) for entry in df["Score"]]

In [99]:
# Share of rows where the scratch column Test agrees with Score
df["Score"].eq(df["Test"]).mean()

0.9963420210333791

Slightly off...

In [100]:
# Count of rows where Test agrees (True) or disagrees (False) with Score
df["Score"].eq(df["Test"]).value_counts()

True     2179
False       8
dtype: int64

In [101]:
# Promote the scratch column to Score, then discard the scratch column
df = df.assign(Score=df["Test"]).drop(columns="Test")

In [102]:
# Preview the first rows after the score clean-up
df.head()

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement
0,Alumento,Owdy,Alumento,2-0,Owdy,1,1,21/10/17,11,7,22,17
1,BambooBoss,FrankTank,FrankTank,2-0,BambooBoss,2,1,21/10/17,14,13,19,5
2,Kotorious BRD,Altair,Kotorious BRD,0--1,Altair,3,1,21/10/17,15,13,18,17
3,RD | | Dushni,TCM | Raffa,TCM | Raffa,2-0,RD | | Dushni,4,1,21/10/17,12,17,21,4
4,FR|TCM | InkAlyut,FR | Maxou0708,FR|TCM | InkAlyut,2-1,FR | Maxou0708,5,1,21/10/17,13,9,20,1


In [103]:
# Distinct scores remaining; anything that is not a regular score or the DQ marker still needs fixing
df["Score"].unique()

array(['2-0', '0--1', '2-1', '3-1', '3-0', '3-2', '420--69'], dtype=object)

In [104]:
# The one joke score left over (it has two dashes) is set to "2-0"
df["Score"] = df["Score"].mask(df["Score"] == "420--69", "2-0")

Programmatically, it could have been done better but it gets the job done

# 3. Duplicate Data

This section deals with players that have multiple accounts. Players that have a Start account also have a separate account for Challonge, unless they happen to use the same username for both.

## 3.1 Start and Challonge Players

Players that have both a challonge and start account

### 3.1.1 Fuzzy searching

In [105]:
import difflib

In [106]:
# Quick demonstration of fuzzy matching: which candidates resemble "Yam"?
difflib.get_close_matches(word="Yam", possibilities=["Yamber", "Grimwood"])

['Yamber']

In [107]:
# For each player name, look for similar-looking names among all the other players
player_names = list(playerdf["Player"])
for player in player_names:
    # Candidates are every name that differs from the one being checked
    others = [candidate for candidate in player_names if candidate != player]
    if not others:
        continue
    matches = difflib.get_close_matches(player, others, cutoff=0.7)
    if matches:
        print(f"Match(es) found for {player}: ")
        print(matches)

Match(es) found for ocrim: 
['ocrim_ger']
Match(es) found for Altair: 
['Alistair__', 'Alistair13']
Match(es) found for ocrim_ger: 
['ocrim']
Match(es) found for Marloni: 
['Warlon']
Match(es) found for Warlon: 
['Marloni']
Match(es) found for Alistair13: 
['Alistair__', 'Altair']
Match(es) found for Kritzz: 
['DrKritz']
Match(es) found for DrKritz: 
['Kritzz']
Match(es) found for Alistair__: 
['Alistair13', 'Altair']


Fuzzy matching is unreliable. Variations of the cut-off were tested but left out of the notebook.

### 3.1.2 Manually merge accounts

Using a map to merge accounts and then "flatten" the dataframe. Information for accounts that were duplicates was provided by a player of the ARMS community

In [108]:
# Show up to the first 20 rows of the player table
playerdf.head(20)

Unnamed: 0,Start ID,Player,Challonge ID
0,1152276.0,FR | Maxou0708,
1,1118825.0,Rapha_MTH,
2,1133810.0,Sabaca,
3,1152521.0,TCM | Raffa,
4,1152258.0,FrankTank,
5,1114568.0,ocrim,
6,1139232.0,Alumento,
7,1133727.0,SC☆Momso,
8,1114889.0,VilleViljar,
9,1152242.0,TCM | BD82,


In [109]:
# Maps a player's Start name to their Challonge name.
# The pairs were provided by a player in the community.
merged = dict([
    ("TCM | Raffa", "Raffa_"),
    ("FR|TCM | InkAlyut", "InkA_"),
    ("ocrim", "ocrim_ger"),
    ("FR | Maxou0708", "D2S_Maxou"),
    ("Rapha_MTH", "Ripha"),
])

In [110]:
# Apply the Start -> Challonge name mapping only to the columns that hold
# player names. A frame-wide replace would also scan (and could rewrite)
# unrelated columns such as Score, EUAC and Date.
player_columns = ["Player1", "Player2", "Winner", "Loser"]
df[player_columns] = df[player_columns].replace(merged)

In [111]:
# Check that the mapped names now appear in the match data
df.head()

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement
0,Alumento,Owdy,Alumento,2-0,Owdy,1,1,21/10/17,11,7,22,17
1,BambooBoss,FrankTank,FrankTank,2-0,BambooBoss,2,1,21/10/17,14,13,19,5
2,Kotorious BRD,Altair,Kotorious BRD,0--1,Altair,3,1,21/10/17,15,13,18,17
3,RD | | Dushni,Raffa_,Raffa_,2-0,RD | | Dushni,4,1,21/10/17,12,17,21,4
4,InkA_,D2S_Maxou,InkA_,2-1,D2S_Maxou,5,1,21/10/17,13,9,20,1


In [112]:
# Player_Map holds the mapped name when the player is in `merged`, otherwise the original name
playerdf["Player_Map"] = playerdf["Player"].map(lambda name: merged.get(name, name))

In [113]:
# First 10 player rows, now including the Player_Map column
playerdf.head(10)

Unnamed: 0,Start ID,Player,Challonge ID,Player_Map
0,1152276.0,FR | Maxou0708,,D2S_Maxou
1,1118825.0,Rapha_MTH,,Ripha
2,1133810.0,Sabaca,,Sabaca
3,1152521.0,TCM | Raffa,,Raffa_
4,1152258.0,FrankTank,,FrankTank
5,1114568.0,ocrim,,ocrim_ger
6,1139232.0,Alumento,,Alumento
7,1133727.0,SC☆Momso,,SC☆Momso
8,1114889.0,VilleViljar,,VilleViljar
9,1152242.0,TCM | BD82,,TCM | BD82


In [114]:
# Display the player table (the rendered output abbreviates the middle rows)
playerdf

Unnamed: 0,Start ID,Player,Challonge ID,Player_Map
0,1152276.0,FR | Maxou0708,,D2S_Maxou
1,1118825.0,Rapha_MTH,,Ripha
2,1133810.0,Sabaca,,Sabaca
3,1152521.0,TCM | Raffa,,Raffa_
4,1152258.0,FrankTank,,FrankTank
...,...,...,...,...
135,0.0,DoJoSeph,6901460.0,DoJoSeph
136,0.0,ProfPie,6467815.0,ProfPie
137,0.0,Udon_Pasta,3673406.0,Udon_Pasta
138,0.0,EnteMCF,3626355.0,EnteMCF


In [115]:
# Collapse all rows that share a Player_Map into a single record, keeping the
# first non-null Start ID and Challonge ID found within each group.
id_columns = ["Start ID", "Challonge ID"]
merged_df = playerdf.groupby("Player_Map", as_index=False)[id_columns].first()

In [116]:
# First 10 rows of the grouped player table
merged_df.head(10)

Unnamed: 0,Player_Map,Start ID,Challonge ID
0,2ednar,0.0,164493.0
1,AU_Leenykins,0.0,2992428.0
2,Alfon42,0.0,2942327.0
3,Alistair13,0.0,4324849.0
4,Alistair__,0.0,5390120.0
5,Altair,1152245.0,
6,Alumento,1139232.0,
7,Anasuis,0.0,6258388.0
8,AngelMac,0.0,2694368.0
9,Anomika,0.0,2944161.0


In [117]:
# Put the ID columns first; copy so later edits work on an independent frame
column_order = ["Start ID", "Challonge ID", "Player_Map"]
merged_df = merged_df.loc[:, column_order].copy()

In [118]:
# A missing ID becomes 0 in both ID columns
merged_df = merged_df.fillna({"Start ID": 0, "Challonge ID": 0})

In [119]:
# With no NaN left, both ID columns can be stored as integers
merged_df = merged_df.astype({"Start ID": int, "Challonge ID": int})

In [120]:
# Preview the player table after filling and converting the ID columns
merged_df.head()

Unnamed: 0,Start ID,Challonge ID,Player_Map
0,0,164493,2ednar
1,0,2992428,AU_Leenykins
2,0,2942327,Alfon42
3,0,4324849,Alistair13
4,0,5390120,Alistair__


## 3.2 Multiple accounts/Deleted accounts

Some players have had multiple accounts throughout the years. We will use a map to track who is who and place this in the playerdf table, using the player's most recent account as the ID.

In [121]:
# Most recent account -> older / deleted accounts belonging to the same player
_previous_accounts = {
    "Alistair__": ["deleted4353211", "Alistair13"],
    "BensonUii": ["deleted3629844"],
    "DoJoSeph": ["Kritzz", "DrKritz", "Anasuis"],
    "SAVE_ARMS_222": ["PawlSLax"],
}

# Flattened lookup in the direction the merge needs: old account -> most recent account
mergeMap = {
    old_name: current_name
    for current_name, old_names in _previous_accounts.items()
    for old_name in old_names
}

In [122]:
# Keep the per-account name in Player before Player_Map is overwritten with canonical names
merged_df = merged_df.assign(Player=merged_df["Player_Map"])

In [123]:
# Point old / deleted accounts at the player's most recent account; other names stay as they are
merged_df["Player_Map"] = merged_df["Player_Map"].map(
    lambda account: mergeMap.get(account, account)
)

In [124]:
# Sort by Player_Map so accounts mapped to the same name sit together; show the first 40 rows
merged_df.sort_values(by="Player_Map").head(40)

Unnamed: 0,Start ID,Challonge ID,Player_Map,Player
0,0,164493,2ednar,2ednar
1,0,2992428,AU_Leenykins,AU_Leenykins
2,0,2942327,Alfon42,Alfon42
3,0,4324849,Alistair__,Alistair13
4,0,5390120,Alistair__,Alistair__
121,0,4353211,Alistair__,deleted4353211
5,1152245,0,Altair,Altair
6,1139232,0,Alumento,Alumento
8,0,2694368,AngelMac,AngelMac
9,0,2944161,Anomika,Anomika


In [125]:
# Preview the match data again before adding the tournament number
df.head()

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement
0,Alumento,Owdy,Alumento,2-0,Owdy,1,1,21/10/17,11,7,22,17
1,BambooBoss,FrankTank,FrankTank,2-0,BambooBoss,2,1,21/10/17,14,13,19,5
2,Kotorious BRD,Altair,Kotorious BRD,0--1,Altair,3,1,21/10/17,15,13,18,17
3,RD | | Dushni,Raffa_,Raffa_,2-0,RD | | Dushni,4,1,21/10/17,12,17,21,4
4,InkA_,D2S_Maxou,InkA_,2-1,D2S_Maxou,5,1,21/10/17,13,9,20,1


# 4. Tournament No

The EUAC column would be a great tournament identifier but it has one tournament that uses "PVW" instead of a number. So we'll introduce a new column that will number these tournaments. Great for sorting and keeps the tournament codes

In [126]:
# Distinct tournament codes; these are strings and include the non-numeric "PVW"
df["EUAC"].unique()

array(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23',
       '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34',
       '39', '43', '46', '47', '48', '49', '50', '51', '52', '53', '54',
       '55', '56', '57', '58', '59', '60', '61', '62', '63', '64', '65',
       '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76',
       '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87',
       '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98',
       '99', '100', '101', '102', '103', '104', '105', '106', '107',
       'PVW', '108', '109', '110', '35', '36', '37', '38', '40', '41',
       '42', '44', '45'], dtype=object)

In [127]:
# Start the new column as a copy of the tournament codes; it is converted to numbers next
df = df.assign(**{"Tournament No": df["EUAC"]})

In [128]:
def _tournament_number(euac_code):
    """Translate a tournament code into its sequential tournament number.

    "PVW" takes number 108. Numeric codes below 108 keep their own value, and
    numeric codes from 108 upwards are shifted up by one to make room for it.
    """
    if euac_code == "PVW":
        return 108
    number = int(euac_code)
    return number if number < 108 else number + 1


df["Tournament No"] = [_tournament_number(code) for code in df["Tournament No"]]

In [129]:
# Inspect the last 200 rows to compare EUAC against the new Tournament No column
df.tail(200)

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement,Tournament No
1987,Iceman92,Yamber,Iceman92,2-1,Yamber,13,108,16/04/23,4,4,6,5,109
1988,Mortal_Instrument,Giusesbica004,Mortal_Instrument,2-0,Giusesbica004,14,108,16/04/23,3,3,5,5,109
1989,Iceman92,Mortal_Instrument,Mortal_Instrument,2-1,Iceman92,15,108,16/04/23,4,4,3,3,109
1990,Ripha,Mortal_Instrument,Ripha,3-0,Mortal_Instrument,17,108,16/04/23,1,1,3,3,109
1991,Alistair__,Ripha,Ripha,3-1,Alistair__,18,108,16/04/23,2,2,1,1,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2182,Grimwood96,EnteMCF,Grimwood96,2-1,EnteMCF,5,45,05/04/20,2,1,15,13,45
2183,Cosalina,SAVE_ARMS_222,Cosalina,2-0,SAVE_ARMS_222,4,45,05/04/20,5,7,12,13,45
2184,replicant___,Udon_Pasta,replicant___,2-0,Udon_Pasta,3,45,05/04/20,4,2,13,9,45
2185,Penzo,Raffa_,Raffa_,2-0,Penzo,2,45,05/04/20,8,9,9,5,45


In [130]:
# Distinct tournament numbers; every value should now be an integer
df["Tournament No"].unique()

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  39,  43,  46,  47,  48,
        49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,
        62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
        75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,
        88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100,
       101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,  35,  36,
        37,  38,  40,  41,  42,  44,  45], dtype=int64)

In [131]:
# Order rows by tournament, then by match number within each tournament
sort_keys = ["Tournament No", "MatchNo"]
df = df.sort_values(by=sort_keys, ascending=True)

In [132]:
# Preview the first rows after sorting
df.head()

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement,Tournament No
0,Alumento,Owdy,Alumento,2-0,Owdy,1,1,21/10/17,11,7,22,17,1
1,BambooBoss,FrankTank,FrankTank,2-0,BambooBoss,2,1,21/10/17,14,13,19,5,1
2,Kotorious BRD,Altair,Kotorious BRD,0--1,Altair,3,1,21/10/17,15,13,18,17,1
3,RD | | Dushni,Raffa_,Raffa_,2-0,RD | | Dushni,4,1,21/10/17,12,17,21,4,1
4,InkA_,D2S_Maxou,InkA_,2-1,D2S_Maxou,5,1,21/10/17,13,9,20,1,1


# 5. Exporting

In [133]:
# Convert types
df = df.astype({
    "Player1": str,
    "Player2": str,
    "Winner": str,
    "Score": str,
    "Loser": str,
    "MatchNo": int,
    "EUAC": str,  # "PVW" exists in there. Otherwise, would be a number
    "P1 Seed": int,
    "P2 Seed": int,
    "P1 Placement": int,
    "P2 Placement": int,
    "Tournament No": int,
})

# Dates are stored day-first as dd/mm/yy (e.g. "31/05/20"). Without an explicit
# format, ambiguous values such as "05/04/20" can be read month-first (May 4th
# instead of April 5th), while unambiguous ones are read day-first. Spelling
# out the format parses every row the same way and raises on any row that does
# not match it.
df["Date"] = pd.to_datetime(df["Date"], format="%d/%m/%y")

In [134]:
# Preview the first rows after type conversion; Date is now a datetime column
df.head()

Unnamed: 0,Player1,Player2,Winner,Score,Loser,MatchNo,EUAC,Date,P1 Seed,P1 Placement,P2 Seed,P2 Placement,Tournament No
0,Alumento,Owdy,Alumento,2-0,Owdy,1,1,2017-10-21,11,7,22,17,1
1,BambooBoss,FrankTank,FrankTank,2-0,BambooBoss,2,1,2017-10-21,14,13,19,5,1
2,Kotorious BRD,Altair,Kotorious BRD,0--1,Altair,3,1,2017-10-21,15,13,18,17,1
3,RD | | Dushni,Raffa_,Raffa_,2-0,RD | | Dushni,4,1,2017-10-21,12,17,21,4,1
4,InkA_,D2S_Maxou,InkA_,2-1,D2S_Maxou,5,1,2017-10-21,13,9,20,1,1


In [135]:
# Final check of dtypes and non-null counts for the match data before export
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2187 entries, 0 to 2020
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Player1        2187 non-null   object        
 1   Player2        2187 non-null   object        
 2   Winner         2187 non-null   object        
 3   Score          2187 non-null   object        
 4   Loser          2187 non-null   object        
 5   MatchNo        2187 non-null   int32         
 6   EUAC           2187 non-null   object        
 7   Date           2187 non-null   datetime64[ns]
 8   P1 Seed        2187 non-null   int32         
 9   P1 Placement   2187 non-null   int32         
 10  P2 Seed        2187 non-null   int32         
 11  P2 Placement   2187 non-null   int32         
 12  Tournament No  2187 non-null   int32         
dtypes: datetime64[ns](1), int32(6), object(6)
memory usage: 187.9+ KB


In [136]:
# Final check of dtypes and non-null counts for the player table before export
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 135 entries, 0 to 134
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Start ID      135 non-null    int32 
 1   Challonge ID  135 non-null    int32 
 2   Player_Map    135 non-null    object
 3   Player        135 non-null    object
dtypes: int32(2), object(2)
memory usage: 3.3+ KB


In [137]:
# Write both cleaned tables to CSV, leaving out the DataFrame index
exports = (
    (df, "CleanedSets.csv"),
    (merged_df, "CleanedDetails.csv"),
)
for frame, csv_name in exports:
    frame.to_csv(csv_name, index=False)