# CPL Data Analysis

### Loading The Data

In [301]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Load every CSV file found in the data directory into a dictionary keyed by
# the file name without its extension (e.g. "foo.csv" -> data_dict["foo"]).
data_path = '../data/'
data_dict = {}

for file in os.listdir(data_path):
    if file.endswith(".csv"):
        # splitext removes only the trailing extension, so a file name that
        # contains extra dots keeps its full stem instead of being truncated
        # at the first dot (which could make two files share one key).
        key = os.path.splitext(file)[0]
        data_dict[key] = pd.read_csv(os.path.join(data_path, file))

data_dict.keys()

dict_keys(['CPL-Player-By-Game-2019', 'CPL-Team-Totals-2019', 'CPL-Team-By-Game-2019', 'CPL-Player-Totals-2019'])

In [302]:
# Season totals per player, restricted to players with more than 450 minutes
# of game time (a player with exactly 450 minutes is excluded); the "Rank"
# column is removed.
player_totals = (
    data_dict['CPL-Player-Totals-2019']
    .loc[lambda totals: totals["Min"].gt(450)]
    .drop(columns="Rank")
)
player_totals.head()

Unnamed: 0,GM,playerId,optaPersonId,Min,Age,Position,firstName,lastName,player,team,...,PsCmpA3,PsCmpD3,PsAttD3,PsAttM3,Pull Back Passes Completed,PsDist,ChlngeLost,Aerial%,Aerials,ExpGAg
0,28,d3uo72swkorck8mo41kv1yp61,423548,2470,23.0,Left Centre Back,Daniel,Krutzen,D. Krutzen,Forge,...,54,459,607,873,0,22.8,16,55.8%,86,0.0
1,29,7lqj7hx4na2kvl5doan797vmx,250776,2425,25.0,Right Centre Back,Dominick,Zator,D. Zator,Cavalry,...,67,523,697,638,0,21.8,11,61.1%,149,0.0
2,25,622w13rktz2os2jlo286col3p,168505,2159,24.0,Central Midfielder,Manuel,Aparicio,M. Aparicio,York9,...,197,123,172,837,0,17.5,49,34.2%,38,0.0
3,28,2zedqkn15thv6wx5xmgv48gd1,140123,2443,29.0,Central Midfielder,Kyle,Bekker,K. Bekker,Forge,...,296,118,162,862,0,19.4,25,40.5%,42,0.0
4,28,6o11en42rzgf741oqqlu9a47p,159509,2520,24.0,Right Centre Back,Luca,Gasparotto,L. Gasparotto,York9,...,13,716,830,535,0,20.6,10,76.5%,162,0.0


In [303]:
# Keep only the players whose listed position is one of the central-midfield
# roles below, and renumber the rows from 0.
cm_positions = {"Central Midfielder", "Defensive Midfielder", "Centre Attacking Midfielder"}
# Series.isin is the vectorised membership test and always yields a boolean
# mask, so the row filter is well-defined even when player_totals is empty.
cm_totals = player_totals[player_totals['Position'].isin(cm_positions)].reset_index(drop=True)
cm_totals.head()

Unnamed: 0,GM,playerId,optaPersonId,Min,Age,Position,firstName,lastName,player,team,...,PsCmpA3,PsCmpD3,PsAttD3,PsAttM3,Pull Back Passes Completed,PsDist,ChlngeLost,Aerial%,Aerials,ExpGAg
0,25,622w13rktz2os2jlo286col3p,168505,2159,24.0,Central Midfielder,Manuel,Aparicio,M. Aparicio,York9,...,197,123,172,837,0,17.5,49,34.2%,38,0.0
1,28,2zedqkn15thv6wx5xmgv48gd1,140123,2443,29.0,Central Midfielder,Kyle,Bekker,K. Bekker,Forge,...,296,118,162,862,0,19.4,25,40.5%,42,0.0
2,25,ar6i26vtb7c9tjfrlul7pp3v9,108975,1768,26.0,Central Midfielder,Julian,Büscher,J. Büscher,Cavalry,...,164,108,147,652,1,20.9,54,30.6%,36,0.0
3,20,2rv2jwanai39vawrmnws6vidx,231697,1746,24.0,Central Midfielder,Alexander,Gonzalez Moreno,A. Gonzalez,Pacific,...,57,346,393,775,0,17.6,24,44.7%,38,0.0
4,25,7t6lyj5pvvgzfscj775acuqc5,173165,2249,23.0,Centre Attacking Midfielder,Marco,Bustos,M. Bustos,Valour,...,305,47,60,476,1,16.3,27,32.4%,34,0.0


In [298]:
# Column groups used to narrow the midfielder table.

# Descriptive columns kept alongside every metric subset.
general_features = ["GM", "Min", "Age", "Position", "player", "team", "TmTchPct"]

# Metric columns retained for the midfielder analysis.
cm_features = [
    "SOG", "SOTInBox", "Off", "OffTgtOBox", "ShotDirFK", "GoalDirFK",
    "GoalInBx", "GoalOBx", "GoalOP", "PsOpHfFl", "PsOnHfFl", "PsAtt",
    "PsOnHfScs", "PsOpHfScs", "ExpG", "ExpA", "BgChncCrtd", "1v1",
    "1v1InBox", "Sc1v1InBox", "SucflDuels", "Success1v1", "ThrghBalls", "ThrghBlCmp",
    "Touches", "DuelLs", "FailTackle", "SucflTkls", "Int", "DefTouch",
    "TcklMade%", "Clrnce", "HeadClear", "Recovery", "Turnover", "Disposs",
    "TchsA3", "TchsD3", "TchsM3", "TouchOpBox", "%PassFwd", "Pass%",
    "PsCmpM3", "PsCmpA3", "PsCmpD3", "PsAttD3", "PsAttM3", "PsDist",
    "ChlngeLost", "Aerial%", "Aerials", "ErrShot", "Goal",
]

# Restrict cm_totals to the metric columns followed by the descriptive ones
# (this order determines the resulting column order).
cm_totals = cm_totals.loc[:, [*cm_features, *general_features]]

In [300]:
# Creative Metrics: columns selected for the creative view of the midfielders
creative_metrics = [
    "PsAtt", "%PassFwd", "ExpA", "BgChncCrtd",
    "1v1", "Success1v1", "ThrghBalls", "ThrghBlCmp",
    "DuelLs", "SucflDuels", "Turnover",
    "TchsA3", "TchsD3", "TchsM3", "TouchOpBox",
    "Pass%", "PsCmpM3", "PsCmpA3", "PsCmpD3",
    "PsAttD3", "PsAttM3", "PsDist",
]

# Offensive Metrics (placeholder: no columns listed yet)

# Defensive Metrics (placeholder: no columns listed yet)

# Creative columns first, then the descriptive columns; the cell displays the
# table ordered by ascending "Success1v1".
cm_creative = cm_totals.loc[:, [*creative_metrics, *general_features]]
cm_creative.sort_values("Success1v1")

Unnamed: 0,PsAtt,%PassFwd,ExpA,BgChncCrtd,1v1,Success1v1,ThrghBalls,ThrghBlCmp,DuelLs,SucflDuels,...,PsAttD3,PsAttM3,PsDist,GM,Min,Age,Position,player,team,TmTchPct
34,225,39.1%,1.06,2,6,2,0,0,34,28,...,43,159,19.2,15,517,21.0,Central Midfielder,S. Furlano,York9,0.005457
31,361,36.0%,0.34,1,5,2,0,0,50,43,...,89,247,21.4,10,533,26.0,Central Midfielder,M. Eustaquio,Cavalry,0.004987
30,353,26.3%,2.8,4,8,2,1,1,102,94,...,34,196,14.4,24,1467,27.0,Centre Attacking Midfielder,Oliver Minatel,Cavalry,0.004987
23,511,51.3%,1.56,2,7,3,0,0,79,86,...,109,313,21.0,20,1317,34.0,Central Midfielder,N. Ledgerwood,Cavalry,0.004987
32,292,34.6%,0.15,0,6,3,2,1,44,42,...,77,181,17.7,8,596,21.0,Central Midfielder,D. Norman,Pacific,0.005207
26,494,31.8%,0.46,0,6,4,0,0,71,55,...,97,339,16.9,13,901,33.0,Defensive Midfielder,Galán,Valour,0.004643
21,592,38.7%,0.34,0,6,4,0,0,89,93,...,122,429,17.2,18,1435,32.0,Central Midfielder,E. John,HFX Wanderers,0.004544
5,1081,37.8%,2.86,4,7,4,1,0,66,79,...,236,725,20.3,23,2010,30.0,Central Midfielder,Soria,Edmonton,0.004883
14,907,31.6%,0.89,1,12,7,3,1,88,100,...,237,601,19.1,22,1731,19.0,Central Midfielder,A. Hojabrpour,Pacific,0.005207
15,881,34.8%,1.96,1,22,9,2,2,114,103,...,167,614,19.6,21,1772,24.0,Defensive Midfielder,L. Béland-Goyette,Valour,0.004643
