In [58]:
import pandas as pd 
import numpy as np
import os 


In [59]:
# Display setting only: let wide frames show up to 85 columns before pandas truncates them.
pd.options.display.max_columns = 85

## Reading the Data
<hr>

In [60]:
# The season data was scraped from two different sources, so the first part of
# this notebook cleans both frames and unifies their attributes.
df1, df2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "season2017-2018.csv",     # first source
        "season2017-2018(2).csv",  # second source
    )
)

In [61]:
# "Unnamed: 0" is a leftover indexing column in the first source; remove it.
df1 = df1.drop(columns=["Unnamed: 0"])

In [62]:
# Convert the date column from string to datetime.
# The date is central to the preprocessing: it is the key the matches from the
# two sources are joined on, so the same match must carry the same date in both.
# The data is sorted on the date first and then on other attributes (see below).
#
# The dashes are kept and matched by the format string. Removing them first and
# parsing with "%Y%m%d" would make any non-zero-padded month or day ambiguous,
# because the digit groups could then be split in more than one way.
df1["date"] = pd.to_datetime(df1["date"], format="%Y-%m-%d")

In [63]:
# Parse the day/month/year strings of the second source with the slashes kept in
# the format. Stripping them first is unsafe: "1/12/2017" would become "1122017",
# which "%d%m%Y" can read as day 11, month 2 instead of day 1, month 12.
df2["Date"] = pd.to_datetime(df2["Date"], format="%d/%m/%Y")

In [64]:
# dropping unwanted columns 

In [65]:
# Columns of the second source that are not needed:
#   Div, FTHG (full-time home goals), FTAG (full-time away goals),
#   HTHG (half-time home goals), HTAG (half-time away goals), HTR and Referee.
unwanted_columns = ["Div", "FTHG", "FTAG", "HTHG", "HTAG", "HTR", "Referee"]
df2 = df2.drop(columns=unwanted_columns)

In [66]:
# Drop the betting data: every column from "B365H" through the last one.
index = df2.columns.get_loc("B365H")
# Pass the column labels themselves. Handing a DataFrame slice to drop() only
# works because iterating a DataFrame happens to yield its column names.
df2.drop(columns=df2.columns[index:], inplace=True)

In [67]:
# Order the second source by date, then home team, then away team.
# The first source is sorted the same way, so the rows of both sources are meant
# to line up match for match (row i of one describes the same match as row i of
# the other). The later join depends on this ordering being consistent.
df2 = df2.sort_values(["Date", "HomeTeam", "AwayTeam"])

In [68]:
# Order the first source by date, then home team, then away team.
df1 = df1.sort_values(["date", "home", "away"])

In [69]:
# TCP -> Team Complete Pass
# TTP -> Team Total Pass
#
# The raw data is a string such as "200 of 400-- 50%". Keep the piece in front of
# the dash and split it around "of": the first part is the team's completed
# passes (200), the second its total passes (400).
# The extracted pieces are stored exactly as split, i.e. still as text.
home_pass_counts = df1["passeshome"].str.split("—").apply(lambda pieces: pieces[0].split("of"))
df1["TCP"] = home_pass_counts.apply(lambda counts: counts[0])
df1["TTP"] = home_pass_counts.apply(lambda counts: counts[1])

# The source column is no longer needed once both values are extracted.
df1 = df1.drop(columns=["passeshome"])


In [70]:
# OCP -> Opponent Complete Pass
# OTP -> Opponent Total Pass
#
# Same extraction for the opponent, except that here the "<complete> of <total>"
# text is taken from the piece AFTER the dash (element 1 of the split).
# The extracted pieces are stored exactly as split, i.e. still as text.
away_pass_counts = df1["passesaway"].str.split("—").apply(lambda pieces: pieces[1].split("of"))
df1["OCP"] = away_pass_counts.apply(lambda counts: counts[0])
df1["OTP"] = away_pass_counts.apply(lambda counts: counts[1])

# The source column is no longer needed once both values are extracted.
df1 = df1.drop(columns=["passesaway"])

In [71]:
# Remove the "saveshome" / "savesaway" columns from the first source.
df1 = df1.drop(columns=["saveshome", "savesaway"])

In [72]:
# Rename the statistics of the first source to short codes.
# Codes ending in T belong to the team, codes ending in O to the opponent:
#
#   POT / POO -> possession    (team / opponent)
#   CRT / CRO -> crosses       (team / opponent)
#   TCT / TCO -> touches       (team / opponent)
#   TTT / TTO -> tackles       (team / opponent)
#   IPT / IPO -> interceptions (team / opponent)
#   ART / ARO -> aerials       (team / opponent)
#   CLT / CLO -> clearances    (team / opponent)
#
# "Team" data means the home team and "opponent" means the away team.
# How the Home attribute is used in the model is explained in detail later on.
stat_codes = {
    "possessionhome": "POT",
    "possessionaway": "POO",
    "crosseshome": "CRT",
    "crossesaway": "CRO",
    "toucheshome": "TCT",
    "touchesaway": "TCO",
    "tackleshome": "TTT",
    "tacklesaway": "TTO",
    "interceptionhome": "IPT",
    "interceptionaway": "IPO",
    "aerialshome": "ART",
    "aerialsaway": "ARO",
    "clearnacehome": "CLT",
    "clearnaceaway": "CLO",
}
df1 = df1.rename(columns=stat_codes)

In [73]:
# Use the match date as the index of both sources.
df1 = df1.set_index("date")
df2 = df2.set_index("Date")

In [74]:
# Merge the two sources side by side.
# Both frames are indexed by match date, and a date occurs once per match played
# that day, so the index is not unique. The column-wise concat therefore only
# pairs the rows correctly when both (sorted) indexes are identical; check that
# explicitly so a mismatch is reported clearly instead of surfacing as an
# obscure reindexing error or as misaligned rows.
# NOTE(review): equal dates do not prove that matches on the same day are in the
# same order in both sources (team names may be spelled differently) - confirm.
if not df1.index.equals(df2.index):
    raise ValueError(
        "df1 and df2 do not share the same sorted date index; "
        "the two sources cannot be paired row by row"
    )

# HomeTeam / AwayTeam from the second source duplicate home / away from the first.
total_data = pd.concat([df1, df2], axis=1).drop(["HomeTeam", "AwayTeam"], axis=1)

In [75]:
# TS  -> Team shots
# OS  -> Opponent shots
# TST -> Team shots on target
# OST -> Opponent shots on target
#
# The remaining columns follow the same pattern: the leading "H" (home) becomes
# "T" (team) and the leading "A" (away) becomes "O" (opponent).
home_away_codes = {}
for stat in ("S", "ST", "F", "C", "Y", "R"):
    home_away_codes["H" + stat] = "T" + stat
    home_away_codes["A" + stat] = "O" + stat

total_data = total_data.rename(columns=home_away_codes)

In [76]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import copy 
# This step should be done after concatenating all the data examples,
# because every season a handful of teams are relegated from the division
# and others are promoted into it,
# so the full set of team names is only known once all seasons are loaded.
# Encoding the names as integers is not always ideal for ML, as the integers have different numerical values,
# suggesting that one is bigger than the other (for example Pear > Apple),
# which is not at all the case. To avoid introducing this kind of problem you'd want to use OneHotEncoder.


In [77]:
# From here on the home side is called "team" and the away side "opponent".
total_data = total_data.rename(columns={"home": "team", "away": "opponent"})

In [78]:
# Every row of total_data is written from the home side's point of view.
total_data = total_data.assign(Home=1)



## Important Part 
<hr>
**Here I give the model the name of the team and then the name of the opponent. The most effective attribute is "are you playing at home or not", so I duplicate the data and reverse the order of the two sides.**

Example:

- If the home team is Arsenal and the away team is Liverpool,
- the first example should look like this: {"team":"Arsenal","opponent":"Liverpool",...,"Home":"1"}
- That example is taken from Arsenal's perspective, so what about Liverpool?
- We should add another example to the data that describes Liverpool's perspective:
- {"team":"Liverpool","opponent":"Arsenal",...,"Home":"0"}

In [79]:
# Independent deep copy of the merged data; it becomes the away-side view.
temp = total_data.copy(deep=True)

In [80]:
# Inspect the column labels (and their order) of the merged frame.
total_data.columns

Index(['team', 'opponent', 'POT', 'POO', 'CRT', 'CRO', 'TCT', 'TCO', 'TTT',
       'TTO', 'IPT', 'IPO', 'ART', 'ARO', 'CLT', 'CLO', 'TCP', 'TTP', 'OCP',
       'OTP', 'FTR', 'TS', 'OS', 'TST', 'OST', 'TF', 'OF', 'TC', 'OC', 'TY',
       'OY', 'TR', 'OR', 'Home'],
      dtype='object')

In [81]:
# Swap the labels of every team/opponent column pair so that `temp` describes
# each match from the other club's perspective: what was the team becomes the
# opponent and vice versa. This way both clubs' views are present, not just one.
perspective_pairs = [
    ("team", "opponent"),
    ("POT", "POO"), ("CRT", "CRO"), ("TCT", "TCO"), ("TTT", "TTO"),
    ("IPT", "IPO"), ("ART", "ARO"), ("CLT", "CLO"),
    ("TCP", "OCP"), ("TTP", "OTP"),
    ("TS", "OS"), ("TST", "OST"), ("TF", "OF"),
    ("TC", "OC"), ("TY", "OY"), ("TR", "OR"),
]

swapped_names = {}
for team_col, opponent_col in perspective_pairs:
    swapped_names[team_col] = opponent_col
    swapped_names[opponent_col] = team_col

# Only the labels change; the data under them stays where it is.
temp = temp.rename(columns=swapped_names)

In [82]:
# Rearrange the columns of `temp` into the same order as total_data.
column_order = [
    "team", "opponent",
    "POT", "POO", "CRT", "CRO", "TCT", "TCO", "TTT", "TTO",
    "IPT", "IPO", "ART", "ARO", "CLT", "CLO",
    "TCP", "TTP", "OCP", "OTP",
    "FTR",
    "TS", "OS", "TST", "OST", "TF", "OF", "TC", "OC", "TY", "OY", "TR", "OR",
    "Home",
]
temp = temp.loc[:, column_order]

In [83]:
# Sanity check: the mirrored frame must expose the same columns, in the same
# order, as the original. Asserted so that a mismatch actually stops the
# notebook; a bare comparison in the middle of a cell is evaluated and dropped.
assert total_data.columns.equals(temp.columns), "total_data and temp columns differ"

# Masks for the three full-time results, taken before FTR is overwritten.
filt_win = (total_data["FTR"] == 'H')
filt_draw = (total_data["FTR"] == 'D')
filt_loss = (total_data["FTR"] == 'A')

# Number of draws, home wins and away wins (in that order), to see how much
# playing at home helps a team reach a positive result.
print(total_data.loc[filt_draw].shape[0])
print(total_data.loc[filt_win].shape[0])
print(total_data.loc[filt_loss].shape[0])

# Encode FTR from the perspective of the "team" (the home side):
#   'H' -> 2  the team wins
#   'D' -> 1  draw
#   'A' -> 0  the team loses
total_data.loc[filt_win, "FTR"] = 2
total_data.loc[filt_draw, "FTR"] = 1
total_data.loc[filt_loss, "FTR"] = 0

# These rows are the home side's view of each match.
total_data["Home"] = 1
total_data[["team", "opponent", "FTR", "Home"]]

99
173
108


Unnamed: 0,team,opponent,FTR,Home
2017-08-11,Arsenal,Leicester City,2,1
2017-08-12,Brighton & Hove Albion,Manchester City,0,1
2017-08-12,Chelsea,Burnley,0,1
2017-08-12,Crystal Palace,Huddersfield Town,0,1
2017-08-12,Everton,Stoke City,2,1
...,...,...,...,...
2018-05-13,Newcastle United,Chelsea,2,1
2018-05-13,Southampton,Manchester City,0,1
2018-05-13,Swansea City,Stoke City,0,1
2018-05-13,Tottenham Hotspur,Leicester City,2,1


In [84]:
# The second team's perspective is the converse of the first one:
# a home win ('H') is a loss (0) for the away side, an away win ('A') is a
# win (2), and a draw ('D') stays 1.
filt_win, filt_draw, filt_loss = (temp["FTR"] == result for result in ("H", "D", "A"))

for mask, code in ((filt_win, 0), (filt_loss, 2), (filt_draw, 1)):
    temp.loc[mask, "FTR"] = code

# These rows are the away side's view of each match.
temp = temp.assign(Home=0)
temp[["team", "opponent", "FTR", "Home"]]

Unnamed: 0,team,opponent,FTR,Home
2017-08-11,Leicester City,Arsenal,0,0
2017-08-12,Manchester City,Brighton & Hove Albion,2,0
2017-08-12,Burnley,Chelsea,2,0
2017-08-12,Huddersfield Town,Crystal Palace,2,0
2017-08-12,Stoke City,Everton,0,0
...,...,...,...,...
2018-05-13,Chelsea,Newcastle United,0,0
2018-05-13,Manchester City,Southampton,2,0
2018-05-13,Stoke City,Swansea City,2,0
2018-05-13,Leicester City,Tottenham Hotspur,0,0


In [85]:
# Stack the home-side rows and the away-side rows into one frame.
season_data_complete = pd.concat(objs=[total_data, temp], axis="index")

In [86]:
# Display the stacked frame (home-side rows followed by away-side rows).
season_data_complete

Unnamed: 0,team,opponent,POT,POO,CRT,CRO,TCT,TCO,TTT,TTO,IPT,IPO,ART,ARO,CLT,CLO,TCP,TTP,OCP,OTP,FTR,TS,OS,TST,OST,TF,OF,TC,OC,TY,OY,TR,OR,Home
2017-08-11,Arsenal,Leicester City,68%,32%,20,18,801,427,23,17,13,11,18,17,33,29,565,681,192,315,2,27,6,10,3,9,12,9,4,0,1,0,0,1
2017-08-12,Brighton & Hove Albion,Manchester City,23%,77%,8,27,343,897,10,10,12,9,13,13,43,8,146,248,718,820,0,6,14,2,4,6,9,3,10,0,2,0,0,1
2017-08-12,Chelsea,Burnley,62%,38%,26,16,667,453,10,8,9,13,15,21,23,44,468,568,257,348,0,19,10,6,5,16,11,8,5,3,3,2,0,1
2017-08-12,Crystal Palace,Huddersfield Town,56%,45%,27,14,561,460,24,28,19,9,18,16,20,32,333,444,230,356,0,14,8,4,6,7,19,12,9,1,3,0,0,1
2017-08-12,Everton,Stoke City,60%,40%,18,27,670,473,17,18,19,24,19,23,43,26,425,545,233,356,2,9,9,4,1,13,10,6,7,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-05-13,Chelsea,Newcastle United,58%,42%,14,15,719,550,18,20,14,18,26,11,39,12,512,606,335,443,0,6,16,2,6,10,11,2,4,1,0,0,0,0
2018-05-13,Manchester City,Southampton,69%,31%,27,11,745,407,13,24,4,11,18,16,21,31,534,642,191,295,2,13,8,2,3,10,8,12,1,1,3,0,0,0
2018-05-13,Stoke City,Swansea City,43%,58%,5,31,569,697,21,16,18,8,22,11,46,4,354,442,498,598,2,8,26,5,11,9,12,0,6,2,1,0,0,0
2018-05-13,Leicester City,Tottenham Hotspur,36%,64%,9,13,414,617,23,20,16,9,15,10,19,19,210,299,414,523,0,16,14,9,6,13,9,4,4,2,1,0,0,0


In [87]:
# Convert the team name into a one-hot vector (one "team_<name>" column per club).
# The team name is a crucial input: if, for example, Liverpool is playing
# Brighton, Liverpool's chances of winning will be higher.
temp_home = pd.get_dummies(data=season_data_complete["team"], prefix="team")
temp_home

Unnamed: 0,team_Arsenal,team_Bournemouth,team_Brighton & Hove Albion,team_Burnley,team_Chelsea,team_Crystal Palace,team_Everton,team_Huddersfield Town,team_Leicester City,team_Liverpool,team_Manchester City,team_Manchester United,team_Newcastle United,team_Southampton,team_Stoke City,team_Swansea City,team_Tottenham Hotspur,team_Watford,team_West Bromwich Albion,team_West Ham United
2017-08-11,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-05-13,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [88]:
# Same one-hot encoding for the opponent name ("opponent_<name>" columns).
temp_opponent = pd.get_dummies(data=season_data_complete["opponent"], prefix="opponent")
temp_opponent

Unnamed: 0,opponent_Arsenal,opponent_Bournemouth,opponent_Brighton & Hove Albion,opponent_Burnley,opponent_Chelsea,opponent_Crystal Palace,opponent_Everton,opponent_Huddersfield Town,opponent_Leicester City,opponent_Liverpool,opponent_Manchester City,opponent_Manchester United,opponent_Newcastle United,opponent_Southampton,opponent_Stoke City,opponent_Swansea City,opponent_Tottenham Hotspur,opponent_Watford,opponent_West Bromwich Albion,opponent_West Ham United
2017-08-11,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [89]:
# Place the team and opponent one-hot columns next to each other.
hot_vectors = pd.concat(objs=[temp_home, temp_opponent], axis="columns")
hot_vectors

Unnamed: 0,team_Arsenal,team_Bournemouth,team_Brighton & Hove Albion,team_Burnley,team_Chelsea,team_Crystal Palace,team_Everton,team_Huddersfield Town,team_Leicester City,team_Liverpool,team_Manchester City,team_Manchester United,team_Newcastle United,team_Southampton,team_Stoke City,team_Swansea City,team_Tottenham Hotspur,team_Watford,team_West Bromwich Albion,team_West Ham United,opponent_Arsenal,opponent_Bournemouth,opponent_Brighton & Hove Albion,opponent_Burnley,opponent_Chelsea,opponent_Crystal Palace,opponent_Everton,opponent_Huddersfield Town,opponent_Leicester City,opponent_Liverpool,opponent_Manchester City,opponent_Manchester United,opponent_Newcastle United,opponent_Southampton,opponent_Stoke City,opponent_Swansea City,opponent_Tottenham Hotspur,opponent_Watford,opponent_West Bromwich Albion,opponent_West Ham United
2017-08-11,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2017-08-12,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-05-13,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0


In [90]:
# Put the one-hot columns in front of the existing match attributes.
season_data_complete = pd.concat(objs=[hot_vectors, season_data_complete], axis="columns")
season_data_complete

Unnamed: 0,team_Arsenal,team_Bournemouth,team_Brighton & Hove Albion,team_Burnley,team_Chelsea,team_Crystal Palace,team_Everton,team_Huddersfield Town,team_Leicester City,team_Liverpool,team_Manchester City,team_Manchester United,team_Newcastle United,team_Southampton,team_Stoke City,team_Swansea City,team_Tottenham Hotspur,team_Watford,team_West Bromwich Albion,team_West Ham United,opponent_Arsenal,opponent_Bournemouth,opponent_Brighton & Hove Albion,opponent_Burnley,opponent_Chelsea,opponent_Crystal Palace,opponent_Everton,opponent_Huddersfield Town,opponent_Leicester City,opponent_Liverpool,opponent_Manchester City,opponent_Manchester United,opponent_Newcastle United,opponent_Southampton,opponent_Stoke City,opponent_Swansea City,opponent_Tottenham Hotspur,opponent_Watford,opponent_West Bromwich Albion,opponent_West Ham United,team,opponent,POT,POO,CRT,CRO,TCT,TCO,TTT,TTO,IPT,IPO,ART,ARO,CLT,CLO,TCP,TTP,OCP,OTP,FTR,TS,OS,TST,OST,TF,OF,TC,OC,TY,OY,TR,OR,Home
2017-08-11,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,Arsenal,Leicester City,68%,32%,20,18,801,427,23,17,13,11,18,17,33,29,565,681,192,315,2,27,6,10,3,9,12,9,4,0,1,0,0,1
2017-08-12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,Brighton & Hove Albion,Manchester City,23%,77%,8,27,343,897,10,10,12,9,13,13,43,8,146,248,718,820,0,6,14,2,4,6,9,3,10,0,2,0,0,1
2017-08-12,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Chelsea,Burnley,62%,38%,26,16,667,453,10,8,9,13,15,21,23,44,468,568,257,348,0,19,10,6,5,16,11,8,5,3,3,2,0,1
2017-08-12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,Crystal Palace,Huddersfield Town,56%,45%,27,14,561,460,24,28,19,9,18,16,20,32,333,444,230,356,0,14,8,4,6,7,19,12,9,1,3,0,0,1
2017-08-12,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,Everton,Stoke City,60%,40%,18,27,670,473,17,18,19,24,19,23,43,26,425,545,233,356,2,9,9,4,1,13,10,6,7,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-05-13,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,Chelsea,Newcastle United,58%,42%,14,15,719,550,18,20,14,18,26,11,39,12,512,606,335,443,0,6,16,2,6,10,11,2,4,1,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,Manchester City,Southampton,69%,31%,27,11,745,407,13,24,4,11,18,16,21,31,534,642,191,295,2,13,8,2,3,10,8,12,1,1,3,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,Stoke City,Swansea City,43%,58%,5,31,569,697,21,16,18,8,22,11,46,4,354,442,498,598,2,8,26,5,11,9,12,0,6,2,1,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,Leicester City,Tottenham Hotspur,36%,64%,9,13,414,617,23,20,16,9,15,10,19,19,210,299,414,523,0,16,14,9,6,13,9,4,4,2,1,0,0,0


In [91]:
# The raw name columns are now represented by the one-hot columns.
season_data_complete = season_data_complete.drop(columns=["team", "opponent"])

In [92]:
# Display the frame after the team / opponent name columns were dropped.
season_data_complete

Unnamed: 0,team_Arsenal,team_Bournemouth,team_Brighton & Hove Albion,team_Burnley,team_Chelsea,team_Crystal Palace,team_Everton,team_Huddersfield Town,team_Leicester City,team_Liverpool,team_Manchester City,team_Manchester United,team_Newcastle United,team_Southampton,team_Stoke City,team_Swansea City,team_Tottenham Hotspur,team_Watford,team_West Bromwich Albion,team_West Ham United,opponent_Arsenal,opponent_Bournemouth,opponent_Brighton & Hove Albion,opponent_Burnley,opponent_Chelsea,opponent_Crystal Palace,opponent_Everton,opponent_Huddersfield Town,opponent_Leicester City,opponent_Liverpool,opponent_Manchester City,opponent_Manchester United,opponent_Newcastle United,opponent_Southampton,opponent_Stoke City,opponent_Swansea City,opponent_Tottenham Hotspur,opponent_Watford,opponent_West Bromwich Albion,opponent_West Ham United,POT,POO,CRT,CRO,TCT,TCO,TTT,TTO,IPT,IPO,ART,ARO,CLT,CLO,TCP,TTP,OCP,OTP,FTR,TS,OS,TST,OST,TF,OF,TC,OC,TY,OY,TR,OR,Home
2017-08-11,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,68%,32%,20,18,801,427,23,17,13,11,18,17,33,29,565,681,192,315,2,27,6,10,3,9,12,9,4,0,1,0,0,1
2017-08-12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,23%,77%,8,27,343,897,10,10,12,9,13,13,43,8,146,248,718,820,0,6,14,2,4,6,9,3,10,0,2,0,0,1
2017-08-12,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,62%,38%,26,16,667,453,10,8,9,13,15,21,23,44,468,568,257,348,0,19,10,6,5,16,11,8,5,3,3,2,0,1
2017-08-12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,56%,45%,27,14,561,460,24,28,19,9,18,16,20,32,333,444,230,356,0,14,8,4,6,7,19,12,9,1,3,0,0,1
2017-08-12,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,60%,40%,18,27,670,473,17,18,19,24,19,23,43,26,425,545,233,356,2,9,9,4,1,13,10,6,7,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-05-13,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,58%,42%,14,15,719,550,18,20,14,18,26,11,39,12,512,606,335,443,0,6,16,2,6,10,11,2,4,1,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,69%,31%,27,11,745,407,13,24,4,11,18,16,21,31,534,642,191,295,2,13,8,2,3,10,8,12,1,1,3,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,43%,58%,5,31,569,697,21,16,18,8,22,11,46,4,354,442,498,598,2,8,26,5,11,9,12,0,6,2,1,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,36%,64%,9,13,414,617,23,20,16,9,15,10,19,19,210,299,414,523,0,16,14,9,6,13,9,4,4,2,1,0,0,0


In [93]:
# Convert the possession percentages from text such as "68%" into a float
# fraction (0.68): remove the percent sign, cast to float, divide by 100.
for possession_col in ("POT", "POO"):
    as_text = season_data_complete[possession_col].str.replace("%", "")
    season_data_complete[possession_col] = as_text.astype("float") / 100


In [94]:
# Display the frame after POT / POO were converted to float fractions.
season_data_complete



Unnamed: 0,team_Arsenal,team_Bournemouth,team_Brighton & Hove Albion,team_Burnley,team_Chelsea,team_Crystal Palace,team_Everton,team_Huddersfield Town,team_Leicester City,team_Liverpool,team_Manchester City,team_Manchester United,team_Newcastle United,team_Southampton,team_Stoke City,team_Swansea City,team_Tottenham Hotspur,team_Watford,team_West Bromwich Albion,team_West Ham United,opponent_Arsenal,opponent_Bournemouth,opponent_Brighton & Hove Albion,opponent_Burnley,opponent_Chelsea,opponent_Crystal Palace,opponent_Everton,opponent_Huddersfield Town,opponent_Leicester City,opponent_Liverpool,opponent_Manchester City,opponent_Manchester United,opponent_Newcastle United,opponent_Southampton,opponent_Stoke City,opponent_Swansea City,opponent_Tottenham Hotspur,opponent_Watford,opponent_West Bromwich Albion,opponent_West Ham United,POT,POO,CRT,CRO,TCT,TCO,TTT,TTO,IPT,IPO,ART,ARO,CLT,CLO,TCP,TTP,OCP,OTP,FTR,TS,OS,TST,OST,TF,OF,TC,OC,TY,OY,TR,OR,Home
2017-08-11,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.68,0.32,20,18,801,427,23,17,13,11,18,17,33,29,565,681,192,315,2,27,6,10,3,9,12,9,4,0,1,0,0,1
2017-08-12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0.23,0.77,8,27,343,897,10,10,12,9,13,13,43,8,146,248,718,820,0,6,14,2,4,6,9,3,10,0,2,0,0,1
2017-08-12,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.62,0.38,26,16,667,453,10,8,9,13,15,21,23,44,468,568,257,348,0,19,10,6,5,16,11,8,5,3,3,2,0,1
2017-08-12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.56,0.45,27,14,561,460,24,28,19,9,18,16,20,32,333,444,230,356,0,14,8,4,6,7,19,12,9,1,3,0,0,1
2017-08-12,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.60,0.40,18,27,670,473,17,18,19,24,19,23,43,26,425,545,233,356,2,9,9,4,1,13,10,6,7,1,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-05-13,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.58,0.42,14,15,719,550,18,20,14,18,26,11,39,12,512,606,335,443,0,6,16,2,6,10,11,2,4,1,0,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0.69,0.31,27,11,745,407,13,24,4,11,18,16,21,31,534,642,191,295,2,13,8,2,3,10,8,12,1,1,3,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0.43,0.58,5,31,569,697,21,16,18,8,22,11,46,4,354,442,498,598,2,8,26,5,11,9,12,0,6,2,1,0,0,0
2018-05-13,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.36,0.64,9,13,414,617,23,20,16,9,15,10,19,19,210,299,414,523,0,16,14,9,6,13,9,4,4,2,1,0,0,0
