## Imports and Display Settings

In [48]:
import os
import sqlite3
import warnings
import xml.etree.ElementTree as ET
from pathlib import Path
from xml.etree.ElementTree import fromstring, ElementTree

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [49]:
%matplotlib inline
# Raise pandas' display limits: up to 300000 rows, 999 columns and
# 500 characters per cell are rendered before output is truncated.
pd.set_option("display.max_rows", 300000)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_colwidth", 500)

# Load Datasets

## Players

In [50]:
# Directory that holds the CSV exports. Set the FOOTBALL_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset, the
# original hard-coded location is used, so existing behaviour is unchanged.
DATA_DIR = Path(
    os.environ.get(
        "FOOTBALL_DATA_DIR",
        "C:\\Users\\Space\\Documents\\py\\Projects\\TuringCollege\\Football\\DataSets",
    )
)

# The first CSV column is used as the DataFrame index.
players = pd.read_csv(DATA_DIR / "players.csv", index_col=0)

In [51]:
players.head()

Unnamed: 0,PlayerID,Date,PreferredFoot,AttachWorkRate,DefenceWorkRate,Birthday,Height,Weight,BMI,OverallRating,Potential,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FreeKickAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Marking,StandingTackle,SlidingTackle
0,2625,2007,right,high,medium,1981.0,175.26,154.0,22.74,63.0,64.0,48.0,48.0,47.0,64.0,38.0,57.0,50.0,48.5,67.0,57.0,67.0,64.0,59.0,52.0,49.0,61.0,56.0,78.0,56.0,59.0,72.0,52.0,55.0,56.0,46.0,64.0,66.0,63.0
1,2625,2008,right,high,medium,1981.0,175.26,154.0,22.74,60.0,64.0,48.0,48.0,47.0,64.0,38.0,57.0,50.0,51.0,67.0,57.0,67.0,64.0,59.0,52.0,49.0,61.0,56.0,78.0,56.0,59.0,72.0,52.0,55.0,56.0,46.0,64.0,66.0,63.0
2,2625,2010,right,high,medium,1981.0,175.26,154.0,22.74,60.0,64.0,48.0,48.0,47.0,64.0,38.0,57.0,50.0,51.0,67.0,57.0,67.0,64.0,59.0,50.0,49.0,71.0,56.0,78.0,56.0,59.0,72.0,71.0,50.0,56.0,69.0,64.0,66.0,63.0
3,2625,2011,right,high,medium,1981.0,175.26,154.0,22.74,58.5,60.5,52.0,47.0,46.0,58.0,37.0,56.0,49.0,50.0,66.0,58.0,66.0,63.0,58.0,49.0,61.5,68.0,55.0,77.0,55.0,58.0,71.0,65.0,49.0,55.0,66.0,63.0,63.0,57.0
4,2625,2012,right,medium,medium,1981.0,175.26,154.0,22.74,58.0,58.0,52.0,47.0,46.0,53.0,37.0,56.0,49.0,50.0,66.0,58.0,67.0,63.0,67.0,49.0,76.0,68.0,55.0,77.0,55.5,58.0,71.0,64.0,49.0,55.0,66.0,63.0,63.0,52.0


In [52]:
players.shape

(71615, 39)

In [53]:
# Keep only the merge keys (Date, PlayerID) and the rating to be looked up.
overall_rating = players.loc[:, ["Date", "PlayerID", "OverallRating"]]
overall_rating.head()

Unnamed: 0,Date,PlayerID,OverallRating
0,2007,2625,63.0
1,2008,2625,60.0
2,2010,2625,60.0
3,2011,2625,58.5
4,2012,2625,58.0


## Matches

In [54]:
# Directory that holds the CSV exports. Set the FOOTBALL_DATA_DIR environment
# variable to run the notebook on another machine; when it is unset, the
# original hard-coded location is used, so existing behaviour is unchanged.
DATA_DIR = Path(
    os.environ.get(
        "FOOTBALL_DATA_DIR",
        "C:\\Users\\Space\\Documents\\py\\Projects\\TuringCollege\\Football\\DataSets",
    )
)

# The first CSV column is used as the DataFrame index.
matches = pd.read_csv(DATA_DIR / "matches.csv", index_col=0)

In [55]:
# Work on the match year plus the first five home-player ID columns.
sample_matches = matches.loc[
    :,
    [
        "Date",
        "HomePlayer1",
        "HomePlayer2",
        "HomePlayer3",
        "HomePlayer4",
        "HomePlayer5",
    ],
]
sample_matches.head()

Unnamed: 0,Date,HomePlayer1,HomePlayer2,HomePlayer3,HomePlayer4,HomePlayer5
0,2010,38391.0,33620.0,38388.0,38389.0,38253.0
1,2010,38391.0,38389.0,38253.0,69713.0,94553.0
2,2010,104388.0,39977.0,93054.0,46666.0,93344.0
3,2010,39573.0,67950.0,39389.0,80184.0,67941.0
4,2010,37868.0,33595.0,37866.0,3329.0,149150.0


In [56]:
def overall_function(
    player: str,
    date: str,
    player_id: str,
    rating_column: str,
    left_dataframe: pd.DataFrame,
    right_dataframe: pd.DataFrame,
    how: str = "inner",
) -> pd.DataFrame:
    """Replace the IDs in one player column with that player's rating.

    ``left_dataframe`` is merged with ``right_dataframe`` on
    ``(date, player) == (date, player_id)``; the values of ``rating_column``
    are then written into the ``player`` column and the helper columns
    brought in from the right side are dropped. Neither input is modified.

    Parameters
    ----------
    player : str
        Column of ``left_dataframe`` holding player IDs; it is overwritten
        with ratings in the returned frame.
    date : str
        Date column, present under the same name in both frames.
    player_id : str
        Column of ``right_dataframe`` holding player IDs.
    rating_column : str
        Column of ``right_dataframe`` holding the rating to substitute.
    left_dataframe : pd.DataFrame
        Frame whose ``player`` column is to be replaced.
    right_dataframe : pd.DataFrame
        Lookup frame containing ``date``, ``player_id`` and ``rating_column``.
    how : str, default "inner"
        Join type forwarded to ``pd.merge``. The default keeps the original
        behaviour, which drops rows without a matching rating; pass
        ``"left"`` to keep every row of ``left_dataframe`` with NaN where no
        rating is found.

    Returns
    -------
    pd.DataFrame
        The merged frame with ``player`` holding ratings and without the
        ``rating_column`` / ``player_id`` helper columns.
    """
    merged = pd.merge(
        left_dataframe,
        right_dataframe,
        how=how,
        left_on=[date, player],
        right_on=[date, player_id],
    )
    merged[player] = merged[rating_column]

    # When both frames use the same name for the ID column, the merge keeps a
    # single shared column that now holds the ratings; dropping it would throw
    # the result away, so it is only dropped when it is a distinct column.
    helper_columns = [rating_column]
    if player_id != player:
        helper_columns.append(player_id)

    return merged.drop(columns=helper_columns)

In [57]:
# Swap the HomePlayer1 IDs in the sample matches for overall ratings,
# matching rows on Date and PlayerID.
player, date, player_id, rating_column = "HomePlayer1", "Date", "PlayerID", "OverallRating"
left_dataframe, right_dataframe = sample_matches, overall_rating

df_merge = overall_function(
    player=player,
    date=date,
    player_id=player_id,
    rating_column=rating_column,
    left_dataframe=left_dataframe,
    right_dataframe=right_dataframe,
)

In [58]:
df_merge.head()

Unnamed: 0,Date,HomePlayer1,HomePlayer2,HomePlayer3,HomePlayer4,HomePlayer5
0,2010,72.0,33620.0,38388.0,38389.0,38253.0
1,2010,72.0,38389.0,38253.0,69713.0,94553.0
2,2010,72.0,33620.0,38389.0,38393.0,69713.0
3,2010,72.0,38388.0,38389.0,38253.0,69713.0
4,2010,72.0,38388.0,38389.0,38253.0,69713.0


In [59]:
df_merge.shape

(17832, 6)

In [60]:
df_merge.isna().sum()

Date             0
HomePlayer1      0
HomePlayer2    200
HomePlayer3    188
HomePlayer4    202
HomePlayer5    187
dtype: int64

In [61]:
# Swap the HomePlayer2 IDs in the sample matches for overall ratings,
# matching rows on Date and PlayerID.
player, date, player_id, rating_column = "HomePlayer2", "Date", "PlayerID", "OverallRating"
left_dataframe, right_dataframe = sample_matches, overall_rating

df_merge = overall_function(
    player=player,
    date=date,
    player_id=player_id,
    rating_column=rating_column,
    left_dataframe=left_dataframe,
    right_dataframe=right_dataframe,
)

In [62]:
df_merge.sort_values("Date").head()

Unnamed: 0,Date,HomePlayer1,HomePlayer2,HomePlayer3,HomePlayer4,HomePlayer5
0,2010,38391.0,72.5,38388.0,38389.0,38253.0
13477,2010,32418.0,63.0,32436.0,23869.0,40570.0
13478,2010,43230.0,66.5,35500.0,32556.0,37240.0
13479,2010,148483.0,66.5,39054.0,37307.0,70969.0
13480,2010,43230.0,66.5,35500.0,29049.0,32556.0


In [63]:
df_merge.shape

(17664, 6)

In [64]:
df_merge.isna().sum()

Date             0
HomePlayer1    160
HomePlayer2      0
HomePlayer3    193
HomePlayer4    229
HomePlayer5    205
dtype: int64

In [65]:
# Swap the HomePlayer3 IDs in the sample matches for overall ratings,
# matching rows on Date and PlayerID.
player, date, player_id, rating_column = "HomePlayer3", "Date", "PlayerID", "OverallRating"
left_dataframe, right_dataframe = sample_matches, overall_rating

df_merge = overall_function(
    player=player,
    date=date,
    player_id=player_id,
    rating_column=rating_column,
    left_dataframe=left_dataframe,
    right_dataframe=right_dataframe,
)

In [66]:
df_merge.sort_values("Date").head()

Unnamed: 0,Date,HomePlayer1,HomePlayer2,HomePlayer3,HomePlayer4,HomePlayer5
0,2010,38391.0,33620.0,72.0,38389.0,38253.0
13563,2010,24984.0,109147.0,64.5,35968.0,32891.0
13564,2010,24984.0,32725.0,64.5,35968.0,32891.0
13565,2010,24984.0,32725.0,64.5,34622.0,35968.0
13566,2010,23918.0,23081.0,64.5,35968.0,32891.0


In [67]:
df_merge.shape

(17781, 6)

In [68]:
df_merge.isna().sum()

Date             0
HomePlayer1    161
HomePlayer2    202
HomePlayer3      0
HomePlayer4    223
HomePlayer5    190
dtype: int64

In [69]:
# Swap the HomePlayer4 IDs in the sample matches for overall ratings,
# matching rows on Date and PlayerID.
player, date, player_id, rating_column = "HomePlayer4", "Date", "PlayerID", "OverallRating"
left_dataframe, right_dataframe = sample_matches, overall_rating

df_merge = overall_function(
    player=player,
    date=date,
    player_id=player_id,
    rating_column=rating_column,
    left_dataframe=left_dataframe,
    right_dataframe=right_dataframe,
)

In [70]:
df_merge.sort_values("Date").head()

Unnamed: 0,Date,HomePlayer1,HomePlayer2,HomePlayer3,HomePlayer4,HomePlayer5
0,2010,38391.0,33620.0,38388.0,74.0,38253.0
13513,2010,40602.0,181227.0,43246.0,74.0,35502.0
13514,2010,40602.0,160568.0,43246.0,74.0,35502.0
13515,2010,37780.0,160568.0,43246.0,74.0,35502.0
13516,2010,40602.0,181227.0,43246.0,74.0,35502.0


In [71]:
df_merge.shape

(17704, 6)

In [72]:
df_merge.isna().sum()

Date             0
HomePlayer1    143
HomePlayer2    200
HomePlayer3    189
HomePlayer4      0
HomePlayer5    187
dtype: int64

In [73]:
# Swap the HomePlayer5 IDs in the sample matches for overall ratings,
# matching rows on Date and PlayerID.
player, date, player_id, rating_column = "HomePlayer5", "Date", "PlayerID", "OverallRating"
left_dataframe, right_dataframe = sample_matches, overall_rating

df_merge = overall_function(
    player=player,
    date=date,
    player_id=player_id,
    rating_column=rating_column,
    left_dataframe=left_dataframe,
    right_dataframe=right_dataframe,
)

In [74]:
df_merge.sort_values("Date").head()

Unnamed: 0,Date,HomePlayer1,HomePlayer2,HomePlayer3,HomePlayer4,HomePlayer5
0,2010,38391.0,33620.0,38388.0,38389.0,70.5
13527,2010,33914.0,32969.0,32457.0,32968.0,65.0
13528,2010,24984.0,32725.0,23310.0,35968.0,65.5
13529,2010,24984.0,109147.0,23310.0,35968.0,65.5
13530,2010,23918.0,23081.0,32725.0,23310.0,65.5


In [75]:
df_merge.shape

(17765, 6)

In [76]:
df_merge.isna().sum()

Date             0
HomePlayer1    147
HomePlayer2    204
HomePlayer3    183
HomePlayer4    207
HomePlayer5      0
dtype: int64