In [76]:
import pandas as pd 
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [77]:
# Load the match data from the CSV file and list the columns it provides.
# (A bare `foot_data.head()` in the middle of a cell is evaluated and thrown
# away -- only a cell's last expression is displayed -- so it was removed.)
foot_data = pd.read_csv("Football.csv")
print(foot_data.columns)


Index(['Div', 'Date', 'Time', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR',
       'HTHG', 'HTAG', 'HS', 'AS', 'HST', 'AST', 'HC', 'AC'],
      dtype='object')


In [78]:
# Remove the FTR, Div, Date and Time columns, then list what remains.
foot_data = foot_data.drop(columns=['FTR', 'Div', 'Date', 'Time'])
print(foot_data.columns)

Index(['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'HTHG', 'HTAG', 'HS', 'AS',
       'HST', 'AST', 'HC', 'AC'],
      dtype='object')


In [79]:
# Preview the first rows of the frame after the column drop.
foot_data.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,HTHG,HTAG,HS,AS,HST,AST,HC,AC
0,Fulham,Arsenal,0,3,0,1,5,13,2,6,2,3
1,Crystal Palace,Southampton,1,0,1,0,5,9,3,5,7,3
2,Liverpool,Leeds,4,3,3,2,22,6,6,3,9,0
3,West Ham,Newcastle,0,2,0,0,15,15,3,2,8,7
4,West Brom,Leicester,0,3,0,0,7,13,1,7,2,5


In [80]:
# Show the dtype of every column to see which ones are non-numeric.
print(foot_data.dtypes)

HomeTeam    object
AwayTeam    object
FTHG         int64
FTAG         int64
HTHG         int64
HTAG         int64
HS           int64
AS           int64
HST          int64
AST          int64
HC           int64
AC           int64
dtype: object


In [81]:
# One-hot encode the two team-name columns. With drop_first=False every team
# keeps its own indicator column for both the home and the away side.
foot_data = pd.get_dummies(
    foot_data,
    columns=['HomeTeam', 'AwayTeam'],
    drop_first=False,
)


In [82]:
# Preview the first rows of the frame after one-hot encoding.
foot_data.head()

Unnamed: 0,FTHG,FTAG,HTHG,HTAG,HS,AS,HST,AST,HC,AC,...,AwayTeam_Liverpool,AwayTeam_Man City,AwayTeam_Man United,AwayTeam_Newcastle,AwayTeam_Sheffield United,AwayTeam_Southampton,AwayTeam_Tottenham,AwayTeam_West Brom,AwayTeam_West Ham,AwayTeam_Wolves
0,0,3,0,1,5,13,2,6,2,3,...,False,False,False,False,False,False,False,False,False,False
1,1,0,1,0,5,9,3,5,7,3,...,False,False,False,False,False,True,False,False,False,False
2,4,3,3,2,22,6,6,3,9,0,...,False,False,False,False,False,False,False,False,False,False
3,0,2,0,0,15,15,3,2,8,7,...,False,False,False,True,False,False,False,False,False,False
4,0,3,0,0,7,13,1,7,2,5,...,False,False,False,False,False,False,False,False,False,False


In [83]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
foot_data.describe()

Unnamed: 0,FTHG,FTAG,HTHG,HTAG,HS,AS,HST,AST,HC,AC
count,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0,184.0
mean,1.369565,1.353261,0.711957,0.581522,12.586957,11.244565,4.505435,4.059783,5.586957,4.625
std,1.34056,1.338864,0.861228,0.832329,5.325785,4.961047,2.558597,2.372822,3.121062,2.585294
min,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,9.0,7.0,3.0,2.0,3.0,3.0
50%,1.0,1.0,1.0,0.0,12.0,11.0,4.0,3.5,5.0,4.0
75%,2.0,2.0,1.0,1.0,16.0,14.0,6.0,5.25,7.0,6.0
max,7.0,7.0,4.0,4.0,28.0,28.0,14.0,14.0,16.0,13.0


In [84]:
# Report the number of missing values in each column.
print(foot_data.isnull().sum())

# Drop every row that contains a missing value. The result is rebound to
# `foot_data` rather than using inplace=True, so the cell does not mutate
# the existing frame object in place.
foot_data = foot_data.dropna()


FTHG                         0
FTAG                         0
HTHG                         0
HTAG                         0
HS                           0
AS                           0
HST                          0
AST                          0
HC                           0
AC                           0
HomeTeam_Arsenal             0
HomeTeam_Aston Villa         0
HomeTeam_Brighton            0
HomeTeam_Burnley             0
HomeTeam_Chelsea             0
HomeTeam_Crystal Palace      0
HomeTeam_Everton             0
HomeTeam_Fulham              0
HomeTeam_Leeds               0
HomeTeam_Leicester           0
HomeTeam_Liverpool           0
HomeTeam_Man City            0
HomeTeam_Man United          0
HomeTeam_Newcastle           0
HomeTeam_Sheffield United    0
HomeTeam_Southampton         0
HomeTeam_Tottenham           0
HomeTeam_West Brom           0
HomeTeam_West Ham            0
HomeTeam_Wolves              0
AwayTeam_Arsenal             0
AwayTeam_Aston Villa         0
AwayTeam

In [85]:
# Report how many rows are exact duplicates of an earlier row.
print(foot_data.duplicated().sum())

# Keep only the first occurrence of each duplicated row. The result is
# rebound to `foot_data` rather than using inplace=True, so the cell does
# not mutate the existing frame object in place.
foot_data = foot_data.drop_duplicates()


0


In [86]:
# Check the cleaned DataFrame
print(foot_data.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
foot_data.info()


   FTHG  FTAG  HTHG  HTAG  HS  AS  HST  AST  HC  AC  ...  AwayTeam_Liverpool  \
0     0     3     0     1   5  13    2    6   2   3  ...               False   
1     1     0     1     0   5   9    3    5   7   3  ...               False   
2     4     3     3     2  22   6    6    3   9   0  ...               False   
3     0     2     0     0  15  15    3    2   8   7  ...               False   
4     0     3     0     0   7  13    1    7   2   5  ...               False   

   AwayTeam_Man City  AwayTeam_Man United  AwayTeam_Newcastle  \
0              False                False               False   
1              False                False               False   
2              False                False               False   
3              False                False                True   
4              False                False               False   

   AwayTeam_Sheffield United  AwayTeam_Southampton  AwayTeam_Tottenham  \
0                      False                 False    