In [1]:
import numpy as np
import pandas as pd

In [2]:
# Path to the results CSV, relative to the notebook's working directory
fileName = 'Data/results.csv'
# Read the whole file into a DataFrame
data = pd.read_csv(filepath_or_buffer=fileName)

In [3]:
# A DataFrame is addressed by column name rather than by numeric position,
# so start by listing the column names that are available.
data.columns

Index(['date', 'home_team', 'away_team', 'home_score', 'away_score',
       'tournament', 'city', 'country', 'neutral'],
      dtype='object')

In [4]:
# Indexing the DataFrame with a column name returns that single column
print(data['home_team'])

0            Scotland
1             England
2            Scotland
3             England
4            Scotland
             ...     
43183        Suriname
43184    Burkina Faso
43185        Cameroon
43186        Cameroon
43187         Senegal
Name: home_team, Length: 43188, dtype: object


In [5]:
# Read one cell by row label and column name in a single .loc lookup
print(data.loc[43, 'country'])

Ireland


In [6]:
# A really useful feature: .unique() returns each distinct value in the column exactly once.
teams = data['home_team'].unique()

Look at the structure of the command above: `data[NAME OF COLUMN]` returns the column, and the `.unique()` method then returns the unique values in that column.

In [7]:
print(teams) # teams now holds every distinct value (team) from the home_team column, once each

['Scotland' 'England' 'Wales' 'Northern Ireland' 'United States' 'Uruguay'
 'Austria' 'Hungary' 'Argentina' 'Belgium' 'France' 'Netherlands'
 'Czechoslovakia' 'Switzerland' 'Sweden' 'Germany' 'Italy' 'Chile'
 'Norway' 'Finland' 'Luxembourg' 'Russia' 'Denmark' 'Catalonia'
 'Basque Country' 'Brazil' 'Japan' 'Paraguay' 'Canada' 'Estonia'
 'Costa Rica' 'Guatemala' 'Spain' 'Brittany' 'Poland' 'Yugoslavia'
 'New Zealand' 'Romania' 'Latvia' 'Galicia' 'Portugal' 'Andalusia'
 'China PR' 'Australia' 'Lithuania' 'Turkey' 'Central Spain' 'Mexico'
 'Aruba' 'Egypt' 'Haiti' 'Philippines' 'Bulgaria' 'Jamaica' 'Kenya'
 'Bolivia' 'Peru' 'Honduras' 'Guyana' 'Uganda' 'Belarus' 'El Salvador'
 'Barbados' 'Republic of Ireland' 'Trinidad and Tobago' 'Greece' 'Curaçao'
 'Dominica' 'Silesia' 'Guadeloupe' 'Israel' 'Suriname' 'French Guiana'
 'Cuba' 'Colombia' 'Ecuador' 'Saint Kitts and Nevis' 'Panama' 'Slovakia'
 'Manchukuo' 'Croatia' 'Nicaragua' 'Afghanistan' 'India' 'Martinique'
 'Zimbabwe' 'Iceland' 'Albania'

Let's look at all the games involving Scotland.

In [6]:
print(data['home_team']=='Scotland')
# The comparison does not filter any rows: it returns a new boolean column
# with one True/False entry for every row of the original table.

0         True
1        False
2         True
3        False
4         True
         ...  
43183    False
43184    False
43185    False
43186    False
43187    False
Name: home_team, Length: 43188, dtype: bool


In [7]:
# But this result still has two problems:
# 1.) it only checks the home team, so games where Scotland played away are missed
# 2.) it holds True/False flags rather than the matching rows themselves

We can fix the first problem with the OR (`|`) operator. Now the condition returns True when the home team is Scotland or the away team is Scotland.

In [11]:
# Build one True/False column per side, then combine them element-wise with | (OR)
scotlandIsHome = data['home_team'] == 'Scotland'
scotlandIsAway = data['away_team'] == 'Scotland'
print(scotlandIsHome | scotlandIsAway)

0         True
1         True
2         True
3         True
4         True
         ...  
43183    False
43184    False
43185    False
43186    False
43187    False
Length: 43188, dtype: bool


# Ok, now we have both the home and away teams

In [9]:
# Placing a True/False column inside data[...] keeps only the rows flagged True.
# The flags mark every match in which Scotland is either the away or the home side.
scotlandInvolved = (data['away_team'] == 'Scotland') | (data['home_team'] == 'Scotland')
scottishData = data[scotlandInvolved]

Think about this: the True/False values from before are being used to build a new data frame, made up of only the rows that correspond to True.

In [10]:
# One row per match, so the row count is the number of games played
gamesPlayed = len(scottishData)
print('Scotland have played', gamesPlayed, 'times')

Scotland have played 804 times


In [15]:
# Matches Scotland won as the listed home team.
# Note: rows flagged in the 'neutral' column are not excluded here.
listedAsHome = scottishData['home_team'] == 'Scotland'
homeSideWon = scottishData['home_score'] > scottishData['away_score']
scottishHomeWins = scottishData[listedAsHome & homeSideWon]

In other words: when Scotland is the home team and the home score is higher than the away score, Scotland have won.

In [16]:
# Matches Scotland won as the listed away team: the mirror image of the home-win filter.
listedAsAway = scottishData['away_team'] == 'Scotland'
awaySideWon = scottishData['away_score'] > scottishData['home_score']
scottishAwayWins = scottishData[listedAsAway & awaySideWon]

In [17]:
# Each filtered frame has one row per win, so its length is the win count
homeWinCount = len(scottishHomeWins)
awayWinCount = len(scottishAwayWins)
print('Scotland won at home', homeWinCount, 'times')
print('Scotland won at away', awayWinCount, 'times')

Scotland won at home 221 times
Scotland won at away 161 times


In [18]:
# Overall win rate: home wins plus away wins, as a percentage of all games played
totalWins = scottishHomeWins.shape[0] + scottishAwayWins.shape[0]
winPercentage = 100 * totalWins / scottishData.shape[0]
print('Scotland won', winPercentage, '%')

Scotland won 47.51243781094527 %


In [19]:
# Display the filtered frame; long frames are abbreviated to their first and last rows
scottishData

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
0,1872-11-30,Scotland,England,0,0,Friendly,Glasgow,Scotland,False
1,1873-03-08,England,Scotland,4,2,Friendly,London,England,False
2,1874-03-07,Scotland,England,2,1,Friendly,Glasgow,Scotland,False
3,1875-03-06,England,Scotland,2,2,Friendly,London,England,False
4,1876-03-04,Scotland,England,3,0,Friendly,Glasgow,Scotland,False
...,...,...,...,...,...,...,...,...,...
42725,2021-09-07,Austria,Scotland,0,1,FIFA World Cup qualification,Vienna,Austria,False
42821,2021-10-09,Scotland,Israel,3,2,FIFA World Cup qualification,Glasgow,Scotland,False
42890,2021-10-12,Faroe Islands,Scotland,0,1,FIFA World Cup qualification,Tórshavn,Faroe Islands,False
42969,2021-11-12,Moldova,Scotland,0,2,FIFA World Cup qualification,Chișinău,Moldova,False


In [20]:
# Case-insensitive substring search over the home-team names.
# na=False treats a missing team name as "no match"; without it, a missing value
# in the column puts NaN in the mask and the boolean indexing raises an error.
data[data['home_team'].str.contains('york', case=False, na=False)]

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
39696,2018-01-28,Yorkshire,Ellan Vannin,1,1,Friendly,Fitzwilliam,England,False
39787,2018-03-25,Yorkshire,Chagos Islands,6,0,Friendly,Fitzwilliam,England,False
40479,2018-11-18,Yorkshire,Panjab,5,4,Friendly,Ossett,England,False
40756,2019-05-04,Yorkshire,Somaliland,6,2,Friendly,Athersley,England,False
40777,2019-06-01,Yorkshire,Parishes of Jersey,1,0,Atlantic Heritage Cup,Ossett,England,False
