# International Football Matches Between 1872 And 2024

#   1. Introduction
##  1.1 Imports, Server Connection & DB Set Up 

### 1.1.1 Imports

In [2]:
from configparser import ConfigParser

import pandas as pd
import psycopg2
import sqlalchemy
from sqlalchemy import text

### 1.1.2 config.ini schreiben

In [3]:
# config als Beispieldatei hochladen
# andere auf ignore

In [4]:
config = ConfigParser()
config.read('config.ini')

['config.ini']

### 1.1.3 Connection to football_db

In [ ]:
# Erstellung von football_db in SQL-Console und Connection über SQLAlchemy

In [5]:
connection_str = f'postgresql://postgres:{config['local_postgres']['password']}@localhost/postgres'

engine = sqlalchemy.create_engine(connection_str, isolation_level="AUTOCOMMIT")

In [6]:
connection = engine.connect()

In [7]:
connection.execute(text("""CREATE DATABASE football_db;"""))

ProgrammingError: (psycopg2.errors.DuplicateDatabase) FEHLER:  Datenbank »football_db« existiert bereits

[SQL: CREATE DATABASE football_db;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [8]:
engine.dispose()
connection.close()

In [9]:
connection_str = f'postgresql://postgres:{config['local_postgres']['password']}@localhost/football_db'

engine = sqlalchemy.create_engine(connection_str, isolation_level="AUTOCOMMIT")

In [10]:
connection = engine.connect()

### 1.1.4 DataFrames for results, goalscorers & shootouts

In [11]:
goalscorers = pd.read_csv('goalscorers.csv')
results = pd.read_csv('results.csv')
shootouts = pd.read_csv('shootouts.csv')

### 1.1.5 Table Creation

In [39]:
# connection.execute(text('DROP TABLE IF EXISTS '))

In [12]:
# Creating results
results.to_sql('results', connection, index='index')
connection.commit()

ValueError: Table 'results' already exists.

In [46]:
# Creating goalscorers
goalscorers.to_sql('goalscorers', connection, index='index')
connection.commit()

In [47]:
# Creating shootouts
shootouts.to_sql('shootouts', connection, index='index')
connection.commit()

In [34]:
### Closure of connection
engine.dispose()
connection.close()

# Pre-Analysis

## results

In [22]:
results

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
0,1872-11-30,Scotland,England,0,0,Friendly,Glasgow,Scotland,False
1,1873-03-08,England,Scotland,4,2,Friendly,London,England,False
2,1874-03-07,Scotland,England,2,1,Friendly,Glasgow,Scotland,False
3,1875-03-06,England,Scotland,2,2,Friendly,London,England,False
4,1876-03-04,Scotland,England,3,0,Friendly,Glasgow,Scotland,False
...,...,...,...,...,...,...,...,...,...
46284,2024-02-07,Ivory Coast,DR Congo,1,0,African Cup of Nations,Ebimpé,Ivory Coast,False
46285,2024-02-07,Nigeria,South Africa,1,1,African Cup of Nations,Bouaké,Ivory Coast,True
46286,2024-02-10,Qatar,Jordan,3,1,AFC Asian Cup,Lusail,Qatar,False
46287,2024-02-10,South Africa,DR Congo,0,0,African Cup of Nations,Abidjan,Ivory Coast,True


### Creating column result in results

In [77]:
connection.execute(text('''
                        ALTER TABLE results
                        ADD COLUMN IF NOT EXISTS result NUMERIC;
                        '''))
connection.execute(text('''
                        UPDATE results
                            SET result = CASE
                                             WHEN home_score - away_score > 0 THEN 1
                                             WHEN home_score - away_score < 0 THEN -1
                                             ELSE 0
                                         END;
                        '''))


<sqlalchemy.engine.cursor.CursorResult at 0x1d64a449780>

In [84]:
liste1 = []
for col in results.columns:
    liste1.append(col)
print(liste1)

['date', 'home_team', 'away_team', 'home_score', 'away_score', 'tournament', 'city', 'country', 'neutral']


In [69]:
# Getting unique tournaments
query = connection.execute(text(''' SELECT DISTINCT tournament FROM results;'''))
tournament_list = []
for cup in query.all():
    # print(cup)
    tournament_list.append(cup)
sorted(tournament_list)


[('ABCS Tournament',),
 ('AFC Asian Cup',),
 ('AFC Asian Cup qualification',),
 ('AFC Challenge Cup',),
 ('AFC Challenge Cup qualification',),
 ('AFF Championship',),
 ('AFF Championship qualification',),
 ('Africa Cup of Nations qualification',),
 ('African Cup of Nations',),
 ('African Cup of Nations qualification',),
 ('Afro-Asian Games',),
 ('Amílcar Cabral Cup',),
 ('Arab Cup',),
 ('Arab Cup qualification',),
 ('Asian Games',),
 ('Atlantic Cup',),
 ('Atlantic Heritage Cup',),
 ('Balkan Cup',),
 ('Baltic Cup',),
 ('Beijing International Friendship Tournament',),
 ('Benedikt Fontana Cup',),
 ('Bolivarian Games',),
 ('Brazil Independence Cup',),
 ('British Home Championship',),
 ('CAFA Nations Cup',),
 ('CCCF Championship',),
 ('CECAFA Cup',),
 ('CFU Caribbean Cup',),
 ('CFU Caribbean Cup qualification',),
 ('CONCACAF Championship',),
 ('CONCACAF Championship qualification',),
 ('CONCACAF Nations League',),
 ('CONCACAF Nations League qualification',),
 ('CONIFA Africa Football Cup',)

In [49]:
results.groupby('home_team')['tournament'].unique()

home_team
Abkhazia         [Friendly, CONIFA World Football Cup, CONIFA E...
Afghanistan      [Friendly, SAFF Cup, FIFA World Cup qualificat...
Albania          [Friendly, Balkan Cup, UEFA Euro qualification...
Alderney         [Muratti Vase, Island Games, Friendly, Niamh C...
Algeria          [Friendly, African Cup of Nations qualificatio...
                                       ...                        
Zambia           [Friendly, FIFA World Cup qualification, Afric...
Zanzibar         [Friendly, CECAFA Cup, FIFI Wild Cup, Viva Wor...
Zimbabwe         [Friendly, FIFA World Cup qualification, Afric...
Åland                                     [Friendly, Island Games]
Åland Islands                                       [Island Games]
Name: tournament, Length: 327, dtype: object

## goalscorers

In [73]:
goalscorers

Unnamed: 0,date,home_team,away_team,team,scorer,minute,own_goal,penalty
0,1916-07-02,Chile,Uruguay,Uruguay,José Piendibene,44.0,False,False
1,1916-07-02,Chile,Uruguay,Uruguay,Isabelino Gradín,55.0,False,False
2,1916-07-02,Chile,Uruguay,Uruguay,Isabelino Gradín,70.0,False,False
3,1916-07-02,Chile,Uruguay,Uruguay,José Piendibene,75.0,False,False
4,1916-07-06,Argentina,Chile,Argentina,Alberto Ohaco,2.0,False,False
...,...,...,...,...,...,...,...,...
44105,2024-02-10,Qatar,Jordan,Qatar,Akram Afif,73.0,False,True
44106,2024-02-10,Qatar,Jordan,Qatar,Akram Afif,90.0,False,True
44107,2024-02-11,Ivory Coast,Nigeria,Nigeria,William Troost-Ekong,38.0,False,False
44108,2024-02-11,Ivory Coast,Nigeria,Ivory Coast,Franck Kessié,62.0,False,False


## shootouts

In [14]:
shootouts

Unnamed: 0,date,home_team,away_team,winner,first_shooter
0,1967-08-22,India,Taiwan,Taiwan,
1,1971-11-14,South Korea,Vietnam Republic,South Korea,
2,1972-05-07,South Korea,Iraq,Iraq,
3,1972-05-17,Thailand,South Korea,South Korea,
4,1972-05-19,Thailand,Cambodia,Thailand,
...,...,...,...,...,...
594,2024-01-30,Saudi Arabia,South Korea,South Korea,Saudi Arabia
595,2024-01-31,Iran,Syria,Iran,Iran
596,2024-02-03,Cape Verde,South Africa,South Africa,Cape Verde
597,2024-02-03,Qatar,Uzbekistan,Qatar,Uzbekistan


In [21]:
shootouts.groupby('first_shooter')['first_shooter'].count()

first_shooter
Algeria          2
Argentina        7
Australia        5
Bahrain          1
Belgium          1
                ..
United States    6
Uruguay          7
Uzbekistan       1
Vietnam          1
Åland            1
Name: first_shooter, Length: 77, dtype: int64