<a href="https://colab.research.google.com/github/Rohanpk23/European-Soccer-Database/blob/master/European_Soccer_Database_(SQLALCHEMY_ORM_based).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

S1 - Connecting to the Google drive

In [51]:
# Mount Google Drive so the SQLite database file kept there can be opened from this Colab runtime
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


S2 - Using the interface (engine) to connect with the sqlite database & listing all tables in the database file

In [52]:
#engine is the common interface to connect to the local database
import matplotlib.pyplot as plt
import pandas as pd
from sqlalchemy import (
    MetaData,
    Table,
    and_,
    case,
    create_engine,
    desc,
    func,
    inspect,
    or_,
    select,
)

# Engine for the local SQLite file: the 'sqlite:///' prefix selects the SQLite dialect and the rest of
# the URL is the absolute path to football.sqlite on the mounted drive
engine = create_engine('sqlite:////content/drive/My Drive/Colab Notebooks/football.sqlite')

# Connection through which the later cells execute their statements
connection = engine.connect()

# MetaData container on which the reflected Table objects are registered
metadata = MetaData()

Listing all the tables in the schema

In [53]:
# List every table present in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0; the Inspector API is the
# supported replacement and is also available on older releases.
print(inspect(engine).get_table_names())

['Country', 'League', 'Match', 'Player', 'Player_Attributes', 'Team', 'Team_Attributes', 'sqlite_sequence']


In [94]:
# Reflect the table definitions from the database into Table objects.
# Passing autoload_with is enough to trigger reflection; the separate autoload=True flag is
# deprecated in SQLAlchemy 1.4 and no longer accepted in 2.0.
player = Table('player', metadata, autoload_with=engine)
match = Table('match', metadata, autoload_with=engine)
team = Table('team', metadata, autoload_with=engine)

# Show the reflected column names and the full reflected definition of one table
print(player.columns.keys())
print(repr(player))

['id', 'player_api_id', 'player_name', 'player_fifa_api_id', 'birthday', 'height', 'weight']
Table('player', MetaData(bind=None), Column('id', INTEGER(), table=<player>, primary_key=True, nullable=False), Column('player_api_id', INTEGER(), table=<player>), Column('player_name', TEXT(), table=<player>), Column('player_fifa_api_id', INTEGER(), table=<player>), Column('birthday', TEXT(), table=<player>), Column('height', INTEGER(), table=<player>), Column('weight', INTEGER(), table=<player>), schema=None)


Warmup practice

In [55]:
# Warm-up query: names of players whose height and weight are both above 200
tall_and_heavy = and_(player.c.height > 200, player.c.weight > 200)
stmt2 = select([player.c.player_name]).where(tall_and_heavy)

# execute() returns a ResultProxy; fetchmany() pulls at most `size` rows from it as a list
results = connection.execute(stmt2).fetchmany(size=50)

# Print the selected column of every returned row
for row in results:
    print(row.player_name)

Abdoul Ba
Bogdan Milic
Costel Pantilimon
Jurgen Wevers
Kristof van Hout
Nikola Zigic
Pietro Marino
Stefan Maierhofer
Vanja Milinkovic-Savic
Wojciech Kaczmarek
Zeljko Kalac


In [None]:
# Print the first 20 rows, one whole row (tuple) per line.
# Iterating over the slice prints each row exactly once, rather than re-printing the entire
# slice on every pass through the loop.
for record in results[:20]:
    print(record)

Each season's count of total home goals and away goals scored

In [89]:
# Aggregates: total home goals, total away goals, and the difference between the two
Home_Tot = func.sum(match.c.home_team_goal).label('Home Goals')
Away_Tot = func.sum(match.c.away_team_goal).label('Away Goals')
goal_gap = (Home_Tot - Away_Tot).label('Home - Away')

# SELECT the season together with its aggregates, grouped per season (GROUP BY)
stmt3 = select([match.c.season, Home_Tot, Away_Tot, goal_gap]).group_by(match.c.season)

# ORDER BY an aliased column: the string has to match the label text exactly
stmt3_ordered = stmt3.order_by('Home Goals')

In [None]:
# Execute the ordered per-season query and convert the rows into a DataFrame.
# Column names are read from the result object instead of from results[0], so an empty result set
# produces an empty DataFrame with the right headers rather than raising IndexError.
season_proxy = connection.execute(stmt3_ordered)
season_columns = list(season_proxy.keys())
results = season_proxy.fetchall()

# Rows are converted to plain tuples so pandas receives ordinary sequences
df3 = pd.DataFrame([tuple(row) for row in results], columns=season_columns)
print(df3)

1. Want to know the home performance of the teams season-wise? Use this table

1.1. SQL statements

In [90]:
# CASE expression: label each match from the home side's point of view
home_won = match.c.home_team_goal > match.c.away_team_goal
home_lost = match.c.home_team_goal < match.c.away_team_goal
cased_H = case([(home_won, 'Win'), (home_lost, 'Defeat')], else_='Tie').label('Outcome')

# SELECT list; the CASE expression is returned as the 'Outcome' column
stmt_H = select([
    match.c.season,
    team.c.team_long_name.label('Home Team'),
    match.c.home_team_goal.label('Score'),
    match.c.away_team_goal.label('Opponent Score'),
    cased_H,
])

# INNER JOIN each match to its home team, then sort on Score in descending order
home_join = match.join(team, match.c.home_team_api_id == team.c.team_api_id)
stmt_H_Teams = stmt_H.select_from(home_join).order_by(desc('Score'))

1.2. Execution and conversion to DataFrame

In [None]:
# Execute the home-performance query and keep only the first 20 rows of its (Score-descending) output.
proxy_H = connection.execute(stmt_H_Teams)
columns_H = list(proxy_H.keys())
result_H = proxy_H.fetchmany(size=20)
proxy_H.close()  # only part of the result was read, so release the still-open cursor explicitly

# Build the DataFrame with headers taken from this query's own result.
# The headers must not come from a variable named `result`: no such name is defined in the visible
# notebook, so it would raise NameError or pick up stale state from an earlier kernel session.
df_H = pd.DataFrame([tuple(row) for row in result_H], columns=columns_H)
print('\n------------------------- Top 20 Home Perfomers ----------------------------\n')
print(df_H)

2. Want to know the overseas (away-match) performance of the teams season-wise? Use this table

2.1. SQL statements

In [92]:
# CASE expression: label each match from the away side's point of view
away_won = match.c.away_team_goal > match.c.home_team_goal
away_lost = match.c.away_team_goal < match.c.home_team_goal
cased_A = case([(away_won, 'Win'), (away_lost, 'Loss')], else_='Tie').label('Outcome')

# SELECT list; the CASE expression is returned as the 'Outcome' column
stmt_A = select([
    match.c.season,
    team.c.team_long_name.label('Away Team'),
    match.c.away_team_goal.label('Score'),
    match.c.home_team_goal.label('Opponent Score'),
    cased_A,
])

# INNER JOIN each match to its away team, then sort on Score in descending order
away_join = match.join(team, match.c.away_team_api_id == team.c.team_api_id)
stmt_A_Teams = stmt_A.select_from(away_join).order_by(desc('Score'))

2.2. Execution and conversion to DataFrame

In [None]:
# Execute the away-performance query and keep only the first 20 rows of its (Score-descending) output.
proxy_A = connection.execute(stmt_A_Teams)
columns_A = list(proxy_A.keys())
result_A = proxy_A.fetchmany(size=20)
proxy_A.close()  # only part of the result was read, so release the still-open cursor explicitly

# Build the DataFrame with headers taken from this query's own result.
# The headers must not come from a variable named `result`: no such name is defined in the visible
# notebook, so it would raise NameError or pick up stale state from an earlier kernel session.
df_A = pd.DataFrame([tuple(row) for row in result_A], columns=columns_A)
print('\n------------------------- Top 20 Overseas Perfomers ------------------------\n')
print(df_A)