<a href="https://colab.research.google.com/github/Rohanpk23/European-Soccer-Database/blob/master/European_Soccer_Database_(SQLALCHEMY_ORM_based).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

S1 - Connecting to Google Drive

In [None]:
# Mount Google Drive so the SQLite database file stored there can be read from this Colab runtime
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


S2 - Using the interface (engine) to connect with the sqlite database & listing all tables in the database file

In [None]:
#engine is the common interface to connect to the local database
from sqlalchemy import (create_engine, inspect, MetaData, Table, select, case,
                        and_, or_, desc, func, text)

import pandas as pd

import matplotlib.pyplot as plt

# Empty MetaData container; the Table objects reflected later in the notebook are registered on it
metadata = MetaData() 

# Engine for the local SQLite file: the 'sqlite:///' prefix selects SQLite and the rest of the URL is the absolute path to football.sqlite on the mounted Drive
engine = create_engine('sqlite:////content/drive/My Drive/Colab Notebooks/football.sqlite')

# Open a connection; the queries in the following cells are executed through it
connection = engine.connect()

Listing all the tables in the schema

In [None]:
# List every table in the database through the Inspector API.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# inspect(engine).get_table_names() is the supported equivalent and also works on 1.x.
print(inspect(engine).get_table_names())

['Country', 'League', 'Match', 'Player', 'Player_Attributes', 'Team', 'Team_Attributes', 'sqlite_sequence']


In [None]:
# Reflect the player table: its column definitions are read from the database itself.
# Passing autoload_with is enough to trigger reflection; the separate autoload flag is
# redundant and was deprecated in SQLAlchemy 1.4.
player = Table('player', metadata, autoload_with=engine)
print(player.columns.keys())
print(repr(player))

['id', 'player_api_id', 'player_name', 'player_fifa_api_id', 'birthday', 'height', 'weight']
Table('player', MetaData(bind=None), Column('id', INTEGER(), table=<player>, primary_key=True, nullable=False), Column('player_api_id', INTEGER(), table=<player>), Column('player_name', TEXT(), table=<player>), Column('player_fifa_api_id', INTEGER(), table=<player>), Column('birthday', TEXT(), table=<player>), Column('height', INTEGER(), table=<player>), Column('weight', INTEGER(), table=<player>), schema=None)


# Pythonic way of accessing any table's data

Tip: build each statement clause by clause in SQL's usual order (SELECT, WHERE, GROUP BY, ORDER BY). This is a helpful convention, not a rigid rule.

In [None]:
# Build the query: names of players whose height and weight both exceed 200.
# (Add player.c.height / player.c.weight to the select list to see those values too.)
stmt2 = select([player.c.player_name]).where(
    and_(player.c.height > 200, player.c.weight > 200)
)

# execute() returns a ResultProxy; fetchmany(size=50) pulls at most 50 rows from it as a list
results = connection.execute(stmt2).fetchmany(size=50)

# Each row exposes its columns as attributes, so the selected column is printed row by row
for records in results:
    print(records.player_name)

Abdoul Ba
Bogdan Milic
Costel Pantilimon
Jurgen Wevers
Kristof van Hout
Nikola Zigic
Pietro Marino
Stefan Maierhofer
Vanja Milinkovic-Savic
Wojciech Kaczmarek
Zeljko Kalac


In [None]:
# Print each row as a whole tuple (first 20 rows at most). The slice belongs on the
# iterable: printing results[:20] inside the loop repeated the entire list once per row.
for records in results[:20]:
    print(records)

[('Abdoul Ba',), ('Bogdan Milic',), ('Costel Pantilimon',), ('Jurgen Wevers',), ('Kristof van Hout',), ('Nikola Zigic',), ('Pietro Marino',), ('Stefan Maierhofer',), ('Vanja Milinkovic-Savic',), ('Wojciech Kaczmarek',), ('Zeljko Kalac',)]
[('Abdoul Ba',), ('Bogdan Milic',), ('Costel Pantilimon',), ('Jurgen Wevers',), ('Kristof van Hout',), ('Nikola Zigic',), ('Pietro Marino',), ('Stefan Maierhofer',), ('Vanja Milinkovic-Savic',), ('Wojciech Kaczmarek',), ('Zeljko Kalac',)]
[('Abdoul Ba',), ('Bogdan Milic',), ('Costel Pantilimon',), ('Jurgen Wevers',), ('Kristof van Hout',), ('Nikola Zigic',), ('Pietro Marino',), ('Stefan Maierhofer',), ('Vanja Milinkovic-Savic',), ('Wojciech Kaczmarek',), ('Zeljko Kalac',)]
[('Abdoul Ba',), ('Bogdan Milic',), ('Costel Pantilimon',), ('Jurgen Wevers',), ('Kristof van Hout',), ('Nikola Zigic',), ('Pietro Marino',), ('Stefan Maierhofer',), ('Vanja Milinkovic-Savic',), ('Wojciech Kaczmarek',), ('Zeljko Kalac',)]
[('Abdoul Ba',), ('Bogdan Milic',), ('Costel

Total home goals and away goals scored in each season

In [None]:
# Reflect the match table; autoload_with alone triggers reflection (the separate
# autoload flag is redundant and was deprecated in SQLAlchemy 1.4).
match = Table('match', metadata, autoload_with=engine)

# Aggregate expressions: summed home goals and summed away goals, each given an output label
Home_Tot = func.sum(match.columns.home_team_goal).label('Home Goals')
Away_Tot = func.sum(match.columns.away_team_goal).label('Away Goals')

# SELECT the season, both totals and their difference
stmt3 = select([match.columns.season, Home_Tot,
                Away_Tot, (Home_Tot - Away_Tot).label('Home - Away')])
# GROUP BY season so the sums are computed per season
stmt3 = stmt3.group_by(match.columns.season)
# ORDER BY an aliased column: the string refers to the 'Home Goals' label defined above
stmt3_ordered = stmt3.order_by('Home Goals')

In [None]:
# Execute the ordered statement and keep the result object so the column names can be
# read from it directly. Taking them from results[0] raised IndexError when the query
# returned no rows.
result_proxy = connection.execute(stmt3_ordered)
column_names = list(result_proxy.keys())

# fetchall() gives the rows as a list of tuple-like records
results = result_proxy.fetchall()

# Convert to a DataFrame, naming the columns after the selected/labelled expressions
df3 = pd.DataFrame(results, columns=column_names)
print(df3)

      season  Home Goals  Away Goals  Home - Away
0  2013/2014        4787        3602         1185
1  2009/2010        4978        3654         1324
2  2008/2009        5007        3665         1342
3  2010/2011        5048        3701         1347
4  2012/2013        5053        3986         1067
5  2014/2015        5055        3842         1213
6  2011/2012        5064        3683         1381
7  2015/2016        5135        4027         1108


In [None]:
# For every match, select both team ids plus the outcome from the home team's point of view.
# case() takes a list of (condition, value) tuples (the legacy 1.x calling style, matching the
# select([...]) form used throughout this notebook), and the goal columns must be referenced
# through the reflected match table rather than as bare, undefined names.
stmt4 = select([match.columns.home_team_api_id, match.columns.away_team_api_id,
                case([(match.columns.home_team_goal > match.columns.away_team_goal, "Win"),
                      (match.columns.home_team_goal < match.columns.away_team_goal, "Loss")],
                     else_='tie')
                ])