# Perform SQL Queries for Data Wrangling

In [1]:
# Import necessary libraries
import sqlite3
import pandas as pd

In [3]:
# Open the SQLite database read-only via a URI. The queries visible in this
# notebook are all SELECTs, and mode=ro makes a missing or misplaced file fail
# right here; a plain sqlite3.connect('epl_data.db') would instead silently
# create an empty database and only fail later with "no such table: epl".
conn = sqlite3.connect('file:epl_data.db?mode=ro', uri=True)

In [7]:
# Query 1: Get the top 5 teams with the most goals scored at home.
# Sums FTHG per HomeTeam in the `epl` table, sorts the totals in descending
# order and keeps the first five rows.
# NOTE(review): teams tied on TotalHomeGoals are returned in no guaranteed
# order, so a tie at the LIMIT cut-off is resolved arbitrarily — confirm
# whether an explicit tie-breaker column is wanted.
query1 = '''
SELECT HomeTeam, SUM(FTHG) AS TotalHomeGoals
FROM epl
GROUP BY HomeTeam
ORDER BY TotalHomeGoals DESC
LIMIT 5
'''

# Run the query on the open connection and load the result into a DataFrame.
top_home_teams = pd.read_sql(query1, conn)
print('Top 5 with the most goals scored at home:')
print(top_home_teams)

Top 5 with the most goals scored at home:
    HomeTeam  TotalHomeGoals
0   Brighton              55
1     Wolves              53
2   Man City              51
3  Newcastle              44
4    Burnley              44


In [9]:
# Query 2: the five away sides with the highest summed FTAG in the `epl` table,
# largest total first.
query2 = '''
SELECT AwayTeam, SUM(FTAG) AS TotalAwayGoals
FROM epl
GROUP BY AwayTeam
ORDER BY TotalAwayGoals DESC
LIMIT 5
'''

# Execute against the shared connection; the result comes back as a DataFrame.
top_away_teams = pd.read_sql(sql=query2, con=conn)

# Heading and table are emitted on separate lines, exactly as two print calls would.
print('\n Top 5 teams with most away goals scored', top_away_teams, sep='\n')


 Top 5 teams with most away goals scored
    AwayTeam  TotalAwayGoals
0   Man City              71
1  Liverpool              62
2    Burnley              59
3    Chelsea              48
4    Watford              44


In [10]:
# Query 3: count the rows in `epl` per Referee and keep the five referees
# with the highest counts, largest first.
query3 = '''
SELECT Referee, COUNT(*) AS MatchesOfficiated
FROM epl
GROUP BY Referee
ORDER BY MatchesOfficiated DESC
LIMIT 5
'''

# Execute against the shared connection; the result comes back as a DataFrame.
top_referees = pd.read_sql(sql=query3, con=conn)

# Heading and table are emitted on separate lines, exactly as two print calls would.
print('\nTop 5 referees who officiated the most matches:', top_referees, sep='\n')


Top 5 referees who officiated the most matches:
          Referee  MatchesOfficiated
0       Mike Dean                 81
1     Paul Tierry                 80
2  Michael Oliver                 75
3  Anthony Taylor                 73
4   Martin Atkins                 71


In [11]:
# Close the SQLite connection now that the query results above have been
# loaded into DataFrames. Any later cell that needs `conn` must reconnect first.
conn.close()