In [0]:
df_goalscorers = spark.read.table('hive_metastore.default.goalscorers')
df_results = spark.read.table('hive_metastore.default.results')

In [0]:
df_goalscorers.head()

Row(date=datetime.date(1916, 7, 2), home_team='Chile', away_team='Uruguay', team='Uruguay', scorer='José Piendibene', minute='44', own_goal=False, penalty=False)

In [0]:
df_results.head()

Row(date=datetime.date(1872, 11, 30), home_team='Scotland', away_team='England', home_score=0, away_score=0, tournament='Friendly', city='Glasgow', country='Scotland', neutral=False)

### 1. Number of goals scored by Lionel Messi (excluding own goals).

In [0]:
df_goalscorers.createOrReplaceTempView('goalscorers')
spark.sql('SELECT COUNT(*) AS messi_goals FROM goalscorers WHERE scorer = "Lionel Messi" AND own_goal = "FALSE"').show()


+-----------+
|messi_goals|
+-----------+
|         55|
+-----------+



### 2. List of the 5 most recent matches played by the Spanish national team.

In [0]:
# Using goalscorers table
df_goalscorers.createOrReplaceTempView('goalscorers')
spark.sql('SELECT * FROM goalscorers WHERE home_team = "Spain" OR away_team = "Spain" ORDER BY date DESC LIMIT 5').show()

+----------+---------+---------+-------+-----------------+------+--------+-------+
|      date|home_team|away_team|   team|           scorer|minute|own_goal|penalty|
+----------+---------+---------+-------+-----------------+------+--------+-------+
|2024-07-14|    Spain|  England|  Spain|    Nico Williams|    47|   false|  false|
|2024-07-14|    Spain|  England|England|      Cole Palmer|    73|   false|  false|
|2024-07-14|    Spain|  England|  Spain|  Mikel Oyarzabal|    86|   false|  false|
|2024-07-09|    Spain|   France|  Spain|     Lamine Yamal|    21|   false|  false|
|2024-07-09|    Spain|   France| France|Randal Kolo Muani|     9|   false|  false|
+----------+---------+---------+-------+-----------------+------+--------+-------+



In [0]:
# Using results table
df_results.createOrReplaceTempView('results')
spark.sql('SELECT * FROM results WHERE home_team = "Spain" OR away_team = "Spain" ORDER BY date DESC LIMIT 5').show()

+----------+-----------+-----------+----------+----------+-------------------+--------------------+-----------+-------+
|      date|  home_team|  away_team|home_score|away_score|         tournament|                city|    country|neutral|
+----------+-----------+-----------+----------+----------+-------------------+--------------------+-----------+-------+
|2025-03-23|      Spain|Netherlands|         3|         3|UEFA Nations League|            Valencia|      Spain|  false|
|2025-03-20|Netherlands|      Spain|         2|         2|UEFA Nations League|           Rotterdam|Netherlands|  false|
|2024-11-18|      Spain|Switzerland|         3|         2|UEFA Nations League|Santa Cruz de Ten...|      Spain|  false|
|2024-11-15|    Denmark|      Spain|         1|         2|UEFA Nations League|          Copenhagen|    Denmark|  false|
|2024-10-15|      Spain|     Serbia|         3|         0|UEFA Nations League|             Cordoba|      Spain|  false|
+----------+-----------+-----------+----

### 3. Number of goals scored by Spain in its entire history. This information must be taken from results, as goalscorers does not contain all goals.

In [0]:
df_results.createOrReplaceTempView('results')
spark.sql('''
    SELECT 
        SUM(CASE WHEN home_team = 'Spain' THEN home_score ELSE 0 END) +
        SUM(CASE WHEN away_team = 'Spain' THEN away_score ELSE 0 END) AS total_spain_goals
    FROM results
''').show()

+-----------------+
|total_spain_goals|
+-----------------+
|             1567|
+-----------------+



### 4. List of the 5 highest scorers with the Spanish national team (excluding own goals).

In [0]:
df_goalscorers.createOrReplaceTempView('goalscorers')
spark.sql('''
    SELECT scorer, COUNT(*) AS goals
    FROM goalscorers
    WHERE team = 'Spain' AND own_goal = 'FALSE'
    GROUP BY scorer
    ORDER BY goals DESC
    LIMIT 5
''').show()

+---------------+-----+
|         scorer|goals|
+---------------+-----+
|    David Villa|   41|
|           Raúl|   32|
|  Álvaro Morata|   29|
|Fernando Torres|   28|
|Fernando Hierro|   25|
+---------------+-----+



### 5. List of Spanish players who have scored a penalty goal in a European Championship (UEFA Euro), sorted alphabetically.

In [0]:
df_goalscorers.createOrReplaceTempView('goalscorers')
df_results.createOrReplaceTempView('reults')
spark.sql('''
    SELECT DISTINCT scorer
    FROM results r
    JOIN goalscorers g
    ON r.date = g.date AND r.home_team = g.home_team AND r.away_team = g.away_team
    WHERE g.team = 'Spain' AND g.penalty = 'TRUE' AND r.tournament LIKE '%Euro%'
    ORDER BY scorer
''').show()

+--------------------+
|              scorer|
+--------------------+
|      Andrés Iniesta|
|         Daniel Ruiz|
|         David Villa|
|     Fernando Hierro|
|Francisco José Ca...|
|     Gaizka Mendieta|
|      José Claramunt|
|  Juan Antonio Señor|
|              Míchel|
|               Pirri|
|        Sergio Ramos|
|         Xabi Alonso|
|       Álvaro Morata|
+--------------------+



### 6. List of the 5 highest scorers in the final stages of the World Cup (FIFA World Cup) (excluding own goals).

In [0]:
df_goalscorers.createOrReplaceTempView('goalscorers')
df_results.createOrReplaceTempView('reults')
spark.sql('''
    SELECT scorer, COUNT(*) AS goals
    FROM results r
    JOIN goalscorers g
    ON r.date = g.date AND r.home_team = g.home_team AND r.away_team = g.away_team
    WHERE r.tournament = 'FIFA World Cup' AND g.own_goal = 'FALSE'
    GROUP BY scorer
    ORDER BY goals DESC
    LIMIT 5
''').show()

+--------------+-----+
|        scorer|goals|
+--------------+-----+
|Miroslav Klose|   16|
|       Ronaldo|   15|
|   Gerd Müller|   14|
|  Lionel Messi|   13|
| Just Fontaine|   13|
+--------------+-----+

