## EAE - Introduction to Programming Languages for Data 
## Day 10 - 21/12/2023

### Instructor:  
Enric Domingo  
*Machine Learning and Software Engineer at ERNI*  
edomingod@professional.eae.es

#### SQL + Python:

1. Recap

2. Intermediate SQL Commands (part 2):
  - `GROUP BY`: grouping data by a column
    - SELECT <column_name>, <aggregate_function>(<column_name>) FROM <table_name> GROUP BY <column_name>;
   
  - `HAVING`: filtering data after grouping
    - SELECT <column_name>, <aggregate_function>(<column_name>) FROM <table_name> GROUP BY <column_name> HAVING <condition>;
   
  - `ORDER BY`: sorting data
    - SELECT <column_name> FROM <table_name> ORDER BY <column_name> ASC|DESC;
   
  - `LIMIT`: limiting the number of rows returned
    - SELECT <column_name>, <aggregate_function>(<column_name>) FROM <table_name> LIMIT <number>;
  
  - `CREATE TABLE`: creating a new table
    - CREATE TABLE <table_name> (<column_name> <data_type>, <column_name> <data_type>, ...);
   
  - `INSERT INTO`: inserting rows into a table
    - INSERT INTO <table_name> (<column_name>, <column_name>, ...) VALUES (<value>, <value>, ...);
   
  - `UPDATE`: updating rows in a table
    - UPDATE <table_name> SET <column_name> = <value> WHERE <condition>;
   
  - `DELETE`: deleting rows from a table
    - DELETE FROM <table_name> WHERE <condition>;

  - `DROP TABLE`: deleting a table
    - DROP TABLE <table_name>;

3. Python refresh and practice

4. Python Lab 🧪

---
## 1. Recap

- Intermidiate SQL Commands:
   - SQL Functions
   - SQL JOIN (INNER, LEFT, RIGHT, FULL)

In [1]:
# Preaparing the database for the exercises

# We will create a database with 3 tables: Students, Subjects and Classrooms

import sqlite3
import os
import pandas as pd


if "nba.db" in os.listdir():
    os.remove("nba.db")

if "nba.db" not in os.listdir():

    print("Creating database...")

    # Connect to SQLite database (or create it)
    connection = sqlite3.connect("nba.db")

    # Create cursor object
    cursor = connection.cursor()

    # Create Teams table
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS Teams (
        Team_ID INTEGER PRIMARY KEY,
        Team_Name TEXT,
        City TEXT
    )
    """)

    # Create Players table
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS Players (
        Player_ID INTEGER PRIMARY KEY,
        Player_Name TEXT,
        Position TEXT,
        Season_Points INTEGER,
        Team_ID INTEGER,
        FOREIGN KEY(Team_ID) REFERENCES Teams(Team_ID)
    )
    """)

    # Create Matches table
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS Matches (
        Match_ID INTEGER PRIMARY KEY,
        Match_Date TEXT,
        Home_Team_ID INTEGER,
        Away_Team_ID INTEGER,
        Home_Score INTEGER,
        Away_Score INTEGER,
        FOREIGN KEY(Home_Team_ID) REFERENCES Teams(Team_ID),
        FOREIGN KEY(Away_Team_ID) REFERENCES Teams(Team_ID)
    )
    """)

    # Insert data into Teams table
    cursor.execute("INSERT INTO Teams VALUES (1, 'Lakers', 'Los Angeles')")
    cursor.execute("INSERT INTO Teams VALUES (2, 'Warriors', 'Golden State')")
    cursor.execute("INSERT INTO Teams VALUES (3, 'Nets', 'Brooklyn')")
    cursor.execute("INSERT INTO Teams VALUES (4, 'Bucks', 'Milwaukee')")

    # Insert data into Players table
    cursor.execute("INSERT INTO Players VALUES (1, 'LeBron James', 'SF', 186, 1)")
    cursor.execute("INSERT INTO Players VALUES (2, 'Anthony Davis', 'PF', 157, 1)")
    cursor.execute("INSERT INTO Players VALUES (3, 'Russell Westbrook', 'PG', 178, 1)")
    cursor.execute("INSERT INTO Players VALUES (4, 'Carmelo Anthony', 'F', 231, 1)")
    cursor.execute("INSERT INTO Players VALUES (5, 'Dwight Howard', 'C', 45, 1)")
    cursor.execute("INSERT INTO Players VALUES (6, 'Rajon Rondo', 'PG', 24, 1)")
    cursor.execute("INSERT INTO Players VALUES (7, 'Jarred Vanderbilt', 'F', 120, 1)")

    cursor.execute("INSERT INTO Players VALUES (8, 'Stephen Curry', 'PG', 49, 2)")
    cursor.execute("INSERT INTO Players VALUES (9, 'Klay Thompson', 'SG', 15, 2)")
    cursor.execute("INSERT INTO Players VALUES (10, 'Draymond Green', 'PF', 98, 2)")
    cursor.execute("INSERT INTO Players VALUES (11, 'Andrew Wiggins', 'SF', 189, 2)")
    cursor.execute("INSERT INTO Players VALUES (12, 'James Wiseman', 'C', 73, 2)")

    cursor.execute("INSERT INTO Players VALUES (13, 'Kevin Durant', 'SF', 180, 3)")
    cursor.execute("INSERT INTO Players VALUES (14, 'James Harden', 'SG', 33, 3)")
    cursor.execute("INSERT INTO Players VALUES (15, 'Kyrie Irving', 'PG', 12, 3)")
    cursor.execute("INSERT INTO Players VALUES (16, 'Blake Griffin', 'PF', 206, 3)")
    cursor.execute("INSERT INTO Players VALUES (17, 'LaMarcus Aldridge', 'C', 82, 3)")
    cursor.execute("INSERT INTO Players VALUES (18, 'Giannis Antetokounmpo', 'PF', 194, 3)")

    cursor.execute("INSERT INTO Players VALUES (19, 'Giannis Antetokounmpo', 'PF', 245, 4)")
    cursor.execute("INSERT INTO Players VALUES (20, 'Khris Middleton', 'SF', 214, 4)")
    cursor.execute("INSERT INTO Players VALUES (21, 'Jrue Holiday', 'PG', 63, 4)")
    cursor.execute("INSERT INTO Players VALUES (22, 'Brook Lopez', 'C', 84, 4)")
    cursor.execute("INSERT INTO Players VALUES (23, 'Donte DiVincenzo', 'SG', 27, 4)")
    cursor.execute("INSERT INTO Players VALUES (24, 'MarJon Beauchamp', 'F', 171, 4)")
    cursor.execute("INSERT INTO Players VALUES (25, 'Pat Connaughton', 'SG', 59, 4)")

    # Insert data into Matches table
    cursor.execute("INSERT INTO Matches VALUES (1, '2023-01-05', 1, 2, 105, 100)")
    cursor.execute("INSERT INTO Matches VALUES (2, '2023-01-15', 3, 4, 110, 115)")
    cursor.execute("INSERT INTO Matches VALUES (3, '2023-01-27', 2, 3, 120, 125)")
    cursor.execute("INSERT INTO Matches VALUES (4, '2023-02-10', 4, 1, 130, 125)")
    cursor.execute("INSERT INTO Matches VALUES (5, '2023-02-21', 1, 3, 110, 120)")
    cursor.execute("INSERT INTO Matches VALUES (6, '2023-02-22', 2, 4, 115, 120)")
    cursor.execute("INSERT INTO Matches VALUES (7, '2023-03-05', 3, 1, 105, 100)")
    cursor.execute("INSERT INTO Matches VALUES (8, '2023-03-20', 4, 2, 125, 130)")
    cursor.execute("INSERT INTO Matches VALUES (9, '2023-04-01', 1, 4, 120, 125)")
    cursor.execute("INSERT INTO Matches VALUES (10, '2023-04-08', 2, 1, 115, 120)")
    cursor.execute("INSERT INTO Matches VALUES (11, '2023-04-16', 3, 2, 110, 115)")
    cursor.execute("INSERT INTO Matches VALUES (12, '2023-04-27', 4, 3, 105, 100)")

    # Commit the changes and close the connection
    connection.commit()
    connection.close()

    print("Database created!")

Creating database...
Database created!


In [2]:
# Let's create our own function to make this process easier

def execute_query(query, database="nba.db"):
    connection = sqlite3.connect(database)
    df = pd.read_sql(query, connection)
    connection.close()

    return df

In [3]:
query = """ 
SELECT *
FROM Teams
"""

execute_query(query)

Unnamed: 0,Team_ID,Team_Name,City
0,1,Lakers,Los Angeles
1,2,Warriors,Golden State
2,3,Nets,Brooklyn
3,4,Bucks,Milwaukee


In [4]:
query = """
SELECT *
FROM Players
"""

execute_query(query)

Unnamed: 0,Player_ID,Player_Name,Position,Season_Points,Team_ID
0,1,LeBron James,SF,186,1
1,2,Anthony Davis,PF,157,1
2,3,Russell Westbrook,PG,178,1
3,4,Carmelo Anthony,F,231,1
4,5,Dwight Howard,C,45,1
5,6,Rajon Rondo,PG,24,1
6,7,Jarred Vanderbilt,F,120,1
7,8,Stephen Curry,PG,49,2
8,9,Klay Thompson,SG,15,2
9,10,Draymond Green,PF,98,2


In [5]:
query = """
SELECT *
FROM Matches
"""

execute_query(query)

Unnamed: 0,Match_ID,Match_Date,Home_Team_ID,Away_Team_ID,Home_Score,Away_Score
0,1,2023-01-05,1,2,105,100
1,2,2023-01-15,3,4,110,115
2,3,2023-01-27,2,3,120,125
3,4,2023-02-10,4,1,130,125
4,5,2023-02-21,1,3,110,120
5,6,2023-02-22,2,4,115,120
6,7,2023-03-05,3,1,105,100
7,8,2023-03-20,4,2,125,130
8,9,2023-04-01,1,4,120,125
9,10,2023-04-08,2,1,115,120


In [9]:
# Let's refresh the SQL Operations

# Try to develop a query to get the average points for the home team and the average for the visitors across all matches (2 different values/columns)

query = """
SELECT AVG(Home_Score) AS Home_Score_Avg, AVG(Away_Score) AS Away_Score_Avg
FROM Matches
"""

execute_query(query)

Unnamed: 0,Home_Score_Avg,Away_Score_Avg
0,114.166667,116.25


In [26]:
# Now let's refresh the JOIN operations

# Try to develop a query to get all home matches from the Team 'Lakers'

query = """
SELECT *
FROM Matches
LEFT JOIN Teams
ON Teams.Team_ID = Matches.Home_Team_ID
WHERE Team_ID = 1
"""

execute_query(query)

Unnamed: 0,Match_ID,Match_Date,Home_Team_ID,Away_Team_ID,Home_Score,Away_Score,Team_ID,Team_Name,City
0,1,2023-01-05,1,2,105,100,1,Lakers,Los Angeles
1,5,2023-02-21,1,3,110,120,1,Lakers,Los Angeles
2,9,2023-04-01,1,4,120,125,1,Lakers,Los Angeles


In [27]:
# Now let's refresh the JOIN operations - SECOND WAY OF DOING IT

# Try to develop a query to get all home matches from the Team 'Lakers'

query = """
SELECT Teams.Team_Name, Matches.Match_Date
FROM Teams
INNER JOIN Matches
ON Matches.Home_Team_ID = Teams.Team_ID
WHERE Teams.Team_Name = 'Lakers'
"""

execute_query(query)

Unnamed: 0,Team_Name,Match_Date
0,Lakers,2023-01-05
1,Lakers,2023-02-21
2,Lakers,2023-04-01


---
## 2. Intermediate SQL Commands (part 2):
  - `GROUP BY`: grouping data by a column
    - SELECT <column_name>, <aggregate_function>(<column_name>) FROM <table_name> GROUP BY <column_name>;
   
  - `HAVING`: filtering data after grouping - Having is basically the "Where" after the Group By
    - SELECT <column_name>, <aggregate_function>(<column_name>) FROM <table_name> GROUP BY <column_name> HAVING <condition>;
   
  - `ORDER BY`: sorting data
    - SELECT <column_name> FROM <table_name> ORDER BY <column_name> ASC|DESC;
   
  - `LIMIT`: limiting the number of rows returned
    - SELECT <column_name>, <aggregate_function>(<column_name>) FROM <table_name> LIMIT <number>;
  
  - `CREATE TABLE`: creating a new table
    - CREATE TABLE <table_name> (<column_name> <data_type>, <column_name> <data_type>, ...);
   
  - `INSERT INTO`: inserting rows into a table
    - INSERT INTO <table_name> (<column_name>, <column_name>, ...) VALUES (<value>, <value>, ...);
   
  - `UPDATE`: updating rows in a table
    - UPDATE <table_name> SET <column_name> = <value> WHERE <condition>;
   
  - `DELETE`: deleting rows from a table
    - DELETE FROM <table_name> WHERE <condition>;

  - `DROP TABLE`: deleting a table
    - DROP TABLE <table_name>;

In [44]:
# GROUP BY

# Get the total number of players in each team

query = """
SELECT Teams.Team_Name, COUNT(Players.Player_ID) as Player_Count
FROM Players
JOIN Teams ON Players.Team_ID = Teams.Team_ID
GROUP BY Teams.Team_Name
"""

execute_query(query)

Unnamed: 0,Team_Name,Player_Count
0,Bucks,7
1,Lakers,7
2,Nets,6
3,Warriors,5


In [29]:
# Let's get the total number of players in each position

query = """
SELECT Players.Position, COUNT(Players.Player_ID) as Player_Count
FROM Players
GROUP BY Players.Position
"""

execute_query(query)

Unnamed: 0,Position,Player_Count
0,C,4
1,F,3
2,PF,5
3,PG,5
4,SF,4
5,SG,4


In [31]:
# Let's get the total number of PGs (Point Guards) in each team

query = """
SELECT Teams.Team_Name, COUNT(Players.Player_ID) as PG_Count
FROM Players
JOIN Teams 
ON Players.Team_ID = Teams.Team_ID
WHERE Players.Position = 'PG'
GROUP BY Teams.Team_Name
"""

execute_query(query)

Unnamed: 0,Team_Name,PG_Count
0,Bucks,1
1,Lakers,2
2,Nets,1
3,Warriors,1


In [36]:
query = """
SELECT *
FROM Teams
"""

execute_query(query)

Unnamed: 0,Team_ID,Team_Name,City
0,1,Lakers,Los Angeles
1,2,Warriors,Golden State
2,3,Nets,Brooklyn
3,4,Bucks,Milwaukee


In [37]:
query = """
SELECT *
FROM Matches
"""

execute_query(query)

Unnamed: 0,Match_ID,Match_Date,Home_Team_ID,Away_Team_ID,Home_Score,Away_Score
0,1,2023-01-05,1,2,105,100
1,2,2023-01-15,3,4,110,115
2,3,2023-01-27,2,3,120,125
3,4,2023-02-10,4,1,130,125
4,5,2023-02-21,1,3,110,120
5,6,2023-02-22,2,4,115,120
6,7,2023-03-05,3,1,105,100
7,8,2023-03-20,4,2,125,130
8,9,2023-04-01,1,4,120,125
9,10,2023-04-08,2,1,115,120


In [46]:
# Your turn: Get the total number of visitor matches played by each team

query = """
SELECT Teams.*, COUNT(Away_Team_ID)
FROM Teams
JOIN Matches
ON Teams.Team_ID = Matches.Away_Team_ID
GROUP BY Teams.Team_Name
"""

execute_query(query)

Unnamed: 0,Team_ID,Team_Name,City,COUNT(Away_Team_ID)
0,4,Bucks,Milwaukee,3
1,1,Lakers,Los Angeles,3
2,3,Nets,Brooklyn,3
3,2,Warriors,Golden State,3


In [49]:
# Your turn: Get the total number of visitor matches played by each team
# Here you cant knowthe team name, only the ID appears


query = """
SELECT Matches.Away_Team_ID, COUNT(Matches.Away_Team_ID)
FROM Matches
GROUP BY Matches.Away_Team_ID
"""

execute_query(query)

Unnamed: 0,Away_Team_ID,COUNT(Matches.Away_Team_ID)
0,1,3
1,2,3
2,3,3
3,4,3


In [52]:
# Your turn: Get the total number of visitor matches played by each team
# Here you cant knowthe team name, only the ID appears


query = """
SELECT Matches.Away_Team_ID, Teams.Team_Name, COUNT(Matches.Away_Team_ID)
FROM Matches
INNER JOIN Teams
ON Matches.Away_Team_ID = Teams.Team_ID
GROUP BY Matches.Away_Team_ID
"""

execute_query(query)

Unnamed: 0,Away_Team_ID,Team_Name,COUNT(Matches.Away_Team_ID)
0,1,Lakers,3
1,2,Warriors,3
2,3,Nets,3
3,4,Bucks,3


In [58]:
# WITHOUT HAVING TO UNDERSTAND THE HAVING COMMAND

# Get the teams with more than 2 players

query = """
SELECT Teams.Team_Name, COUNT(Players.Player_ID) as Player_Count
FROM Players
JOIN Teams 
ON Players.Team_ID = Teams.Team_ID
GROUP BY Teams.Team_Name
"""

execute_query(query)

Unnamed: 0,Team_Name,Player_Count
0,Bucks,7
1,Lakers,7
2,Nets,6
3,Warriors,5


In [63]:
# HAVING

# Get the teams with more than 2 players

query = """
SELECT Teams.Team_Name, COUNT(Players.Player_ID) as Player_Count
FROM Players
JOIN Teams 
ON Players.Team_ID = Teams.Team_ID
GROUP BY Teams.Team_Name
HAVING Player_Count > 6
"""

execute_query(query)

Unnamed: 0,Team_Name,Player_Count
0,Bucks,7
1,Lakers,7


In [65]:
# WITHOUT THE HAVING
# Let's get the positions where players score on average more than 100 points per season

query = """
SELECT Players.Position, AVG(Players.Season_Points) as Avg_Season_Points
FROM Players
GROUP BY Players.Position
"""

execute_query(query)

Unnamed: 0,Position,Avg_Season_Points
0,C,71.0
1,F,174.0
2,PF,180.0
3,PG,65.2
4,SF,192.25
5,SG,33.5


In [66]:
# Let's get the positions where players score on average more than 100 points per season

query = """
SELECT Players.Position, AVG(Players.Season_Points) as Avg_Season_Points
FROM Players
GROUP BY Players.Position
HAVING Avg_Season_Points > 100
"""

execute_query(query)

Unnamed: 0,Position,Avg_Season_Points
0,F,174.0
1,PF,180.0
2,SF,192.25


In [71]:
query = """ 
SELECT *
FROM TEAMS
"""

execute_query(query)

Unnamed: 0,Team_ID,Team_Name,City
0,1,Lakers,Los Angeles
1,2,Warriors,Golden State
2,3,Nets,Brooklyn
3,4,Bucks,Milwaukee


In [72]:
query = """ 
SELECT *
FROM PLAYERS
"""

execute_query(query)

Unnamed: 0,Player_ID,Player_Name,Position,Season_Points,Team_ID
0,1,LeBron James,SF,186,1
1,2,Anthony Davis,PF,157,1
2,3,Russell Westbrook,PG,178,1
3,4,Carmelo Anthony,F,231,1
4,5,Dwight Howard,C,45,1
5,6,Rajon Rondo,PG,24,1
6,7,Jarred Vanderbilt,F,120,1
7,8,Stephen Curry,PG,49,2
8,9,Klay Thompson,SG,15,2
9,10,Draymond Green,PF,98,2


In [95]:
# Your turn: Get the teams with more than 800 points in total (extracted from their players)

# hint: join players with teams, sum the players season points and call it Total_Points, then group by teams and filter with HAVING Total_Points. 
# Remember to SELECT all necessary columns as well!

query = """ 
SELECT Teams.Team_Name, SUM(Season_Points) AS Total_Points
FROM PLAYERS
JOIN TEAMS
ON PLAYERS.Team_ID = TEAMS.Team_ID
GROUP BY Team_Name
HAVING Total_Points > 800
"""

execute_query(query)

Unnamed: 0,Team_Name,Total_Points
0,Bucks,863
1,Lakers,941


### Enric:

In [93]:
# Your turn: Get the teams with more than 800 points in total (extracted from their players)

# hint: join players with teams, sum the players season points and call it Total_Points, then group by teams and filter with HAVING Total_Points. 
# Remember to SELECT all necessary columns as well!

query = """ 
SELECT Teams.Team_Name, SUM(Players.Season_Points) AS Teams_Points
FROM Teams
INNER JOIN Players
ON Teams.Team_ID = Players.Team_ID
GROUP BY Teams.Team_Name
HAVING Teams_Points > 800
"""

execute_query(query)

Unnamed: 0,Team_Name,Teams_Points
0,Bucks,863
1,Lakers,941


In [98]:
# ORDER BY

# Get the players ordered by their names

query = """
SELECT Player_ID, Player_Name 
FROM Players 
ORDER BY Player_Name
"""

execute_query(query)

Unnamed: 0,Player_ID,Player_Name
0,11,Andrew Wiggins
1,2,Anthony Davis
2,16,Blake Griffin
3,22,Brook Lopez
4,4,Carmelo Anthony
5,23,Donte DiVincenzo
6,10,Draymond Green
7,5,Dwight Howard
8,18,Giannis Antetokounmpo
9,19,Giannis Antetokounmpo


In [105]:
# Order Players by their Season Points
# we will need to use the DESC keyword to order them from highest to lowest

query = """
SELECT Player_Name, Season_Points
FROM Players
ORDER BY Season_Points DESC
"""

execute_query(query)

Unnamed: 0,Player_Name,Season_Points
0,Giannis Antetokounmpo,245
1,Carmelo Anthony,231
2,Khris Middleton,214
3,Blake Griffin,206
4,Giannis Antetokounmpo,194
5,Andrew Wiggins,189
6,LeBron James,186
7,Kevin Durant,180
8,Russell Westbrook,178
9,MarJon Beauchamp,171


In [106]:
# Let's add their team to the previous query

query = """
SELECT Player_Name, Season_Points, Team_Name
FROM Players
JOIN Teams ON Players.Team_ID = Teams.Team_ID
ORDER BY Season_Points DESC
"""

execute_query(query)

Unnamed: 0,Player_Name,Season_Points,Team_Name
0,Giannis Antetokounmpo,245,Bucks
1,Carmelo Anthony,231,Lakers
2,Khris Middleton,214,Bucks
3,Blake Griffin,206,Nets
4,Giannis Antetokounmpo,194,Nets
5,Andrew Wiggins,189,Warriors
6,LeBron James,186,Lakers
7,Kevin Durant,180,Nets
8,Russell Westbrook,178,Lakers
9,MarJon Beauchamp,171,Bucks


In [107]:
# Let's group by teams and sum the points of each player in the team, then order them descending by their total points

query = """
SELECT Team_Name, SUM(Season_Points) as Total_Points
FROM Players
JOIN Teams ON Players.Team_ID = Teams.Team_ID
GROUP BY Team_Name
ORDER BY Total_Points DESC
"""

execute_query(query)

Unnamed: 0,Team_Name,Total_Points
0,Lakers,941
1,Bucks,863
2,Nets,707
3,Warriors,424


In [109]:
# LIMIT

# Get the top 3 players with the most points

query = """
SELECT Player_Name, Season_Points
FROM Players
ORDER BY Season_Points DESC
LIMIT 5
"""

execute_query(query)

Unnamed: 0,Player_Name,Season_Points
0,Giannis Antetokounmpo,245
1,Carmelo Anthony,231
2,Khris Middleton,214
3,Blake Griffin,206
4,Giannis Antetokounmpo,194


### The following is an example of the query we have, but when we close this is erased, if you usually use the same table you can create a new table

In [112]:
query = """
    SELECT Team_Name, SUM(Season_Points) as Total_Points
    FROM Players
    JOIN Teams ON Players.Team_ID = Teams.Team_ID
    GROUP BY Team_Name
    ORDER BY Total_Points DESC
"""

execute_query(query)

Unnamed: 0,Team_Name,Total_Points
0,Lakers,941
1,Bucks,863
2,Nets,707
3,Warriors,424


In [114]:
# CREATE TABLE

# Let's create a table with the total points of each team

# The IF NOT EXISTS, it will create it unless it exists, so it wotn throw an error if you run this query multiple times

# Connect to SQLite database
connection = sqlite3.connect("nba.db")

# Create cursor object
cursor = connection.cursor()

query = """
CREATE TABLE IF NOT EXISTS Teams_Points AS
    SELECT Team_Name, SUM(Season_Points) as Total_Points
    FROM Players
    JOIN Teams ON Players.Team_ID = Teams.Team_ID
    GROUP BY Team_Name
    ORDER BY Total_Points DESC
"""

cursor.execute(query)

connection.commit()
connection.close()

In [113]:
# Let's check if the table was created

query = """
SELECT *
FROM Teams_Points
"""

execute_query(query)

Unnamed: 0,Team_Name,Total_Points
0,Lakers,941
1,Bucks,863
2,Nets,707
3,Warriors,424


In [115]:
# INSERT INTO

# Let's add a new team to the Teams_Points table

# Connect to SQLite database
connection = sqlite3.connect("nba.db")

# Create cursor object
cursor = connection.cursor()

query = """
INSERT INTO Teams_Points VALUES ('Clippers', 870)
"""

cursor.execute(query)

connection.commit()
connection.close()

In [116]:
# Let's check the table again

query = """
SELECT *
FROM Teams_Points
"""

execute_query(query)

Unnamed: 0,Team_Name,Total_Points
0,Lakers,941
1,Bucks,863
2,Nets,707
3,Warriors,424
4,Clippers,870


In [117]:
# UPDATE

# Let's update the points of the Clippers

connection = sqlite3.connect("nba.db")
cursor = connection.cursor()

query = """
UPDATE Teams_Points
SET Total_Points = 1100
WHERE Team_Name = 'Clippers'
"""

cursor.execute(query)

connection.commit()
connection.close()

In [118]:
# Let's check the table again

query = """
SELECT *
FROM Teams_Points
"""

execute_query(query)

Unnamed: 0,Team_Name,Total_Points
0,Lakers,941
1,Bucks,863
2,Nets,707
3,Warriors,424
4,Clippers,1100


In [119]:
# DELETE

# DELETE FROM <table_name> WHERE <condition>;

# Let's delete the Clippers from the table

connection = sqlite3.connect("nba.db")
cursor = connection.cursor()

query = """
DELETE 
FROM Teams_Points
WHERE Team_Name = 'Clippers'
"""

cursor.execute(query)

connection.commit()
connection.close()

In [120]:
# Let's check the table again

query = """
SELECT *
FROM Teams_Points
"""

execute_query(query)

Unnamed: 0,Team_Name,Total_Points
0,Lakers,941
1,Bucks,863
2,Nets,707
3,Warriors,424


In [121]:
# DROP TABLE

# Let's drop the Teams_Points table

connection = sqlite3.connect("nba.db")
cursor = connection.cursor()

query = """
DROP TABLE Teams_Points
"""

cursor.execute(query)

connection.commit()
connection.close()

In [122]:
# Let's check that the table no longer exists (so querying it we will get an error)

query = """
SELECT *
FROM Teams_Points
"""

execute_query(query)

DatabaseError: Execution failed on sql '
SELECT *
FROM Teams_Points
': no such table: Teams_Points

---
## 3. Python refresh and practice

From the spotify-2023.csv file that we used in day 7, we will try to get the unique number of individual artists. To do it, first we will need to process it somehow as some arist(s)_name values contain multiple artist's name separated by a commas.

In [171]:
spotify_df = pd.read_csv("spotify-2023.csv")

spotify_df

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
942,My Mind & Me,Selena Gomez,1,2022,11,3,953,0,91473363,61,...,144,A,Major,60,24,39,57,0,8,3
943,Bigger Than The Whole Sky,Taylor Swift,1,2022,10,21,1180,0,121871870,4,...,166,F#,Major,42,7,24,83,1,12,6
944,A Veces (feat. Feid),"Feid, Paulo Londra",2,2022,11,3,573,0,73513683,2,...,92,C#,Major,80,81,67,4,0,8,6
945,En La De Ella,"Feid, Sech, Jhayco",3,2022,10,20,1320,0,133895612,29,...,97,C#,Major,82,67,77,8,0,12,5


In [172]:
spotify_df["artist(s)_name"].unique()

array(['Latto, Jung Kook', 'Myke Towers', 'Olivia Rodrigo',
       'Taylor Swift', 'Bad Bunny', 'Dave, Central Cee',
       'Eslabon Armado, Peso Pluma', 'Quevedo', 'Gunna',
       'Peso Pluma, Yng Lvcas', 'Bad Bunny, Grupo Frontera', 'NewJeans',
       'Miley Cyrus', 'David Kushner', 'Harry Styles', 'SZA',
       'Fifty Fifty', 'Billie Eilish', 'Feid, Young Miko', 'Jimin',
       'Gabito Ballesteros, Junior H, Peso Pluma', 'Arctic Monkeys',
       'Bizarrap, Peso Pluma', 'The Weeknd, Madonna, Playboi Carti',
       'Fuerza Regida', 'R��ma, Selena G', 'Tainy, Bad Bunny',
       'Morgan Wallen', 'Dua Lipa', 'Troye Sivan',
       'Peso Pluma, Grupo Frontera',
       'The Weeknd, 21 Savage, Metro Boomin', 'Karol G, Shakira',
       'Big One, Duki, Lit Killah, Maria Becerra, FMK, Rusherking, Emilia, Tiago pzk',
       'Yahritza Y Su Esencia, Grupo Frontera', 'Junior H, Peso Pluma',
       'Post Malone, Swae Lee', 'Bebe Rexha, David Guetta',
       'Tyler, The Creator, Kali Uchis', 'Nicki M

In [173]:
all_artists_raw = list(spotify_df["artist(s)_name"])

all_string = ", ".join(all_artists_raw)

sep_artists = all_string.split(", ")

len(sep_artists)

len(sep_artists), len(set(sep_artists))

(1474, 692)

---
## 4. Python (mini)-Startup Lab 🧪

Last activity of the subject! 🎉

Now it's your turn to practice what we learnt and practiced about Python during the course. You have to find a simple and well defined problem from your daily life, work or hobbies and solve it using Python. Then, you have to present your solution to the class in 1 minut as in an elevator pitch.

To do it you can use the "Start with WHY" method: 

- Start with WHY: explain the problem you want to solve and why it is important to solve it.

- HOW: explain how you solved it, what tools and techniques you used.

- WHAT: finally show the solution and the results.


Let's see an example:

#### Drone's flight time tracker

Some years ago I had a drone's startup were we had differents drones that we operated every now and then. It was mandatory to keep a track of their flights and flight time, something that we did manually.

I decided to solve this using Pyhton to build a dictionary of lists, were every element is a drone with its info, and then keep track of everyone of them with a list of flights and flight time.

So, using my simple app, drone pilots can add new flights and the app will update the flight time of the drone in a centralized and standard format. Also, I added a function to get the total flight time of a drone.

In [125]:
# Data

drones_times = {
    "s_drone": [23, 5, 12],
    "l_drone": [15, 10],
    "wing_drone": [40, 52, 27],
}


# app

option = 0

while option != 3:
    
    print("\n\nSelect and option:")
    print("------------------")
    print("1. Add drone flight time")
    print("2. Show drone flight time")
    print("3. Exit")

    option = int(input("Select an option: "))

    if option == 1:
        drone_id = input("Enter the drone id: ")
        flight_time = input("Enter the new flight time")

        drones_times[drone_id].append(flight_time)

    elif option == 2:
        for drone in drones_times:
            print(drones_times, drones_times[drone], "total:", sum(drones_times[drone]))

    elif option == 3:
        print("Goodbye!")

    else:
        print("Invalid option")



Select and option:
------------------
1. Add drone flight time
2. Show drone flight time
3. Exit


ValueError: invalid literal for int() with base 10: 'wing_drone'

In [183]:
class Supermarket:
    def __init__(self, name, products):
        self.name = name
        self.products = products

def compare_prices(supermarkets, product):
    prices = {market.name: market.products.get(product, float('inf')) for market in supermarkets}
    cheapest_supermarket = min(prices, key=prices.get)
    
    print(f"Product: {product}")
    for market, price in prices.items():
        print(f"{market}: {price} euros")
    
    print(f"The cheapest supermarket for {product} is: {cheapest_supermarket}")

if __name__ == "__main__":
    # Define supermarkets and their products with prices
    mercadona = Supermarket("Mercadona", {"eggs": 1.5, "milk": 0.8, "bread": 1.2})
    lidl = Supermarket("Lidl", {"eggs": 1.2, "milk": 0.8, "bread": 1.4})
    keisy = Supermarket("Keisy", {"eggs": 1.6, "milk": 0.9, "bread": 1.4})
    carrefour = Supermarket("Carrefour", {"eggs": 1.4, "milk": 0.85, "bread": 1.1})

    supermarkets = [mercadona, lidl, keisy, carrefour]

    # Choose a product to compare prices
    product_to_compare = input()

    # Compare prices and print the results
    compare_prices(supermarkets, product_to_compare)


Product: milk
Mercadona: 0.8 euros
Lidl: 0.8 euros
Keisy: 0.9 euros
Carrefour: 0.85 euros
The cheapest supermarket for milk is: Mercadona
