Importamos la librería de sqlite3

In [63]:
import sqlite3
import pandas as pd

Definimos funciones para facilitar manejo de base de datos.

In [69]:
def esquema_tablas(cursor):
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tablas = cursor.fetchall()

    # Obtener el esquema de cada tabla
    for (tabla,) in tablas:
        print(f"\nTabla: {tabla}")
        cursor.execute(f"PRAGMA table_info({tabla});")
        columnas = cursor.fetchall()
        for col in columnas:
            cid, name, tipo, notnull, dflt_value, pk = col
            print(f"  - {name} ({tipo}) {'PRIMARY KEY' if pk else ''}")
    
    return

def execute_query(query, cursor):
    cursor.execute(query)
    resultados = cursor.fetchall()
    for resultado in resultados:
        print(resultado)
    

    return 

def vista_rapida_query(query, conn, limite=5):
    """
    Ejecuta una consulta SQL sobre una conexión SQLite3 y muestra una vista rápida con pandas.

    Parámetros:
    - query (str): Consulta SQL a ejecutar.
    - conn (sqlite3.Connection): Objeto de conexión a la base de datos SQLite.
    - limite (int): Número de filas a mostrar. Default es 5.

    Retorna:
    - DataFrame con los resultados.
    """
    try:
        df = pd.read_sql_query(query, conn)
        display(df.head(limite))
        return 
    except Exception as e:
        print("Error al ejecutar la consulta:")
        print(e)
        return None

Ejemplo de uso:

In [5]:
conn = sqlite3.connect("Chinook_Sqlite.sqlite")
cursor = conn.cursor()

In [6]:
esquema_tablas(cursor)


Tabla: Album
  - AlbumId (INTEGER) PRIMARY KEY
  - Title (NVARCHAR(160)) 
  - ArtistId (INTEGER) 

Tabla: Artist
  - ArtistId (INTEGER) PRIMARY KEY
  - Name (NVARCHAR(120)) 

Tabla: Customer
  - CustomerId (INTEGER) PRIMARY KEY
  - FirstName (NVARCHAR(40)) 
  - LastName (NVARCHAR(20)) 
  - Company (NVARCHAR(80)) 
  - Address (NVARCHAR(70)) 
  - City (NVARCHAR(40)) 
  - State (NVARCHAR(40)) 
  - Country (NVARCHAR(40)) 
  - PostalCode (NVARCHAR(10)) 
  - Phone (NVARCHAR(24)) 
  - Fax (NVARCHAR(24)) 
  - Email (NVARCHAR(60)) 
  - SupportRepId (INTEGER) 

Tabla: Employee
  - EmployeeId (INTEGER) PRIMARY KEY
  - LastName (NVARCHAR(20)) 
  - FirstName (NVARCHAR(20)) 
  - Title (NVARCHAR(30)) 
  - ReportsTo (INTEGER) 
  - BirthDate (DATETIME) 
  - HireDate (DATETIME) 
  - Address (NVARCHAR(70)) 
  - City (NVARCHAR(40)) 
  - State (NVARCHAR(40)) 
  - Country (NVARCHAR(40)) 
  - PostalCode (NVARCHAR(10)) 
  - Phone (NVARCHAR(24)) 
  - Fax (NVARCHAR(24)) 
  - Email (NVARCHAR(60)) 

Tabla: Genre

In [18]:
query = """ 
SELECT *
FROM Customer
WHERE Country == 'USA';
"""

In [19]:
execute_query(query, cursor)

(16, 'Frank', 'Harris', 'Google Inc.', '1600 Amphitheatre Parkway', 'Mountain View', 'CA', 'USA', '94043-1351', '+1 (650) 253-0000', '+1 (650) 253-0000', 'fharris@google.com', 4)
(17, 'Jack', 'Smith', 'Microsoft Corporation', '1 Microsoft Way', 'Redmond', 'WA', 'USA', '98052-8300', '+1 (425) 882-8080', '+1 (425) 882-8081', 'jacksmith@microsoft.com', 5)
(18, 'Michelle', 'Brooks', None, '627 Broadway', 'New York', 'NY', 'USA', '10012-2612', '+1 (212) 221-3546', '+1 (212) 221-4679', 'michelleb@aol.com', 3)
(19, 'Tim', 'Goyer', 'Apple Inc.', '1 Infinite Loop', 'Cupertino', 'CA', 'USA', '95014', '+1 (408) 996-1010', '+1 (408) 996-1011', 'tgoyer@apple.com', 3)
(20, 'Dan', 'Miller', None, '541 Del Medio Avenue', 'Mountain View', 'CA', 'USA', '94040-111', '+1 (650) 644-3358', None, 'dmiller@comcast.com', 4)
(21, 'Kathy', 'Chase', None, '801 W 4th Street', 'Reno', 'NV', 'USA', '89503', '+1 (775) 223-7665', None, 'kachase@hotmail.com', 5)
(22, 'Heather', 'Leacock', None, '120 S Orange Ave', 'Orl

In [22]:
otra_query = """ 
SELECT *
FROM Artist
LIMIT 10;
"""
execute_query(otra_query, cursor)

(1, 'AC/DC')
(2, 'Accept')
(3, 'Aerosmith')
(4, 'Alanis Morissette')
(5, 'Alice In Chains')
(6, 'Antônio Carlos Jobim')
(7, 'Apocalyptica')
(8, 'Audioslave')
(9, 'BackBeat')
(10, 'Billy Cobham')


Después de estos ejemplos, procedemos a cerrar la conexión.

In [23]:
conn.commit()
conn.close()

Para ejemplificar algunos comandos de SQL, se realizarán operaciones en una sola tabla. ¿Podemos pasar algún csv trabajado y pasarlo a sqlite? La respuesta es si.

In [24]:
import pandas as pd

In [25]:
df = pd.read_csv('datos_practica_1.csv')

In [26]:
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [27]:
df = df.drop('Unnamed: 32', axis = 1)

In [28]:
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


Procedemos a llevar esta tabla a una base de datos relacional.

In [35]:
# Conectar a la base de datos
conn = sqlite3.connect("datos_practica_1.sqlite")

In [36]:
df.to_sql("tabla_1", conn, if_exists="replace", index=False)

569

In [37]:
conn.commit()
conn.close()

Veamos si se guardaron los registros:

In [38]:
conn = sqlite3.connect("datos_practica_1.sqlite")
cursor = conn.cursor()

In [39]:
query = """ 
SELECT radius_mean, texture_mean
FROM tabla_1;
"""

In [40]:
execute_query(query, cursor)

(17.99, 10.38)
(20.57, 17.77)
(19.69, 21.25)
(11.42, 20.38)
(20.29, 14.34)
(12.45, 15.7)
(18.25, 19.98)
(13.71, 20.83)
(13.0, 21.82)
(12.46, 24.04)
(16.02, 23.24)
(15.78, 17.89)
(19.17, 24.8)
(15.85, 23.95)
(13.73, 22.61)
(14.54, 27.54)
(14.68, 20.13)
(16.13, 20.68)
(19.81, 22.15)
(13.54, 14.36)
(13.08, 15.71)
(9.504, 12.44)
(15.34, 14.26)
(21.16, 23.04)
(16.65, 21.38)
(17.14, 16.4)
(14.58, 21.53)
(18.61, 20.25)
(15.3, 25.27)
(17.57, 15.05)
(18.63, 25.11)
(11.84, 18.7)
(17.02, 23.98)
(19.27, 26.47)
(16.13, 17.88)
(16.74, 21.59)
(14.25, 21.72)
(13.03, 18.42)
(14.99, 25.2)
(13.48, 20.82)
(13.44, 21.58)
(10.95, 21.35)
(19.07, 24.81)
(13.28, 20.28)
(13.17, 21.81)
(18.65, 17.6)
(8.196, 16.84)
(13.17, 18.66)
(12.05, 14.63)
(13.49, 22.3)
(11.76, 21.6)
(13.64, 16.34)
(11.94, 18.24)
(18.22, 18.7)
(15.1, 22.02)
(11.52, 18.75)
(19.21, 18.57)
(14.71, 21.59)
(13.05, 19.31)
(8.618, 11.79)
(10.17, 14.88)
(8.598, 20.98)
(14.25, 22.15)
(9.173, 13.86)
(12.68, 23.84)
(14.78, 23.94)
(9.465, 21.01)
(11.31,

¿Cómo interpreto la tabla sqlite? ¿Hizo buen trabajo al momento de inferir el tipo de dato? Veamos.

In [42]:
esquema_tablas(cursor)


Tabla: tabla_1
  - id (INTEGER) 
  - diagnosis (TEXT) 
  - radius_mean (REAL) 
  - texture_mean (REAL) 
  - perimeter_mean (REAL) 
  - area_mean (REAL) 
  - smoothness_mean (REAL) 
  - compactness_mean (REAL) 
  - concavity_mean (REAL) 
  - concave points_mean (REAL) 
  - symmetry_mean (REAL) 
  - fractal_dimension_mean (REAL) 
  - radius_se (REAL) 
  - texture_se (REAL) 
  - perimeter_se (REAL) 
  - area_se (REAL) 
  - smoothness_se (REAL) 
  - compactness_se (REAL) 
  - concavity_se (REAL) 
  - concave points_se (REAL) 
  - symmetry_se (REAL) 
  - fractal_dimension_se (REAL) 
  - radius_worst (REAL) 
  - texture_worst (REAL) 
  - perimeter_worst (REAL) 
  - area_worst (REAL) 
  - smoothness_worst (REAL) 
  - compactness_worst (REAL) 
  - concavity_worst (REAL) 
  - concave points_worst (REAL) 
  - symmetry_worst (REAL) 
  - fractal_dimension_worst (REAL) 


In [45]:
query = """ 
SELECT id, diagnosis, radius_mean
FROM tabla_1;
"""
execute_query(query, cursor)

(842302, 'M', 17.99)
(842517, 'M', 20.57)
(84300903, 'M', 19.69)
(84348301, 'M', 11.42)
(84358402, 'M', 20.29)
(843786, 'M', 12.45)
(844359, 'M', 18.25)
(84458202, 'M', 13.71)
(844981, 'M', 13.0)
(84501001, 'M', 12.46)
(845636, 'M', 16.02)
(84610002, 'M', 15.78)
(846226, 'M', 19.17)
(846381, 'M', 15.85)
(84667401, 'M', 13.73)
(84799002, 'M', 14.54)
(848406, 'M', 14.68)
(84862001, 'M', 16.13)
(849014, 'M', 19.81)
(8510426, 'B', 13.54)
(8510653, 'B', 13.08)
(8510824, 'B', 9.504)
(8511133, 'M', 15.34)
(851509, 'M', 21.16)
(852552, 'M', 16.65)
(852631, 'M', 17.14)
(852763, 'M', 14.58)
(852781, 'M', 18.61)
(852973, 'M', 15.3)
(853201, 'M', 17.57)
(853401, 'M', 18.63)
(853612, 'M', 11.84)
(85382601, 'M', 17.02)
(854002, 'M', 19.27)
(854039, 'M', 16.13)
(854253, 'M', 16.74)
(854268, 'M', 14.25)
(854941, 'B', 13.03)
(855133, 'M', 14.99)
(855138, 'M', 13.48)
(855167, 'M', 13.44)
(855563, 'M', 10.95)
(855625, 'M', 19.07)
(856106, 'M', 13.28)
(85638502, 'M', 13.17)
(857010, 'M', 18.65)
(85713702,

In [47]:
query = """ 
SELECT id, diagnosis, radius_mean
FROM tabla_1
WHERE diagnosis = 'M';
"""
execute_query(query, cursor)

(842302, 'M', 17.99)
(842517, 'M', 20.57)
(84300903, 'M', 19.69)
(84348301, 'M', 11.42)
(84358402, 'M', 20.29)
(843786, 'M', 12.45)
(844359, 'M', 18.25)
(84458202, 'M', 13.71)
(844981, 'M', 13.0)
(84501001, 'M', 12.46)
(845636, 'M', 16.02)
(84610002, 'M', 15.78)
(846226, 'M', 19.17)
(846381, 'M', 15.85)
(84667401, 'M', 13.73)
(84799002, 'M', 14.54)
(848406, 'M', 14.68)
(84862001, 'M', 16.13)
(849014, 'M', 19.81)
(8511133, 'M', 15.34)
(851509, 'M', 21.16)
(852552, 'M', 16.65)
(852631, 'M', 17.14)
(852763, 'M', 14.58)
(852781, 'M', 18.61)
(852973, 'M', 15.3)
(853201, 'M', 17.57)
(853401, 'M', 18.63)
(853612, 'M', 11.84)
(85382601, 'M', 17.02)
(854002, 'M', 19.27)
(854039, 'M', 16.13)
(854253, 'M', 16.74)
(854268, 'M', 14.25)
(855133, 'M', 14.99)
(855138, 'M', 13.48)
(855167, 'M', 13.44)
(855563, 'M', 10.95)
(855625, 'M', 19.07)
(856106, 'M', 13.28)
(85638502, 'M', 13.17)
(857010, 'M', 18.65)
(85715, 'M', 13.17)
(857392, 'M', 18.22)
(857438, 'M', 15.1)
(857637, 'M', 19.21)
(857793, 'M', 1

In [48]:
query = """ 
SELECT id, diagnosis, area_mean
FROM tabla_1
WHERE area_mean > 1000
LIMIT 5;
"""
execute_query(query, cursor)

(842302, 'M', 1001.0)
(842517, 'M', 1326.0)
(84300903, 'M', 1203.0)
(84358402, 'M', 1297.0)
(844359, 'M', 1040.0)


In [50]:
query = """ 

SELECT diagnosis, COUNT(*) AS total
FROM tabla_1
GROUP BY diagnosis;
"""
execute_query(query, cursor)

('B', 357)
('M', 212)


In [51]:
query = """ 
SELECT diagnosis, AVG(radius_mean) AS promedio_radio
FROM tabla_1
GROUP BY diagnosis;
"""
execute_query(query, cursor)

('B', 12.14652380952381)
('M', 17.462830188679245)


In [56]:
query = """ 
SELECT diagnosis, AVG(area_mean) AS promedio_area
FROM tabla_1
GROUP BY diagnosis
HAVING (promedio_area > 600);
"""
execute_query(query, cursor)

('M', 978.3764150943396)


In [57]:
query = """ 
SELECT 
    diagnosis,
    COUNT(*) AS total,
    AVG(perimeter_mean) AS avg_perimeter,
    MIN(perimeter_mean) AS min_perimeter,
    MAX(perimeter_mean) AS max_perimeter
FROM tabla_1
"""
execute_query(query, cursor)

('M', 569, 91.96903339191563, 43.79, 188.5)


In [58]:
query = """ 
SELECT id, diagnosis, radius_mean, texture_mean
FROM tabla_1
WHERE radius_mean > 20
ORDER BY texture_mean DESC
LIMIT 10;
"""
execute_query(query, cursor)

(88995002, 'M', 20.73, 31.12)
(927241, 'M', 20.6, 29.33)
(926682, 'M', 20.13, 28.25)
(88206102, 'M', 20.51, 27.81)
(887549, 'M', 20.31, 27.06)
(88299702, 'M', 23.21, 26.97)
(9011494, 'M', 20.2, 26.83)
(878796, 'M', 23.29, 26.67)
(873593, 'M', 21.09, 26.57)
(911296202, 'M', 27.42, 26.27)


In [80]:
conn.commit()
conn.close()