In [74]:
import pandas as pd
import numpy as np
import sqlite3 as db

In [75]:
%pwd

'/Users/manulabricole/Documents/CDN/BDD'

# Import And Query

### Name of the file .db

In [76]:
db_name = 'billboard-200.db'

### Get infos of DB

In [77]:
def get_infos(db_file):
    conn = db.connect(db_file)
    cur = conn.cursor()
    
    # Retrieve the names of all tables in the database
    cur.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = [table[0] for table in cur.fetchall()]
    
    # Retrieve the columns in each table
    columns = {}
    for table in tables:
        cur.execute("PRAGMA table_info({})".format(table))
        columns[table] = [column[1] for column in cur.fetchall()]
    
    cur.close()
    conn.close()
    print("°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°")
    print("--------------- TABLES -----------------")
    print("")
    print("Tables:", tables)
    print("")
    print("--------------- COLUMNS ----------------")
    for table in tables:
        print("")
        print(f"----> {table} <----")
        print("Columns --> ", columns[table])

### Query the DB

In [100]:
# The name has to be in the same folder as the notebook
def query_db(name, query):
    
    conn = db.connect(name)
    cur = conn.cursor()
    cur.execute(query)
    column_names = [description[0] for description in cur.description]
    results = cur.fetchall()
    cur.close()
    conn.close()
    
    df_results = pd.DataFrame(results, columns=column_names)

    return df_results

### Test

In [79]:
query_db(db_name, "SELECT * FROM albums WHERE artist='The Beatles'")

Unnamed: 0,id,date,artist,album,rank,length,track_length
0,118,2019-01-19,The Beatles,The Beatles [White Album],117,107.0,
1,136,2019-01-19,The Beatles,1,135,27.0,172303.481481
2,139,2019-01-19,The Beatles,Abbey Road,138,17.0,184335.058824
3,282,2019-01-12,The Beatles,The Beatles [White Album],81,107.0,
4,327,2019-01-12,The Beatles,Abbey Road,126,17.0,184335.058824
5,329,2019-01-12,The Beatles,1,128,27.0,172303.481481
6,469,2019-01-05,The Beatles,The Beatles [White Album],68,107.0,
7,518,2019-01-05,The Beatles,Abbey Road,117,17.0,184335.058824
8,564,2019-01-05,The Beatles,1,163,27.0,172303.481481
9,582,2019-01-05,The Beatles,Sgt. Pepper's Lonely Hearts Club Band,181,13.0,186493.923077


Unnamed: 0,id,date,artist,album,rank,length,track_length
0,118,2019-01-19,The Beatles,The Beatles [White Album],117,107.0,
1,136,2019-01-19,The Beatles,1,135,27.0,172303.481481
2,139,2019-01-19,The Beatles,Abbey Road,138,17.0,184335.058824
3,282,2019-01-12,The Beatles,The Beatles [White Album],81,107.0,
4,327,2019-01-12,The Beatles,Abbey Road,126,17.0,184335.058824
...,...,...,...,...,...,...,...
2781,564970,1964-02-15,The Beatles,Introducing...The Beatles,22,,
2782,565098,1964-02-15,The Beatles,The Beatles With Tony Sheridan And Their Guests,147,,
2783,565104,1964-02-08,The Beatles,Meet The Beatles!,3,12.0,209922.000000
2784,565160,1964-02-08,The Beatles,Introducing...The Beatles,59,,


# billeboard-200

In [92]:
name = 'billboard-200.db'

In [101]:
get_infos(name)

°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°°
--------------- TABLES -----------------

Tables: ['albums', 'acoustic_features']

--------------- COLUMNS ----------------

----> albums <----
Columns -->  ['id', 'date', 'artist', 'album', 'rank', 'length', 'track_length']

----> acoustic_features <----
Columns -->  ['id', 'song', 'album', 'artist', 'acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence', 'album_id', 'date']


In [102]:
query_db(name, "SELECT * from albums")

Unnamed: 0,id,date,artist,album,rank,length,track_length
0,1,,,,,,
1,2,2019-01-19,A Boogie Wit da Hoodie,Hoodie SZN,1,20.0,185233.800000
2,3,2019-01-19,21 Savage,I Am > I Was,2,15.0,211050.733333
3,4,2019-01-19,Soundtrack,Spider-Man: Into The Spider-Verse,3,13.0,190866.384615
4,5,2019-01-19,Meek Mill,Championships,4,19.0,219173.894737
...,...,...,...,...,...,...,...
573942,573943,1963-01-05,The Dave Brubeck Quartet,The Dave Brubeck Quartet At Carnegie Hall,146,12.0,527888.583333
573943,573944,1963-01-05,Woody Herman,Encore: Woody Herman - 1963,147,,
573944,573945,1963-01-05,Lawrence Welk,1963's Early Hits,148,,
573945,573946,1963-01-05,Rusty Warren,Knockers Up!,149,,


### Most week at number 1

In [None]:
query = 

In [114]:
query = "\
SELECT album, rank, COUNT(rank) AS count \
FROM albums \
WHERE (rank == 1 AND date > 1999)\
GROUP BY album \
ORDER BY count DESC \
LIMIT 10"

In [115]:
df = query_db(name, query)

In [116]:
df

Unnamed: 0,album,rank,count
0,21,1,25
1,Views,1,13
2,Frozen,1,13
3,Supernatural,1,12
4,Fearless,1,11
5,1989,1,11
6,Millennium,1,10
7,25,1,10
8,Confessions,1,9
9,Weathered,1,8


### Best album criteria

In [None]:
query = "\
SELECT artist, \
        album, \
        COUNT(rank) AS number_top_200, \
        SUM(rank) AS sum_classement,  \
        (AVG(rank)*1.0 / COUNT(rank)) AS score \
FROM albums \
GROUP BY album \
ORDER BY score ASC \
LIMIT 11


In [117]:
df = query_db(name, query)