In [30]:
from os import environ
from datetime import datetime, timedelta

from dotenv import load_dotenv
from psycopg2 import extensions, connect
import pandas as pd
from xhtml2pdf import pisa

YESTERDAY_DATE = datetime.strftime(datetime.now() - timedelta(1), '%d-%m-%Y')


def get_db_connection() -> extensions.connection:
    """Returns a connection to the AWS Bandcamp database"""

    try:
        return connect(user=environ["DB_USER"],
                       password=environ["DB_PASSWORD"],
                       host=environ["DB_IP"],
                       port=environ["DB_PORT"],
                       database=environ["DB_NAME"])
    except ConnectionError:
        print("Error: Cannot connect to the database")

In [31]:
def load_all_data(db_connection: extensions.connection) -> pd.DataFrame:
    """Loads all the data from the database into a pandas dataframe"""

    with db_connection.cursor() as curr:

        curr.execute("""
                    SELECT sale_event.*, country.country, artist.artist_name, genre.genre, item_type.item_type, item.item_name
                    FROM sale_event
                    JOIN country
                    ON country.country_id = sale_event.country_id
                    JOIN item
                    ON item.item_id = sale_event.item_id
                    JOIN artist
                    ON artist.artist_id = item.artist_id
                    JOIN item_genre
                    ON item_genre.item_id = item.item_id
                    JOIN genre
                    ON genre.genre_id =item_genre.genre_id
                    JOIN item_type
                    ON item_type.item_type_id = item.item_type_id;""")
        tuples = curr.fetchall()
        column_names = ['sale_id', 'sale_time', 'amount', 'item_id',
                        'country_id', 'country', 'artist', 'genre', 'item_type', 'item_name']

        df = pd.DataFrame(tuples, columns=column_names)

        return df

In [32]:
load_dotenv()
connection = get_db_connection()
data = load_all_data(connection)
data

Unnamed: 0,sale_id,sale_time,amount,item_id,country_id,country,artist,genre,item_type,item_name
0,12661,2024-01-09 03:10:40+00:00,128,4,1,Australia,the ambient drones of bill baxter,soundscapes,album,Procyon
1,9245,2024-01-08 23:08:30+00:00,64,4,21,New Zealand,the ambient drones of bill baxter,soundscapes,album,Procyon
2,8685,2024-01-08 22:22:22+00:00,638,4,10,United States,the ambient drones of bill baxter,soundscapes,album,Procyon
3,8238,2024-01-08 22:03:01+00:00,64,4,8,Canada,the ambient drones of bill baxter,soundscapes,album,Procyon
4,5910,2024-01-08 19:50:54+00:00,127,4,2,United Kingdom,the ambient drones of bill baxter,soundscapes,album,Procyon
...,...,...,...,...,...,...,...,...,...,...
121694,21917,2024-01-09 14:19:58+00:00,127,25676,2,United Kingdom,steve grocott for children,lively,track,New River Train
121695,6376,2024-01-08 20:36:15+00:00,100,9823,2,United Kingdom,yakui,free noise,album,NEVER
121696,6376,2024-01-08 20:36:15+00:00,100,9823,2,United Kingdom,yakui,noise,album,NEVER
121697,6376,2024-01-08 20:36:15+00:00,100,9823,2,United Kingdom,yakui,free improvisation,album,NEVER


In [33]:
unique_sales = data.drop_duplicates(subset='sale_id', keep='first')
album_sales = unique_sales.drop(unique_sales[unique_sales['artist'] != 'we rob rave'].index)
album_sales

Unnamed: 0,sale_id,sale_time,amount,item_id,country_id,country,artist,genre,item_type,item_name
52869,8002,2024-01-08 21:41:51+00:00,101,11608,25,Hungary,we rob rave,juke,track,WE ROB RAVE - BIRD UP (2023 REMIX)


In [34]:
unique_genre_count = data['genre'].value_counts().head(5).reset_index()
unique_genre_count

Unnamed: 0,genre,count
0,electronics,11749
1,techno,4658
2,house,3345
3,ambient,2381
4,experimental,2288


In [35]:
uunique_sales = data.drop_duplicates(subset='sale_id', keep='first')
album_sales = unique_sales.drop(unique_sales[unique_sales['item_type'] == 'track'].index)
popular_albums = album_sales['item_name'].value_counts().sort_values(ascending=False).head(
        5).reset_index()
popular_albums


Unnamed: 0,item_name,count
0,2024 Label Sampler,64
1,Archetypes,55
2,C,42
3,Lander modules,26
4,The Red Chariot,25


In [36]:
unique_sales = data.drop_duplicates(subset='sale_id', keep='first')
album_sales = unique_sales.drop(
     unique_sales[unique_sales['item_type'] == 'track'].index)
album_sales = album_sales.groupby(
    'item_name')['amount'].sum()

album_sales = (
     album_sales/100).sort_values(ascending=False).head(5).reset_index()
album_sales


Unnamed: 0,item_name,amount
0,Beneath the Moss,400.0
1,Archetypes,380.33
2,Oddiyana (24bit),238.19
3,DUCK,230.1
4,Letter To Self,183.91


In [37]:
country_sales = data['country'].value_counts(
).sort_values(ascending=False).reset_index()
country_sales
most_popular_artists = data.groupby('country')['artist'].apply(
    lambda x: x.value_counts().idxmax()).reset_index()
most_popular_artists
final = pd.merge(country_sales, most_popular_artists).head(10).to_dict('records')
final

[{'country': 'United States',
  'count': 40320,
  'artist': 'transcending obscurity records'},
 {'country': 'United Kingdom', 'count': 16219, 'artist': 'various artists'},
 {'country': 'Germany', 'count': 15317, 'artist': 'øl'},
 {'country': 'Australia', 'count': 6941, 'artist': 'various artists'},
 {'country': 'France', 'count': 6543, 'artist': 'various artists'},
 {'country': 'Canada', 'count': 4669, 'artist': 'radd'},
 {'country': 'Japan', 'count': 4337, 'artist': 'kordhell'},
 {'country': 'Netherlands', 'count': 2477, 'artist': 'abstract void'},
 {'country': 'Switzerland', 'count': 2139, 'artist': 'various artists'},
 {'country': 'Belgium', 'count': 1886, 'artist': 'now you shred'}]

In [38]:
unique_sales = data.drop_duplicates(subset='sale_id', keep='first')

track_sales = unique_sales.drop(
    unique_sales[unique_sales['item_type'] == 'album'].index)
popular_tracks = track_sales['item_name'].value_counts().sort_values(ascending=False).head(
    5).reset_index()
selected = popular_tracks['item_name'].to_list()

track_sales = data[data['item_type'] == 'track']

filtered_track_sales = track_sales[track_sales['item_name'].isin(selected)]

track_genre = filtered_track_sales.groupby(['item_name'])[
    'genre'].agg(list).reset_index()

track_genre['genre'] = track_genre['genre'].apply(
    lambda x: list(set(x)))

final = pd.merge(popular_tracks, track_genre).to_dict('records')
track_sales

Unnamed: 0,sale_id,sale_time,amount,item_id,country_id,country,artist,genre,item_type,item_name
120,1,2024-01-08 12:10:10+00:00,254,70,16,Netherlands,anunaku & dj plead,other,track,Wheele
121,3,2024-01-08 12:10:22+00:00,100,71,3,Germany,lon,electronics,track,Marcos Valle - Estrelar (LON Remix)
122,3,2024-01-08 12:10:22+00:00,100,71,3,Germany,lon,brazilian music,track,Marcos Valle - Estrelar (LON Remix)
123,3,2024-01-08 12:10:22+00:00,100,71,3,Germany,lon,indie,track,Marcos Valle - Estrelar (LON Remix)
124,3,2024-01-08 12:10:22+00:00,100,71,3,Germany,lon,disco,track,Marcos Valle - Estrelar (LON Remix)
...,...,...,...,...,...,...,...,...,...,...
121690,21916,2024-01-09 14:19:58+00:00,438,25675,7,France,archypness,electronics,track,Archypness - Insecta
121691,21917,2024-01-09 14:19:58+00:00,127,25676,2,United Kingdom,steve grocott for children,kids,track,New River Train
121692,21917,2024-01-09 14:19:58+00:00,127,25676,2,United Kingdom,steve grocott for children,children's music,track,New River Train
121693,21917,2024-01-09 14:19:58+00:00,127,25676,2,United Kingdom,steve grocott for children,acoustic,track,New River Train


In [39]:
unique_sales = data.drop_duplicates(subset='sale_id', keep='first')
album_sales = unique_sales.drop(
    unique_sales[unique_sales['item_type'] == 'track'].index)
popular_albums = album_sales['item_name'].value_counts().sort_values(ascending=False).head(
    5).reset_index()
selected = popular_albums['item_name'].to_list()

album_sales = data[data['item_type'] == 'album']

filtered_album_sales = album_sales[album_sales['item_name'].isin(selected)]

albums_genre = filtered_album_sales.groupby(['item_name'])[
'genre'].agg(list).reset_index()

albums_genre['genre'] = albums_genre['genre'].apply(
lambda x: list(set(x))[:3])

albums_genre



Unnamed: 0,item_name,genre
0,2024 Label Sampler,"[blackened death metal, brutal death metal, de..."
1,Archetypes,"[synthwave, rock, electronics]"
2,C,[electronics]
3,Lander modules,"[downtempo, electronics, space ambient]"
4,The Red Chariot,"[stoner rock, rock, post punk]"
