# Exploratory Data Analysis & Key Insights
Dataset: _music_project_en.csv_  
Author: Luis Sergio Pastrana Lemus  
Date: 2025-04-23

## __1. Libraries__

In [2]:
from IPython.display import display, HTML
import os
import pandas as pd
from pathlib import Path
import sys

# Resolve the project root dynamically: take the current working directory
# (presumably the folder this notebook is run from -- TODO confirm) and move one level up.
project_root = Path.cwd().parent

# Append the project root itself (not the `src` folder) to sys.path, unless it is
# already listed, so that `src` can be imported below.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# NOTE(review): this is a wildcard import, so every public name exported by `src`
# ends up in the notebook namespace. `load_dataset_from_csv`, called in the next
# section, presumably comes from here -- consider importing it by name once the
# full set of helpers the notebook relies on is confirmed.
from src import *

## __2. Path to Data file__

In [3]:
# Locate the processed-data folder and load the cleaned music dataset from it.
# The keyword arguments are handed to the project helper unchanged; presumably
# they are forwarded to pandas.read_csv -- verify against `src`.
data_file_path = project_root.joinpath("data", "processed")
df_music = load_dataset_from_csv(
    data_file_path,
    "music_clean.csv",
    sep=',',
    header='infer',
    keep_default_na=False,
)


## __3. Casting to category data type__

In [8]:
# Convert the genre, city and day columns to the category dtype, one column at a time
for _column in ('genre', 'city', 'day'):
    df_music[_column] = df_music[_column].astype('category')

In [9]:
# Verify the dtypes of the three columns that were cast to category.
# The label previously said "after imputation", which did not match what this
# cell checks; it is also a plain string now because it has no placeholders.
display(HTML("> Data types after casting to category:"))
display(df_music[['genre', 'city', 'day']].dtypes)

genre    category
city     category
day      category
dtype: object


## __4. Exploratory Data Analysis__

In [10]:
# Summary statistics for every column. None of the columns shown here is numeric,
# so the summary reports count / unique / top / freq rather than mean and quantiles.
df_music.describe()

Unnamed: 0,userid,track,artist,genre,city,time,day
count,59991,59991,59991,59991,59991,59991,59991
unique,41330,39001,37751,264,2,20303,3
top,E8339398,brand,unknown,pop,springfield,21:51:22,friday
freq,43,127,5835,8184,41873,12,21475


In [11]:
# List the dtype of every column, to confirm which ones hold the category dtype
# and which remain plain object columns.
df_music.dtypes

userid      object
track       object
artist      object
genre     category
city      category
time        object
day       category
dtype: object