# Import Libraries

In [7]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.orm import sessionmaker
import json

__Read data from DB__

In [8]:
with open('../config/credentials.json', 'r') as json_file:
    data = json.load(json_file)
    host = data["host"]
    database = data["database"]
    user = data["user"]
    password = data["password"]

# Crear Conexión
database_connection = f"postgresql://{user}:{password}@{host}/{database}"
engine = create_engine(database_connection)

# Inicializar la sesión
Session = sessionmaker(bind=engine)
session = Session()
metadata = MetaData(bind=engine)
metadata.reflect()

# Acceder a la tabla 'grammys_data'
grammys_table = metadata.tables['grammys_data']
grammys_records = session.query(grammys_table).all()
grammys_data = [record._asdict() for record in grammys_records]
df_grammys = pd.DataFrame(grammys_data)

# Cerrar la sesión
session.close()

  metadata = MetaData(bind=engine)


In [9]:
df=df_grammys
df.head()

Unnamed: 0,year,title,published_at,updated_at,category,nominee,artist,workers,img,winner
0,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 07:10:28,2020-05-19 07:10:28,Record Of The Year,Bad Guy,Billie Eilish,"Finneas O'Connell, producer; Rob Kinelski & Fi...",https://www.grammy.com/sites/com/files/styles/...,True
1,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 07:10:28,2020-05-19 07:10:28,Record Of The Year,"Hey, Ma",Bon Iver,"BJ Burton, Brad Cook, Chris Messina & Justin V...",https://www.grammy.com/sites/com/files/styles/...,True
2,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 07:10:28,2020-05-19 07:10:28,Record Of The Year,7 rings,Ariana Grande,"Charles Anderson, Tommy Brown, Michael Foster ...",https://www.grammy.com/sites/com/files/styles/...,True
3,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 07:10:28,2020-05-19 07:10:28,Record Of The Year,Hard Place,H.E.R.,"Rodney “Darkchild” Jerkins, producer; Joseph H...",https://www.grammy.com/sites/com/files/styles/...,True
4,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 07:10:28,2020-05-19 07:10:28,Record Of The Year,Talk,Khalid,"Disclosure & Denis Kosiak, producers; Ingmar C...",https://www.grammy.com/sites/com/files/styles/...,True


In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4810 entries, 0 to 4809
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   year          4810 non-null   int64         
 1   title         4810 non-null   object        
 2   published_at  4810 non-null   datetime64[ns]
 3   updated_at    4810 non-null   datetime64[ns]
 4   category      4810 non-null   object        
 5   nominee       4810 non-null   object        
 6   artist        4810 non-null   object        
 7   workers       4810 non-null   object        
 8   img           4810 non-null   object        
 9   winner        4810 non-null   bool          
dtypes: bool(1), datetime64[ns](2), int64(1), object(6)
memory usage: 343.0+ KB


In [24]:
df.describe()

Unnamed: 0,year,published_at,updated_at
count,4810.0,4810,4810
mean,1995.566944,2018-02-27 05:58:30.454677248,2019-09-24 09:10:26.281496832
min,1958.0,2017-11-28 03:03:45,2017-11-28 03:03:45
25%,1983.0,2017-11-28 03:03:45,2019-09-10 03:06:59
50%,1998.0,2017-11-28 03:03:45,2019-09-10 03:08:19
75%,2010.0,2017-11-28 03:03:45,2019-09-10 03:11:09
max,2019.0,2020-05-19 07:10:28,2020-09-01 14:16:40
std,17.14972,,


In [25]:
category_counts = df['artist'].value_counts()
print(category_counts)

artist
(Various Artists)                                                                                                               66
U2                                                                                                                              18
Aretha Franklin                                                                                                                 16
Bruce Springsteen                                                                                                               13
Stevie Wonder                                                                                                                   13
                                                                                                                                ..
Yo-Yo Ma                                                                                                                         1
Christopher Guest, Eugene Levy & Michael McKean, songwriters (The Folksmen, 

In [26]:
# Reemplaza la cadena 'NaN' con NaN verdadero
df.replace('NaN', np.nan, inplace=True)
print(df.isnull().sum())


year               0
title              0
published_at       0
updated_at         0
category           0
nominee            6
artist          1840
workers         2190
img             1367
winner             0
dtype: int64


In [27]:
# Calculate NAs Percent
total_entries = len(df)
missing_per_column = df.isnull().sum()
percentage_missing = (missing_per_column / total_entries) * 100
print(percentage_missing)

year             0.000000
title            0.000000
published_at     0.000000
updated_at       0.000000
category         0.000000
nominee          0.124740
artist          38.253638
workers         45.530146
img             28.419958
winner           0.000000
dtype: float64
