In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine, inspect
from dotenv import load_dotenv
import os
import logging
import pickle

In [2]:
logging.basicConfig(level=logging.INFO)

def load_grammy():
    try:
        load_dotenv()

        localhost = os.getenv('LOCALHOST')
        port = os.getenv('PORT')
        nameDB = os.getenv('DB_NAME')
        userDB = os.getenv('DB_USER')
        passDB = os.getenv('DB_PASS')
        
        engine = create_engine(f'postgresql+psycopg2://{userDB}:{passDB}@{localhost}:{port}/{nameDB}')
        inspector = inspect(engine)
        
        connection = engine.connect()
        logging.info("Successfully connected to the database.")
        
        dataframe = 'grammy_awards'  
        df_grammy = pd.read_sql_table(dataframe, engine)
        
        logging.info("Successfully loaded the data.")
        logging.info(df_grammy.head(5))
        
        connection.close()
        
        return df_grammy
    
    
    except Exception as e:
        logging.error(f"Error loading the data: {str(e)}")
        raise
    
data = load_grammy()

INFO:root:Successfully connected to the database.
INFO:root:Successfully loaded the data.
INFO:root:   year                              title              published_at  \
0  2019  62nd Annual GRAMMY Awards  (2019) 2020-05-19 12:10:28+00:00   
1  2019  62nd Annual GRAMMY Awards  (2019) 2020-05-19 12:10:28+00:00   
2  2019  62nd Annual GRAMMY Awards  (2019) 2020-05-19 12:10:28+00:00   
3  2019  62nd Annual GRAMMY Awards  (2019) 2020-05-19 12:10:28+00:00   
4  2019  62nd Annual GRAMMY Awards  (2019) 2020-05-19 12:10:28+00:00   

                 updated_at            category     nominee         artist  \
0 2020-05-19 12:10:28+00:00  Record Of The Year     Bad Guy  Billie Eilish   
1 2020-05-19 12:10:28+00:00  Record Of The Year     Hey, Ma       Bon Iver   
2 2020-05-19 12:10:28+00:00  Record Of The Year     7 rings  Ariana Grande   
3 2020-05-19 12:10:28+00:00  Record Of The Year  Hard Place         H.E.R.   
4 2020-05-19 12:10:28+00:00  Record Of The Year        Talk         Khalid   

In [3]:
def check_grammy(df):
    try:
        logging.info("Starting the ckeck process.")
        null_counts = df.isnull().sum()
        logging.info(f"The total of values null in the dataframe is: \n{null_counts} ")
        num_duplicates = df.duplicated().sum()
        logging.info(f"The total of duplicates in the dataframe is: {num_duplicates}")
        logging.info(f"Cheacking type of data: \n{df.info()}")
        logging.info("The dataframe is ready to merge.")
        grammy_ready = df
        
        with open('grammy_ready_df.pkl', 'wb') as f:
            pickle.dump(grammy_ready, f)
        logging.info("The dataframe has been saved in 'grammy_ready_df.pkl'.")
        
        return grammy_ready
        
    except Exception as e:
        logging.error(f"Error checking the data: {str(e)}")
        
check_grammy(data)

INFO:root:Starting the ckeck process.
INFO:root:The total of values null in the dataframe is: 
year            0
title           0
published_at    0
updated_at      0
category        0
nominee         0
artist          0
workers         0
winner          0
dtype: int64 
INFO:root:The total of duplicates in the dataframe is: 0
INFO:root:Cheacking type of data: 
None
INFO:root:The dataframe is ready to merge.
INFO:root:The dataframe has been saved in 'grammy_ready_df.pkl'.


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2970 entries, 0 to 2969
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype              
---  ------        --------------  -----              
 0   year          2970 non-null   int64              
 1   title         2970 non-null   object             
 2   published_at  2970 non-null   datetime64[ns, UTC]
 3   updated_at    2970 non-null   datetime64[ns, UTC]
 4   category      2970 non-null   object             
 5   nominee       2970 non-null   object             
 6   artist        2970 non-null   object             
 7   workers       2970 non-null   object             
 8   winner        2970 non-null   bool               
dtypes: bool(1), datetime64[ns, UTC](2), int64(1), object(5)
memory usage: 188.7+ KB


Unnamed: 0,year,title,published_at,updated_at,category,nominee,artist,workers,winner
0,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,Bad Guy,Billie Eilish,"Finneas O'Connell, producer; Rob Kinelski & Fi...",True
1,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,"Hey, Ma",Bon Iver,"BJ Burton, Brad Cook, Chris Messina & Justin V...",True
2,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,7 rings,Ariana Grande,"Charles Anderson, Tommy Brown, Michael Foster ...",True
3,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,Hard Place,H.E.R.,"Rodney “Darkchild” Jerkins, producer; Joseph H...",True
4,2019,62nd Annual GRAMMY Awards (2019),2020-05-19 12:10:28+00:00,2020-05-19 12:10:28+00:00,Record Of The Year,Talk,Khalid,"Disclosure & Denis Kosiak, producers; Ingmar C...",True
...,...,...,...,...,...,...,...,...,...
2965,1958,1st Annual GRAMMY Awards (1958),2017-11-28 08:03:45+00:00,2019-09-10 08:11:09+00:00,Best Country & Western Performance,Tom Dooley,The Kingston Trio,Unknown,True
2966,1958,1st Annual GRAMMY Awards (1958),2017-11-28 08:03:45+00:00,2019-09-10 08:11:09+00:00,Best Rhythm & Blues Performance,Tequila,The Champs,Unknown,True
2967,1958,1st Annual GRAMMY Awards (1958),2017-11-28 08:03:45+00:00,2019-09-10 08:11:09+00:00,"Best Sound Track Album, Dramatic Picture Score...",Gigi,Andre Previn,Unknown,True
2968,1958,1st Annual GRAMMY Awards (1958),2017-11-28 08:03:45+00:00,2019-09-10 08:11:09+00:00,"Best Performance, Documentary Or Spoken Word",The Best Of The Stan Freberg Shows,Stan Freberg,Unknown,True
