# In [15]:
import pandas as pd
import numpy as np

# Module-level dataset shared by every q* function below. It is loaded once at
# import time from "black_friday.csv", resolved relative to the current
# working directory.
black_friday = pd.read_csv("black_friday.csv")

def q1():
    """
    Answer question 1

    Returns
    -------
    tuple
        Dimensions of the dataset as `(instance_qtd, features_qtd)`,
        i.e. number of rows followed by number of columns.
    """
    n_rows, n_cols = black_friday.shape
    return (n_rows, n_cols)


def q2():
    """
    Answer question 2

    Returns
    -------
    int
        Number of rows whose `Gender` is 'F' and whose `Age` bracket
        is '26-35'
    """
    is_female = black_friday['Gender'].eq('F')
    in_age_bracket = black_friday['Age'].eq('26-35')

    # Summing a boolean mask counts its True entries, so there is no need
    # to build the filtered DataFrame just to measure its length.
    return int((is_female & in_age_bracket).sum())
    

def q3():
    """
    Answer question 3

    Returns
    -------
    int
        Number of distinct values in the `User_ID` column
    """
    # dropna=False keeps a missing ID (if any) as one distinct value, which
    # is what dropping duplicates and measuring the remainder would count.
    return black_friday['User_ID'].nunique(dropna=False)


def q4():
    """
    Answer question 4

    Returns
    -------
    int
        Number of different column datatypes in the dataset
    """
    distinct_dtypes = black_friday.dtypes.unique()
    return len(distinct_dtypes)


def q5():
    """
    Answer question 5

    Returns
    -------
    float
        Fraction of rows that contain at least one `NaN`, relative to
        the total number of rows
    """
    total_rows = len(black_friday)

    # A row is complete when every one of its cells is non-missing.
    complete_rows = int(black_friday.notna().all(axis=1).sum())

    return 1 - (complete_rows / total_rows)


def q6():
    """
    Answer question 6

    Returns
    -------
    int
        Number of missing values in the feature that has the most
        `NaN` entries
    """
    # Per-column NaN counts; the largest one belongs to the sparsest feature.
    missing_per_feature = black_friday.isna().sum()

    return int(missing_per_feature.max())


def q7():
    """
    Answer question 7

    Returns
    -------
    int
        Most frequent value (mode) of `Product_Category_3`, with missing
        values ignored
    """
    # Series.mode() drops NaN by default and returns the modes in ascending
    # order, so when several values tie, position 0 is the smallest of them.
    modes = black_friday['Product_Category_3'].mode()

    return int(modes[0])


def q8():
    """
    Answer question 8

    Returns
    -------
    float
        Mean of `Purchase` after min-max normalization
    """
    purchase = black_friday['Purchase']
    lowest, highest = purchase.min(), purchase.max()

    # Min-max scaling: shift by the minimum, then divide by the value range.
    scaled = purchase.sub(lowest).div(highest - lowest)

    return float(scaled.mean())


def q9():
    """
    Answer question 9

    Returns
    -------
    int
        Number of purchases whose standardized value lies in the closed
        interval [-1, 1]
    """
    purchase = black_friday['Purchase']

    # z-score using the Series defaults for mean() and std()
    z_scores = (purchase - purchase.mean()) / purchase.std()

    # |z| <= 1 is the same closed interval as -1 <= z <= 1
    within_one_std = z_scores.abs() <= 1

    return int(within_one_std.sum())


def q10():
    """
    Answer question 10

    Returns
    -------
    bool
        `True` if every row with a missing value in `Product_Category_2`
        also has a missing value in `Product_Category_3`, and `False`
        otherwise. The reverse implication is not checked: rows missing
        only in `Product_Category_3` do not affect the result.
    """
    cat_2_missing = black_friday['Product_Category_2'].isna()
    cat_3_missing = black_friday['Product_Category_3'].isna()

    # A counterexample is a row where category 2 is missing but category 3
    # is present; the statement holds exactly when no such row exists.
    counterexamples = cat_2_missing & ~cat_3_missing

    return not bool(counterexamples.any())
