# **Pandas**

### **Librerías**

In [1]:
import pandas as pd
import numpy as np

### **Datos**

In [128]:
# Data para el producto A

df_a = pd.DataFrame({
    'Month':pd.date_range(
        start = '01-01-2012',
        end = '31-12-2022',
        freq = 'MS'
    ),
    'Quotes':np.random.randint(
        low = 1_000_000,
        high = 2_500_000,
        size = 132
    ),
    'Numbers':np.random.randint(
        low = 300_000,
        high = 500_000,
        size = 132
    ),
    'Amounts':np.random.randint(
        low = 750_000,
        high = 1_250_000,
        size = 132
    )
})

df_a['Product'] = 'A'

# Data para el producto B

df_b = pd.DataFrame({
    'Month':pd.date_range(
        start = '01-01-2012',
        end = '31-12-2022',
        freq = 'MS'
    ),
    'Quotes':np.random.randint(
        low = 100_000,
        high = 800_000,
        size = 132
    ),
    'Numbers':np.random.randint(
        low = 10_000,
        high = 95_000,
        size = 132
    ),
    'Amounts':np.random.randint(
        low = 450_000,
        high = 750_000,
        size = 132
    )
})

df_b['Product'] = 'B'

In [129]:
# Combinamos los datos
df = pd.concat([df_a, df_b], axis=0)
df.sort_values(by='Month', inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Month,Quotes,Numbers,Amounts,Product
0,2012-01-01,2234066,418602,1203949,A
1,2012-01-01,495770,72950,552899,B
2,2012-02-01,1228640,377758,784108,A
3,2012-02-01,687294,66554,654344,B
4,2012-03-01,1256217,300708,980733,A


In [130]:
# Columnas calculadas 
df['Average_Sale'] = df['Amounts'] / df['Numbers']
df['Product_Conversion'] = df['Numbers'] / df['Quotes']

In [131]:
df.head()

Unnamed: 0,Month,Quotes,Numbers,Amounts,Product,Average_Sale,Product_Conversion
0,2012-01-01,2234066,418602,1203949,A,2.876119,0.187372
1,2012-01-01,495770,72950,552899,B,7.57915,0.147145
2,2012-02-01,1228640,377758,784108,A,2.075689,0.30746
3,2012-02-01,687294,66554,654344,B,9.831776,0.096835
4,2012-03-01,1256217,300708,980733,A,3.261413,0.239376


### **Ajuste del formato**

In [135]:
# Ajustamos los formatos de los números
df_formatted = df.style.format({
    # Ajustamos el formato de fecha
    # 'Month': '{:%Y-%m}',
    # 'Month': '{:%B-%Y}',
    'Month': '{:%b-%Y}',

    # Ajustamos el formato de los números
    'Quotes': '{:,.0f}',
    'Numbers': '{:,.0f}',
    'Amounts': '${:,.0f}',
    # 'Average_Sale': '{:,.2f} ($)',
    'Average_Sale': '${:,.2f}',
    'Product_Conversion': '{:.2%}',
})
# .hide()

### **Ajuste del formato condicional**

In [150]:
def highlight_product(s, variable, color='yellow'):
    row = pd.Series(data=False, index=s.index)
    row['Product'] = s.loc['Product'] == variable

    return [f'background-color: {color}' if row.any() else '' for value in row]

In [149]:
# Ajustamos los formatos de los números
df.head().style.apply(
    highlight_product, product='A', color='black', axis=1
).format({
    # Ajustamos el formato de fecha
    # 'Month': '{:%Y-%m}',
    # 'Month': '{:%B-%Y}',
    'Month': '{:%b-%Y}',

    # Ajustamos el formato de los números
    'Quotes': '{:,.0f}',
    'Numbers': '{:,.0f}',
    'Amounts': '${:,.0f}',
    # 'Average_Sale': '{:,.2f} ($)',
    'Average_Sale': '${:,.2f}',
    'Product_Conversion': '{:.2%}',
}).hide()

Month,Quotes,Numbers,Amounts,Product,Average_Sale,Product_Conversion
Jan-2012,2234066,418602,"$1,203,949",A,$2.88,18.74%
Jan-2012,495770,72950,"$552,899",B,$7.58,14.71%
Feb-2012,1228640,377758,"$784,108",A,$2.08,30.75%
Feb-2012,687294,66554,"$654,344",B,$9.83,9.68%
Mar-2012,1256217,300708,"$980,733",A,$3.26,23.94%


In [4]:
marketing = pd.read_csv('./data/DirectMarketing.csv')

In [5]:
marketing.head()

Unnamed: 0,Age,Gender,OwnHome,Married,Location,Salary,Children,History,Catalogs,AmountSpent
0,Old,Female,Own,Single,Far,47500,0,High,6,755
1,Middle,Male,Rent,Single,Close,63600,0,High,6,1318
2,Young,Female,Rent,Single,Close,13500,0,Low,18,296
3,Middle,Male,Own,Married,Close,85600,1,High,18,2436
4,Middle,Female,Own,Single,Close,68400,0,High,12,1304


In [14]:
groceries = pd.read_csv('./data/Groceries_dataset.csv')

In [15]:
groceries.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


### 2. Cambiar los tipos de datos

In [16]:
groceries.dtypes

Member_number       int64
Date               object
itemDescription    object
dtype: object

In [12]:
groceries['Date'] = groceries['Date'].astype('datetime64')

In [13]:
groceries.dtypes

Member_number               int64
Date               datetime64[ns]
itemDescription            object
dtype: object

### 3. Cambiar el formato a fecha

In [17]:
groceries['Date'] = pd.to_datetime(groceries['Date'])

In [19]:
groceries.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,2015-07-21,tropical fruit
1,2552,2015-05-01,whole milk
2,2300,2015-09-19,pip fruit
3,1187,2015-12-12,other vegetables
4,3037,2015-01-02,whole milk
