# Pivot tables
Reestructuración de datos: Pivot y Reshape en Pandas

In [2]:
import pandas as pd

# Load the retail transactions CSV into a DataFrame.
# The file is decoded as latin1, as specified by the encoding argument.
sales_data = pd.read_csv(
    filepath_or_buffer='Online_Retail.csv',
    encoding='latin1',
)

In [None]:
# Sum of 'Quantity' for every (Country, StockCode) combination:
# one row per Country, one column per StockCode.
# Combinations with no matching rows in sales_data appear as NaN.
pivot_table = pd.pivot_table(
    sales_data,
    values='Quantity',
    index='Country',
    columns='StockCode',
    aggfunc='sum',
)

# Backward-compatible alias for the original (misspelled) variable name,
# kept so any other cell that still refers to it keeps working.
povit_table = pivot_table

print(pivot_table)

StockCode             10002  10080  10120  10123C  10123G  10124A  10124G  \
Country                                                                     
Australia               NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Austria                 NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Bahrain                 NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Belgium                 NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Brazil                  NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Canada                  NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Channel Islands         NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Cyprus                  NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Czech Republic          NaN    NaN    NaN     NaN     NaN     NaN     NaN   
Denmark                 NaN    NaN    NaN     NaN     NaN     NaN     NaN   
EIRE                   12.0    NaN    NaN     NaN     NaN     NaN     NaN   

In [5]:
# Small example frame: a text column 'A' and two integer columns 'B' and 'C'.
# Built row by row; each tuple is one record in column order (A, B, C).
df = pd.DataFrame(
    [
        ('foo', 1, 4),
        ('bar', 2, 5),
        ('baz', 3, 6),
    ],
    columns=['A', 'B', 'C'],
)
print(df)

     A  B  C
0  foo  1  4
1  bar  2  5
2  baz  3  6


In [8]:
# Move the column labels into an inner index level, producing a Series
# indexed by (row label, column name). level=-1 is the default level.
df_stacked = df.stack(level=-1)
print(df_stacked)

0  A    foo
   B      1
   C      4
1  A    bar
   B      2
   C      5
2  A    baz
   B      3
   C      6
dtype: object


In [9]:
# Reverse the stack: pivot the innermost index level (the former column
# names) back out into columns. level=-1 is the default level.
df_unstacked = df_stacked.unstack(level=-1)
print(df_unstacked)

     A  B  C
0  foo  1  4
1  bar  2  5
2  baz  3  6


In [10]:
# Apply the same stack operation to the full sales dataset: every cell
# becomes one entry indexed by (row number, column name).
# level=-1 (the innermost column level) is the default.
df_sales_stacked = sales_data.stack(level=-1)
print(df_sales_stacked)

0       InvoiceNo                                  536365
        StockCode                                  85123A
        Description    WHITE HANGING HEART T-LIGHT HOLDER
        Quantity                                        6
        InvoiceDate                          12/1/10 8:26
                                      ...                
541908  Quantity                                        3
        InvoiceDate                         12/9/11 12:50
        UnitPrice                                    4.95
        CustomerID                                12680.0
        Country                                    France
Length: 4198738, dtype: object


In [11]:
# Pivot the inner index level (column names) of the stacked sales Series
# back into columns, giving one row per original record again.
df_sales_unstacked = df_sales_stacked.unstack(level=-1)
print(df_sales_unstacked)

       InvoiceNo StockCode                          Description Quantity  \
0         536365    85123A   WHITE HANGING HEART T-LIGHT HOLDER        6   
1         536365     71053                  WHITE METAL LANTERN        6   
2         536365    84406B       CREAM CUPID HEARTS COAT HANGER        8   
3         536365    84029G  KNITTED UNION FLAG HOT WATER BOTTLE        6   
4         536365    84029E       RED WOOLLY HOTTIE WHITE HEART.        6   
...          ...       ...                                  ...      ...   
541904    581587     22613          PACK OF 20 SPACEBOY NAPKINS       12   
541905    581587     22899         CHILDREN'S APRON DOLLY GIRL         6   
541906    581587     23254        CHILDRENS CUTLERY DOLLY GIRL         4   
541907    581587     23255      CHILDRENS CUTLERY CIRCUS PARADE        4   
541908    581587     22138        BAKING SET 9 PIECE RETROSPOT         3   

          InvoiceDate UnitPrice CustomerID         Country  
0        12/1/10 8:26     

# Merge, Concat y Join

In [13]:
# Create two example DataFrames that share a 'key' column.
# Keys 'B' and 'C' appear in both frames; 'A' only in df1, 'D' only in df2.
df1 = pd.DataFrame(
    [('A', 1), ('B', 2), ('C', 3)],
    columns=['key', 'value1'],
)

df2 = pd.DataFrame(
    [('B', 4), ('C', 5), ('D', 6)],
    columns=['key', 'value2'],
)

# One print call, one frame per line block.
print(df1, df2, sep='\n')

  key  value1
0   A       1
1   B       2
2   C       3
  key  value2
0   B       4
1   C       5
2   D       6


In [15]:
# Inner merge: keep only the rows whose 'key' exists in both df1 and df2.
inner_merge = df1.merge(df2, on='key', how='inner')
print(inner_merge)

  key  value1  value2
0   B       2       4
1   C       3       5


In [17]:
# Outer merge: keep every 'key' found in either frame; values missing on
# one side are filled with NaN.
outer_merged = df1.merge(df2, on='key', how='outer')
print(outer_merged)

  key  value1  value2
0   A     1.0     NaN
1   B     2.0     4.0
2   C     3.0     5.0
3   D     NaN     6.0


In [18]:
# Left merge: keep every row of df1; value2 is NaN where df2 has no
# matching 'key'.
left_merged = df1.merge(df2, on='key', how='left')
print(left_merged)

  key  value1  value2
0   A       1     NaN
1   B       2     4.0
2   C       3     5.0


In [19]:
# Right merge: keep every row of df2; value1 is NaN where df1 has no
# matching 'key'.
right_merged = df1.merge(df2, on='key', how='right')
print(right_merged)

  key  value1  value2
0   B     2.0       4
1   C     3.0       5
2   D     NaN       6


Concat

In [20]:
# Two example frames with identical columns ('A', 'B') and the same
# default integer index, used to demonstrate concatenation.
df3 = pd.DataFrame(
    [('A0', 'B0'), ('A1', 'B1'), ('A2', 'B2')],
    columns=['A', 'B'],
)

df4 = pd.DataFrame(
    [('A3', 'B3'), ('A4', 'B4'), ('A5', 'B5')],
    columns=['A', 'B'],
)

# One print call, one frame per line block.
print(df3, df4, sep='\n')

    A   B
0  A0  B0
1  A1  B1
2  A2  B2
    A   B
0  A3  B3
1  A4  B4
2  A5  B5


In [21]:
# Stack df4 underneath df3 (axis=0 is the default). The original row
# labels are kept, so the index values 0..2 repeat in the result.
vertical_concat = pd.concat([df3, df4], axis=0)
print(vertical_concat)

    A   B
0  A0  B0
1  A1  B1
2  A2  B2
0  A3  B3
1  A4  B4
2  A5  B5


In [22]:
# Place df4 to the right of df3 (axis=1), aligning rows on the index.
# Both frames use the column names 'A' and 'B', so the result has
# duplicated column labels (A, B, A, B).
horizontal_concat = pd.concat([df3, df4], axis=1)

# Backward-compatible alias for the original (misspelled) variable name,
# kept so any other cell that still refers to it keeps working.
hotizontal_concat = horizontal_concat

print(horizontal_concat)

    A   B   A   B
0  A0  B0  A3  B3
1  A1  B1  A4  B4
2  A2  B2  A5  B5


Join

In [33]:
# Two example frames with string row labels, used to demonstrate join.
# Labels 'K0' and 'K2' exist in both; 'K1' only in df5, 'K3' only in df6.
df5 = pd.DataFrame(
    [('A0', 'B0'), ('A1', 'B1'), ('A2', 'B2')],
    index=['K0', 'K1', 'K2'],
    columns=['A', 'B'],
)

df6 = pd.DataFrame(
    [('C0', 'D0'), ('C1', 'D1'), ('C2', 'D2')],
    index=['K0', 'K2', 'K3'],
    columns=['C', 'D'],
)

# One print call, one frame per line block.
print(df5, df6, sep='\n')

     A   B
K0  A0  B0
K1  A1  B1
K2  A2  B2
     C   D
K0  C0  D0
K2  C1  D1
K3  C2  D2


In [34]:
# Index-based inner join: keep only the row labels present in both
# df5 and df6, combining their columns side by side.
joined = df5.join(other=df6, how='inner')
print(joined)

     A   B   C   D
K0  A0  B0  C0  D0
K2  A2  B2  C1  D1
