# Pandas Crosstab

## Exemplo 1.

* [pandas.crosstab](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.crosstab.html)

In [1]:
import pandas as pd
import numpy as np

In [0]:
# Row factor for the crosstab example: eleven "foo"/"bar" labels stored as
# Python objects (dtype=object) instead of fixed-width NumPy strings.
a = np.array(["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3, dtype=object)
print('a=', a)

a= ['foo' 'foo' 'foo' 'foo' 'bar' 'bar' 'bar' 'bar' 'foo' 'foo' 'foo']


In [0]:
# First column factor: eleven "one"/"two" labels, aligned position-by-position
# with the other label arrays of this example.
b = np.array("one one one two one one one two two two one".split(), dtype=object)
print('b = ', b)

b =  ['one' 'one' 'one' 'two' 'one' 'one' 'one' 'two' 'two' 'two' 'one']


In [0]:
# Second column factor: eleven "dull"/"shiny" labels, one per observation.
c = np.array(
    "dull dull shiny dull dull shiny shiny dull shiny shiny shiny".split(),
    dtype=object,
)
print('c = ', c)

c =  ['dull' 'dull' 'shiny' 'dull' 'dull' 'shiny' 'shiny' 'dull' 'shiny'
 'shiny' 'shiny']


In [0]:
# Place the three label vectors side by side: one row per observation and one
# column per factor. NOTE: despite its name, df2 is a NumPy array at this
# point, not a DataFrame.
df2 = np.stack((a, b, c), axis=-1)
print(df2)

[['foo' 'one' 'dull']
 ['foo' 'one' 'dull']
 ['foo' 'one' 'shiny']
 ['foo' 'two' 'dull']
 ['bar' 'one' 'dull']
 ['bar' 'one' 'shiny']
 ['bar' 'one' 'shiny']
 ['bar' 'two' 'dull']
 ['foo' 'two' 'shiny']
 ['foo' 'two' 'shiny']
 ['foo' 'one' 'shiny']]


In [0]:
# Wrap the stacked label array in a DataFrame, naming the three columns
# after the arrays they came from.
df3 = pd.DataFrame(data=df2, columns=list("abc"))
print(df3)

      a    b      c
0   foo  one   dull
1   foo  one   dull
2   foo  one  shiny
3   foo  two   dull
4   bar  one   dull
5   bar  one  shiny
6   bar  one  shiny
7   bar  two   dull
8   foo  two  shiny
9   foo  two  shiny
10  foo  one  shiny


In [0]:
# Count observations for each value of `a` (rows) against every (b, c)
# combination (two-level column index).
pd.crosstab(index=a, columns=[b, c], rownames=["a"], colnames=["b", "c"])

b,one,one,two,two
c,dull,shiny,dull,shiny
a,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
bar,1,2,1,0
foo,2,2,1,2


In [0]:
# Three small sets with overlapping members.
x, y, z = {"a", "b", "c"}, {"f", "d", "a"}, {"c", "d", "e"}

# Union of all three sets: shared members appear only once in the result.
result = set.union(x, y, z)

# Sets are unordered, so the printed element order is not meaningful.
print(result)

{'f', 'd', 'b', 'e', 'a', 'c'}


# Merge de dataframes no Pandas

## Exemplo de merge de dataframes

* [Exemplo - vlookup no Pandas](https://stackoverflow.com/questions/25493625/vlookup-in-pandas-using-join)

In [10]:
# Left-hand table of the merge example: one row per (sku, loc) pair.
# `flag` holds real booleans rather than the strings 'True'/'False', so it can
# be used directly as a mask; a non-empty string such as 'False' is truthy.
# NOTE: the column named 'loc' collides with the DataFrame.loc indexer, so it
# must be read as df1['loc'], never as df1.loc.
df1 = pd.DataFrame(
    {
        'sku': [122, 123, 113, 122, 123, 122, 301],
        'loc': [61, 61, 62, 62, 62, 63, 63],
        'flag': [True, True, True, True, False, False, True],
    }
)

print(df1)

   sku  loc   flag
0  122   61   True
1  123   61   True
2  113   62   True
3  122   62   True
4  123   62  False
5  122   63  False
6  301   63   True


In [11]:
# Lookup table for the merge example: one row per sku with its department.
# NOTE: this rebinds df2, which earlier in the notebook held a NumPy array.
df2 = pd.DataFrame(dict(sku=[113, 122, 123, 301], dept=list("abbc")))

print(df2)

   sku dept
0  113    a
1  122    b
2  123    b
3  301    c


In [12]:
# Left join keyed on 'sku': every df2 row is kept and repeated once for each
# df1 row carrying the same sku.
df3 = pd.merge(df2, df1, how="left", on="sku")

print(df3)

   sku dept  loc   flag
0  113    a   62   True
1  122    b   61   True
2  122    b   62   True
3  122    b   63  False
4  123    b   61   True
5  123    b   62  False
6  301    c   63   True


In [13]:
# vlookup-style alternative to the merge: turn df2 into a sku -> dept
# dictionary, then use it to add a 'dept' column to df1 in place.
aux = (
    df2
    .set_index('sku')['dept']
    .to_dict()
)
df1['dept'] = df1['sku'].map(aux)

print(df1)

   sku  loc   flag dept
0  122   61   True    b
1  123   61   True    b
2  113   62   True    a
3  122   62   True    b
4  123   62  False    b
5  122   63  False    b
6  301   63   True    c


In [14]:
# Hand-written sku -> department lookup, keyed by integer sku.
dict1 = dict(zip((113, 122, 123, 301), "abbc"))

In [15]:
# Look up each sku's department in dict1. Series.map accepts the dictionary
# directly (the same idiom used for the 'dept' column), so no per-row Python
# lambda is needed. Unlike dict1[x], a sku missing from dict1 yields NaN
# instead of raising KeyError.
df1['dept2'] = df1['sku'].map(dict1)
print(df1)

   sku  loc   flag dept dept2
0  122   61   True    b     b
1  123   61   True    b     b
2  113   62   True    a     a
3  122   62   True    b     b
4  123   62  False    b     b
5  122   63  False    b     b
6  301   63   True    c     c


# Matrizes de incidência

## Exemplo de pivot table e matriz de incidência no Pandas

In [16]:
# Category label of each observation: five 'a' followed by two 'b'.
cat = np.array(['a'] * 5 + ['b'] * 2)
print(cat)

['a' 'a' 'a' 'a' 'a' 'b' 'b']


In [17]:
# Product label of each observation; 'product 1' appears twice.
prod = np.array([f'product {n}' for n in (1, 2, 3, 1, 4, 5, 6)])
print(prod)

['product 1' 'product 2' 'product 3' 'product 1' 'product 4' 'product 5'
 'product 6']


In [18]:
# Pair each category with its product: a 2-column NumPy array with one row
# per observation (still an ndarray, not a DataFrame).
dfx = np.stack((cat, prod), axis=-1)
print(dfx)

[['a' 'product 1']
 ['a' 'product 2']
 ['a' 'product 3']
 ['a' 'product 1']
 ['a' 'product 4']
 ['b' 'product 5']
 ['b' 'product 6']]


In [19]:
# Passing a list of arrays makes each array one ROW, so this yields a 2 x 7
# frame (categories in row 0, products in row 1) rather than two columns.
# NOTE: this rebinds df1, replacing the merge-example table of the same name.
df1 = pd.DataFrame(data=[cat, prod])
print(df1)

           0          1          2          3          4          5          6
0          a          a          a          a          a          b          b
1  product 1  product 2  product 3  product 1  product 4  product 5  product 6


In [20]:
# Column-oriented construction: one row per observation, with the category
# and the product as named columns.
df = pd.DataFrame(
    {
        'Category': list('aaaaabb'),
        'Product_ID': [f'product {n}' for n in (1, 2, 3, 1, 4, 5, 6)],
    }
)
print(df)

  Category Product_ID
0        a  product 1
1        a  product 2
2        a  product 3
3        a  product 1
4        a  product 4
5        b  product 5
6        b  product 6


In [21]:
# Cross-tabulate Product_ID against itself: rows and columns carry the same
# labels, so only the diagonal is non-zero and holds each product's row count.
pd.crosstab(index=df['Product_ID'], columns=df['Product_ID'])

Product_ID,product 1,product 2,product 3,product 4,product 5,product 6
Product_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
product 1,2,0,0,0,0,0
product 2,0,1,0,0,0,0
product 3,0,0,1,0,0,0
product 4,0,0,0,1,0,0
product 5,0,0,0,0,1,0
product 6,0,0,0,0,0,1


In [22]:
# Same self-crosstab with normalize=True: every cell is divided by the grand
# total (7 rows here), so counts become fractions of all rows.
pd.crosstab(index=df['Product_ID'], columns=df['Product_ID'], normalize=True)

Product_ID,product 1,product 2,product 3,product 4,product 5,product 6
Product_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
product 1,0.285714,0.0,0.0,0.0,0.0,0.0
product 2,0.0,0.142857,0.0,0.0,0.0,0.0
product 3,0.0,0.0,0.142857,0.0,0.0,0.0
product 4,0.0,0.0,0.0,0.142857,0.0,0.0
product 5,0.0,0.0,0.0,0.0,0.142857,0.0
product 6,0.0,0.0,0.0,0.0,0.0,0.142857


## Outro exemplo de matriz de incidência

In [23]:
# Edge list for the second incidence-matrix example: each row pairs a Name_A
# with a Name_B. The 'index' column is ordinary data that happens to repeat
# the default row labels 0..3.
df1 = pd.DataFrame(
    {
        'index': list(range(4)),
        'Name_A': 'Adam Chris Adam Ben'.split(),
        'Name_B': 'Ben David Chris Chris'.split(),
    }
)
print(df1)

   index Name_A Name_B
0      0   Adam    Ben
1      1  Chris  David
2      2   Adam  Chris
3      3    Ben  Chris


In [24]:
# Count how often each Name_A (rows) is paired with each Name_B (columns).
# Only names that actually occur on a given side appear on that axis.
df = pd.crosstab(index=df1['Name_A'], columns=df1['Name_B'])
print(df)

Name_B  Ben  Chris  David
Name_A                   
Adam      1      1      0
Ben       0      1      0
Chris     0      0      1


In [25]:
# Rebuild the pair counts, then make the table square: reindex both rows and
# columns on the union of the two label sets, filling never-seen pairs with 0.
df = pd.crosstab(index=df1['Name_A'], columns=df1['Name_B'])
idx = df.columns.union(df.index)
df = df.reindex(
    index=idx,
    columns=idx,
    fill_value=0,
)
print(df)

       Adam  Ben  Chris  David
Adam      0    1      1      0
Ben       0    0      1      0
Chris     0    0      0      1
David     0    0      0      0


In [26]:
# Show the row labels of the current `df` (the table reindexed on `idx` when
# the cells are run in order).
print(df.index)

Index(['Adam', 'Ben', 'Chris', 'David'], dtype='object')


In [27]:
# NOTE(review): `df.reindex` is referenced without being called, so this prints
# the bound method's repr (which embeds the frame) rather than a reindexed
# result. Presumably `df.columns` or an actual `df.reindex(...)` call was
# intended — confirm.
print(df.reindex)

<bound method DataFrame.reindex of        Adam  Ben  Chris  David
Adam      0    1      1      0
Ben       0    0      1      0
Chris     0    0      0      1
David     0    0      0      0>


# Operações básicas com colunas

* [How do I sum values in a column that match a given condition using pandas?](https://intellipaat.com/community/49/how-do-i-sum-values-in-a-column-that-match-a-given-condition-using-pandas)