In [3]:
import pandas as pd
import numpy as np

![image.png](attachment:image.png)

# Hierarchical indexing (MultiIndex)
Hiyerarşik / çok seviyeli indeksleme, özellikle daha yüksek boyutlu verilerle çalışırken oldukça karmaşık veri analizi ve manipülasyonuna kapı açtığı için çok heyecan vericidir. Özünde, Series (1d) ve DataFrame (2d) gibi daha düşük boyutlu veri yapılarında keyfi sayıda boyuta sahip verileri depolamanıza ve değiştirmenize olanak tanır.

## Bir MultiIndex (hiyerarşik dizin) nesnesi oluşturma
MultiIndex
- MultiIndex.from_arrays()
- MultiIndex.from_tuples()
- MultiIndex.from_product()
- MultiIndex.from_frame()
methodları yoluyla oluşturulabilir.

### 1.MultiIndex.from_tuples()

In [30]:
# Two parallel label lists: position i of each list forms one (first, second) key.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]
# Transpose the two lists into a list of (first, second) tuples.
tuples = list(zip(*arrays))
# Build the hierarchical index from the tuples -- the step this section is about.
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
# Keep the raw tuples as the displayed value of the cell.
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

### 2. MultiIndex.from_product()

In [18]:
# Label pools for the outer and the inner index level.
iterables = [
    ["bar", "baz", "foo", "qux"],
    ["one", "two"],
]

In [19]:
# Cartesian product of the label pools: every outer label paired with every inner label.
index = pd.MultiIndex.from_product(
    iterables=iterables,
    names=["first", "second"],
)
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [14]:
# 8 rows (one per index entry) x 2 unnamed columns of standard-normal noise.
df = pd.DataFrame(data=np.random.randn(8, 2), index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.996457,0.343901
bar,two,0.000856,1.257738
baz,one,-0.340225,0.311046
baz,two,-0.789309,-0.62134
foo,one,0.088308,0.659318
foo,two,1.380514,-0.657121
qux,one,0.439413,1.586816
qux,two,2.478965,1.499704


### 3.MultiIndex.from_frame()

In [21]:
# Plain two-column frame; each row will become one (first, second) index entry.
df1 = pd.DataFrame(
    {
        "first": ["bar", "bar", "foo", "foo"],
        "second": ["one", "two", "one", "two"],
    }
)

In [22]:
# Turn the rows of df1 into index entries; the column names become the level names.
index = pd.MultiIndex.from_frame(df=df1)
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [24]:
# 4 rows (one per index entry) x 3 unnamed columns of standard-normal noise.
df = pd.DataFrame(data=np.random.randn(4, 3), index=index)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-0.397469,1.015697,0.12463
bar,two,-0.855598,-1.637722,0.752554
foo,one,0.14505,1.476093,1.137193
foo,two,-0.490999,0.484136,0.060073


### 4.MultiIndex.from_arrays()

In [25]:
# One NumPy string array per index level, aligned position by position.
arrays = [
    np.repeat(np.array(["bar", "baz", "foo", "qux"]), 2),  # each outer label twice
    np.tile(np.array(["one", "two"]), 4),                  # inner labels cycled four times
]

In [37]:
# Build the two-level row index explicitly from the parallel arrays
# (the same index pandas creates when it is handed the list of arrays directly).
df = pd.DataFrame(
    data=np.random.randn(8, 4),
    index=pd.MultiIndex.from_arrays(arrays),
    columns=['var1', 'var2', 'var3', 'var4'],
)
df

Unnamed: 0,Unnamed: 1,var1,var2,var3,var4
bar,one,0.135537,0.235311,-1.825717,-1.094835
bar,two,0.07465,-1.402979,0.140099,0.603443
baz,one,-0.44215,-1.047133,-1.485128,-1.059406
baz,two,0.053054,1.2809,0.207592,0.2566
foo,one,1.433014,-1.565186,-1.246064,-0.896227
foo,two,1.57803,-0.248011,-1.192654,0.662875
qux,one,-0.24179,1.127899,-1.429158,-1.004573
qux,two,0.349361,0.860526,-0.10259,-1.115299


### 5. Hierarchical columns 

In [35]:
# Build the 8-entry (first, second) column index inside this cell. Relying on the
# global `index` breaks on a fresh top-to-bottom run, because at this point it
# holds the 4-entry index created with from_frame and no longer fits 8 columns.
hier_columns = pd.MultiIndex.from_product(
    [["bar", "baz", "foo", "qux"], ["one", "two"]],
    names=["first", "second"],
)
# 3 labelled rows x one column per (first, second) pair.
df = pd.DataFrame(
    np.random.randn(3, len(hier_columns)),
    index=["A", "B", "C"],
    columns=hier_columns,
)
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-0.767493,0.551178,-0.294556,-1.547463,-0.115736,2.136012,-2.039015,0.273375
B,-0.040183,-0.199296,-0.207995,1.159967,-0.537932,-0.669408,1.845931,1.657198
C,-1.238277,-0.076833,0.483391,1.757537,0.99403,0.517452,0.330726,-1.238205


## Uygulama1:
![image.png](attachment:image.png)
şeklinde multi indexli bir data frame oluşturun

# Reshaping and pivot tables

In [1]:
import numpy as np
import pandas as pd

![image.png](attachment:image.png)

### 1.Data Frame oluşturma

In [4]:
# Long-format sample: 'foo' and 'bar' together identify a row, 'baz'/'zoo' are the values.
data = dict(
    foo=['ONE', 'ONE', 'ONE', 'TWO', 'TWO', 'TWO'],
    bar=['A', 'B', 'C'] * 2,
    baz=[1, 2, 3, 4, 5, 6],
    zoo=['x', 'y', 'z', 'q', 'w', 't'],
)
column_order = ['foo', 'bar', 'baz', 'zoo']
df = pd.DataFrame(data=data, columns=column_order)

In [5]:
# Display the long-format frame that the pivot examples reshape.
df

Unnamed: 0,foo,bar,baz,zoo
0,ONE,A,1,x
1,ONE,B,2,y
2,ONE,C,3,z
3,TWO,A,4,q
4,TWO,B,5,w
5,TWO,C,6,t


### 2.Pivot tablo
### df.pivot(index='...',columns='...',values='...')

In [6]:
# Wide table of 'baz': one row per 'foo' value, one column per 'bar' value.
df.pivot(
    index='foo',
    columns='bar',
    values='baz',
)

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ONE,1,2,3
TWO,4,5,6


In [7]:
# Same reshape, but filling the cells with the 'zoo' values.
df.pivot(
    index='foo',
    columns='bar',
    values='zoo',
)

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ONE,x,y,z
TWO,q,w,t


In [8]:
# Without `values`, every remaining column is pivoted, giving two-level columns
# (value column on top, 'bar' labels underneath).
df.pivot(
    index='foo',
    columns='bar',
)

Unnamed: 0_level_0,baz,baz,baz,zoo,zoo,zoo
bar,A,B,C,A,B,C
foo,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
ONE,1,2,3,x,y,z
TWO,4,5,6,q,w,t


## 2.Uygulama
- Bir data frame oluşturun
- Değişkenleri 
    var1=['A','A','B','B','C','C'],
    var2=['X','Y','Z','X','Y','Z'],
    var3=[1,2,3,4,5,6] olsun.
- Bu data frame ile index=var1, columns=var2 olan bir pivot tablo oluşturun.
- İlk data frame'e, var3 değişkeninin değerlerinin iki katı alınarak oluşturulmuş var4 adında bir değişken ekleyin.
- Yeni data frame ile pivot fonksiyonun values parametresi var3, var4 ve herhangi bir atama yapmadan oluşturulabilecek pivot tabloları elde edin. 

# Reshaping by stacking and unstacking

![image.png](attachment:image.png)

#### MultiIndex DataFrame oluşturma

In [39]:
# Outer labels and inner labels, aligned position by position.
liste = [
    ['A', 'A', 'B', 'B', 'C', 'C'],
    ['X', 'Y', 'X', 'Y', 'X', 'Y'],
]

# Pair the two label lists element-wise into (outer, inner) tuples.
tuples = [pair for pair in zip(liste[0], liste[1])]
tuples

[('A', 'X'), ('A', 'Y'), ('B', 'X'), ('B', 'Y'), ('C', 'X'), ('C', 'Y')]

In [51]:
# Every (ind1, ind2) combination of the two label pools.
index = pd.MultiIndex.from_product(
    iterables=[['A', 'B', 'C'], ['X', 'Y']],
    names=['ind1', 'ind2'],
)
index

MultiIndex([('A', 'X'),
            ('A', 'Y'),
            ('B', 'X'),
            ('B', 'Y'),
            ('C', 'X'),
            ('C', 'Y')],
           names=['ind1', 'ind2'])

In [52]:
# 6 rows (one per index entry) x columns a, b, c of standard-normal noise.
df = pd.DataFrame(
    data=np.random.randn(6, 3),
    index=index,
    columns=list('abc'),
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
ind1,ind2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,X,-0.830736,-1.436695,0.429854
A,Y,-0.358724,1.149795,-0.080177
B,X,1.033371,0.616289,0.319243
B,Y,-0.097402,-0.564451,-0.56281
C,X,0.218864,-0.767196,0.361906
C,Y,-0.830087,-1.84615,-0.354172


In [50]:
# Side example: a single-column frame on its own two-level index.
# It uses its own variable names so it does not rebind `df` / `index`; the
# stack/unstack cells that follow operate on the a/b/c frame built earlier.
index_single = pd.MultiIndex.from_product(
    [["A", "B", "C"], ["a", "b"]],
    names=['inx1', 'inx2'],
)

df_single = pd.DataFrame(np.random.randn(6), index=index_single, columns=["A"])

df_single

Unnamed: 0_level_0,Unnamed: 1_level_0,A
inx1,inx2,Unnamed: 2_level_1
A,a,0.944725
A,b,-1.665948
B,a,-1.574773
B,b,-0.22819
C,a,-0.543288
C,b,1.509478


## df.stack( )

In [44]:
# Move the innermost column level into the row index; the result here is a Series.
stacked = df.stack(level=-1)
stacked

ind1  ind2   
A     X     a   -1.128599
            b    0.362903
            c    0.078245
      Y     a    0.110144
            b   -0.041710
            c    0.309239
B     X     a    1.205815
            b   -1.698855
            c    0.552461
      Y     a   -0.305302
            b   -0.904743
            c   -1.913191
C     X     a    0.171854
            b    0.646224
            c   -0.892278
      Y     a    0.502728
            b   -1.437249
            c    0.075185
dtype: float64

![image.png](attachment:image.png)

### df.unstack( )

In [45]:
# Inverse of stack: pivot the innermost row level back into columns.
stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c
ind1,ind2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,X,-1.128599,0.362903,0.078245
A,Y,0.110144,-0.04171,0.309239
B,X,1.205815,-1.698855,0.552461
B,Y,-0.305302,-0.904743,-1.913191
C,X,0.171854,0.646224,-0.892278
C,Y,0.502728,-1.437249,0.075185


![image.png](attachment:image.png)
### stacked.unstack(1) veya stacked.unstack('ind2')

In [47]:
# Pivot the second row level (position 1) into columns.
stacked.unstack(level=1)

Unnamed: 0_level_0,ind2,X,Y
ind1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,a,-1.128599,0.110144
A,b,0.362903,-0.04171
A,c,0.078245,0.309239
B,a,1.205815,-0.305302
B,b,-1.698855,-0.904743
B,c,0.552461,-1.913191
C,a,0.171854,0.502728
C,b,0.646224,-1.437249
C,c,-0.892278,0.075185


![image.png](attachment:image.png)
### stacked.unstack(0) veya stacked.unstack('ind1')

In [48]:
# Pivot the outermost row level (position 0) into columns.
stacked.unstack(level=0)

Unnamed: 0_level_0,ind1,A,B,C
ind2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
X,a,-1.128599,1.205815,0.171854
X,b,0.362903,-1.698855,0.646224
X,c,0.078245,0.552461,-0.892278
Y,a,0.110144,-0.305302,0.502728
Y,b,-0.04171,-0.904743,-1.437249
Y,c,0.309239,-1.913191,0.075185


## Uygulama 3:
![image.png](attachment:image.png)
- Bu DataFrame oluşturun.
- Daha sonra DataFrame'i sırasıyla aşağıdaki formatlara dönüştürün
![image-3.png](attachment:image-3.png)
![image-4.png](attachment:image-4.png)
![image-5.png](attachment:image-5.png)


#### Multi columns arasından istenilen columna göre stack yapmak
#### df.stack(level=["ind2", "ind3"]) or df.stack(level=[1, 2])

In [68]:
# Three-level column index: every ind1 x ind2 x ind3 combination (2 * 2 * 2 = 8 columns).
col = pd.MultiIndex.from_product(
    iterables=[['A', 'B'], ['X', 'Y'], ['a', 'b']],
    names=['ind1', 'ind2', 'ind3'],
)
# 4 default-indexed rows of standard-normal noise under those columns.
df = pd.DataFrame(data=np.random.randn(4, 8), columns=col)
df

ind1,A,A,A,A,B,B,B,B
ind2,X,X,Y,Y,X,X,Y,Y
ind3,a,b,a,b,a,b,a,b
0,1.184508,0.690179,-0.177093,-0.254549,-0.262884,-0.617884,0.169093,1.565812
1,-0.831907,0.978967,-0.973097,1.519644,0.508023,0.716751,-0.443354,0.745849
2,-0.17225,2.177943,1.302542,-0.818458,0.074492,-0.202689,-0.314494,0.921514
3,-0.185581,1.281164,1.606372,0.314996,0.382564,-0.841158,0.90954,2.054512


In [72]:
# Stack column levels 1 and 2 (ind2, ind3) into the row index; ind1 stays as columns.
df.stack(
    level=[1, 2],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,ind1,A,B
Unnamed: 0_level_1,ind2,ind3,Unnamed: 3_level_1,Unnamed: 4_level_1
0,X,a,1.184508,-0.262884
0,X,b,0.690179,-0.617884
0,Y,a,-0.177093,0.169093
0,Y,b,-0.254549,1.565812
1,X,a,-0.831907,0.508023
1,X,b,0.978967,0.716751
1,Y,a,-0.973097,-0.443354
1,Y,b,1.519644,0.745849
2,X,a,-0.17225,0.074492
2,X,b,2.177943,-0.202689


### Multi column ve multi index içeren data frame

In [75]:
# Two-level column labels (var1, var2).
columns = pd.MultiIndex.from_product(
    iterables=[['A', 'B'], ['a', 'b']],
    names=['var1', 'var2'],
)

# Two-level row labels (ind1, ind2).
index = pd.MultiIndex.from_product(
    iterables=[['X', 'Y'], ['q', 'z']],
    names=['ind1', 'ind2'],
)

In [77]:
# 4x4 standard-normal frame with hierarchical labels on both axes.
df = pd.DataFrame(
    data=np.random.randn(4, 4),
    index=index,
    columns=columns,
)
df

Unnamed: 0_level_0,var1,A,A,B,B
Unnamed: 0_level_1,var2,a,b,a,b
X,q,-0.874218,-1.629557,1.804819,-0.232313
X,z,1.286935,2.07751,0.884554,-0.083853
Y,q,-0.609363,-2.602274,-0.098873,2.031487
Y,z,0.057484,-1.61716,0.611673,-0.153653


In [73]:
# Second example of a frame with hierarchical labels on both axes.
# It uses its own variable names so it does not rebind `df` / `columns` / `index`;
# the stack cells that follow stack by 'var1' / 'var2', levels this frame lacks.

# Column labels given explicitly as (exp, animal) tuples, deliberately unsorted.
animal_columns = pd.MultiIndex.from_tuples(
    [
        ("A", "cat"),
        ("B", "dog"),
        ("B", "cat"),
        ("A", "dog"),
    ],
    names=["exp", "animal"],
)

# Row labels: every (first, second) combination.
animal_index = pd.MultiIndex.from_product(
    [("bar", "baz", "foo", "qux"), ("one", "two")],
    names=["first", "second"],
)

df_animals = pd.DataFrame(
    np.random.randn(8, 4), index=animal_index, columns=animal_columns
)

df_animals

Unnamed: 0_level_0,exp,A,B,B,A
Unnamed: 0_level_1,animal,cat,dog,cat,dog
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,0.358336,0.13463,-1.279963,0.965019
bar,two,1.576503,0.322163,0.355534,-1.17707
baz,one,-0.151948,-0.387263,0.159295,1.718094
baz,two,-1.804565,0.969408,1.346022,1.557724
foo,one,-1.034702,0.701172,1.848454,-0.424971
foo,two,1.395932,0.739375,0.170753,-2.131255
qux,one,0.63644,-0.012276,1.380217,-0.112626
qux,two,0.989317,-1.059877,0.673573,0.726112


### stack

In [85]:
# NOTE(review): the saved output is the var1/var2 x ind1/ind2 frame; confirm that
# `df` still refers to that frame when the notebook is run top to bottom.
df

Unnamed: 0_level_0,var1,A,A,B,B
Unnamed: 0_level_1,var2,a,b,a,b
X,q,-0.874218,-1.629557,1.804819,-0.232313
X,z,1.286935,2.07751,0.884554,-0.083853
Y,q,-0.609363,-2.602274,-0.098873,2.031487
Y,z,0.057484,-1.61716,0.611673,-0.153653


In [86]:
# Move the 'var1' column level into the row index; 'var2' remains as columns.
df.stack(level='var1')

Unnamed: 0_level_0,Unnamed: 1_level_0,var2,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,var1,Unnamed: 3_level_1,Unnamed: 4_level_1
X,q,A,-0.874218,-1.629557
X,q,B,1.804819,-0.232313
X,z,A,1.286935,2.07751
X,z,B,0.884554,-0.083853
Y,q,A,-0.609363,-2.602274
Y,q,B,-0.098873,2.031487
Y,z,A,0.057484,-1.61716
Y,z,B,0.611673,-0.153653


In [87]:
# Move the 'var2' column level into the row index; 'var1' remains as columns.
df.stack(level='var2')

Unnamed: 0_level_0,Unnamed: 1_level_0,var1,A,B
Unnamed: 0_level_1,Unnamed: 1_level_1,var2,Unnamed: 3_level_1,Unnamed: 4_level_1
X,q,a,-0.874218,1.804819
X,q,b,-1.629557,-0.232313
X,z,a,1.286935,0.884554
X,z,b,2.07751,-0.083853
Y,q,a,-0.609363,-0.098873
Y,q,b,-2.602274,2.031487
Y,z,a,0.057484,0.611673
Y,z,b,-1.61716,-0.153653


### Column ve index filtreleme

In [83]:
# Keep rows at positions 0, 1, 3 and columns at positions 1, 2, so the row groups
# no longer share the same set of inner labels.
df1 = df.iloc[[0, 1, 3]].iloc[:, [1, 2]]
df1

Unnamed: 0_level_0,var1,A,B
Unnamed: 0_level_1,var2,b,a
X,q,-1.629557,1.804819
X,z,2.07751,0.884554
Y,z,-1.61716,0.611673


### Missing Data

Alt gruplar aynı etiket setine sahip değilse, yığının kaldırılması eksik değerlere neden olabilir. Varsayılan olarak, eksik değerler o veri türü için varsayılan dolgu değeriyle, kayan nokta için NaN, tarih saat benzeri için NaT vb. ile değiştirilecektir. Tamsayı türleri için, varsayılan olarak veriler kayan nokta değerine dönüştürülecek ve eksik değerler NaN olarak ayarlanacaktır.

In [96]:
# Show the filtered frame again before unstacking it.
df1

Unnamed: 0_level_0,var1,A,B
Unnamed: 0_level_1,var2,b,a
X,q,-1.629557,1.804819
X,z,2.07751,0.884554
Y,z,-1.61716,0.611673


In [84]:
# Pivot the innermost row level into columns; combinations that are absent
# from df1 show up as NaN.
df1.unstack(level=-1)

var1,A,A,B,B
var2,b,b,a,a
Unnamed: 0_level_2,q,z,q,z
X,-1.629557,2.07751,1.804819,0.884554
Y,,-1.61716,,0.611673


### df1.unstack(fill_value=-1e9)
Alternatif olarak, unstack içinde eksik verilerin yerine konacak değeri belirtmek için fill_value parametresi kullanılabilir.

In [95]:
# Same reshape, but absent combinations are filled with -1e9 instead of NaN.
df1.unstack(
    level=-1,
    fill_value=-1e9,
)

var1,A,A,B,B
var2,b,b,a,a
Unnamed: 0_level_2,q,z,q,z
X,-1.629557,2.07751,1.804819,0.884554
Y,-1000000000.0,-1.61716,-1000000000.0,0.611673


In [82]:
# Unstacking the complete frame: every combination exists, so nothing is missing.
df.unstack(level=-1)

var1,A,A,A,A,B,B,B,B
var2,a,a,b,b,a,a,b,b
Unnamed: 0_level_2,q,z,q,z,q,z,q,z
X,-0.874218,1.286935,-1.629557,2.07751,1.804819,0.884554,-0.232313,-0.083853
Y,-0.609363,0.057484,-2.602274,-1.61716,-0.098873,0.611673,2.031487,-0.153653


# Reshaping by melt
![image.png](attachment:image.png)

In [98]:
# Wide-format sample: two identifier columns followed by two measurement columns.
df = pd.DataFrame(
    dict(
        Voor=['Jip', 'Janneke'],
        Achter=['Boos', 'Rustig'],
        lengte=[180, 193],
        gewicht=[89, 92],
    )
)
df

Unnamed: 0,Voor,Achter,lengte,gewicht
0,Jip,Boos,180,89
1,Janneke,Rustig,193,92


### df.melt(id_vars=['Voor','Achter'])

In [101]:
# Keep the identifier columns and fold every other column into variable/value pairs.
df.melt(
    id_vars=['Voor', 'Achter'],
)

Unnamed: 0,Voor,Achter,variable,value
0,Jip,Boos,lengte,180
1,Janneke,Rustig,lengte,193
2,Jip,Boos,gewicht,89
3,Janneke,Rustig,gewicht,92


veya 
### pd.melt(df, id_vars =['Voor','Achter'], value_vars =['lengte', 'gewicht'])

In [113]:
# Function form of melt, naming the measurement columns to fold explicitly.
pd.melt(
    df,
    id_vars=['Voor', 'Achter'],
    value_vars=['lengte', 'gewicht'],
)

Unnamed: 0,Voor,Achter,variable,value
0,Jip,Boos,lengte,180
1,Janneke,Rustig,lengte,193
2,Jip,Boos,gewicht,89
3,Janneke,Rustig,gewicht,92


#### Yeni değişken isimlerini belirleme

In [109]:
# Rename the generated 'variable' / 'value' columns to 'var1' / 'var2'.
df.melt(
    id_vars=['Voor', 'Achter'],
    var_name='var1',
    value_name='var2',
)

Unnamed: 0,Voor,Achter,var1,var2
0,Jip,Boos,lengte,180
1,Janneke,Rustig,lengte,193
2,Jip,Boos,gewicht,89
3,Janneke,Rustig,gewicht,92


In [112]:
# NOTE(review): identical to the earlier pd.melt(...) cell with id_vars and
# value_vars; candidate for removal.
pd.melt(df, id_vars =['Voor','Achter'], value_vars =['lengte', 'gewicht'])

Unnamed: 0,Voor,Achter,variable,value
0,Jip,Boos,lengte,180
1,Janneke,Rustig,lengte,193
2,Jip,Boos,gewicht,89
3,Janneke,Rustig,gewicht,92


## Uygulama 4
![image.png](attachment:image.png)
- Yukarıdaki data frame'i oluşturun ve aşağıdaki formata dönüştürün.
- Yeni oluşacak değişken isimlerini var1 ve var2 olarak atayın
![image-2.png](attachment:image-2.png)

In [119]:
# Scratch preview of the 4x4 integer matrix (16 values reshaped row by row).
# NOTE(review): the tenth value is 0 rather than 10 -- confirm this is intentional.
np.array([1,2,3,4,5,6,7,8,9,0,11,12,13,14,15,16]).reshape(4,4)

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9,  0, 11, 12],
       [13, 14, 15, 16]])

# Combining with statistic and GroupBy
Özet tablolar oluştururken istatistik methodların kullanılması

In [121]:
# Column labels: (var1, var2) pairs.
columns = pd.MultiIndex.from_product(
    iterables=[['A', 'B'], ['a', 'b']],
    names=['var1', 'var2'],
)

# Row labels: (ind1, ind2) pairs.
index = pd.MultiIndex.from_product(
    iterables=[['Q', 'T'], ['q', 't']],
    names=['ind1', 'ind2'],
)

# Fixed integer payload laid out row by row as a 4x4 matrix.
data = np.reshape(
    np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 11, 12, 13, 14, 15, 16]),
    (4, 4),
)

df = pd.DataFrame(data=data, index=index, columns=columns)
df

Unnamed: 0_level_0,var1,A,A,B,B
Unnamed: 0_level_1,var2,a,b,a,b
ind1,ind2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Q,q,1,2,3,4
Q,t,5,6,7,8
T,q,9,0,11,12
T,t,13,14,15,16


#### var1 değişkenlerine göre ortalamalar alınarak özet tablo oluşturulması

In [125]:
# Stack the innermost column level into the rows, average across the remaining
# columns, then pivot the stacked level back out as columns.
df.stack(level=-1).mean(axis=1).unstack(level=-1)

Unnamed: 0_level_0,var2,a,b
ind1,ind2,Unnamed: 2_level_1,Unnamed: 3_level_1
Q,q,2.0,3.0
Q,t,6.0,7.0
T,q,10.0,6.0
T,t,14.0,15.0


#### Groupby ile yukardaki sonucun elde edilmesi

In [126]:
# Mean per var2 label (column level 1) for every row.
# `DataFrame.groupby(..., axis=1)` is deprecated since pandas 2.1, so group the
# transposed frame on its index level instead and transpose the result back.
df.T.groupby(level=1).mean().T

Unnamed: 0_level_0,var2,a,b
ind1,ind2,Unnamed: 2_level_1,Unnamed: 3_level_1
Q,q,2,3
Q,t,6,7
T,q,10,6
T,t,14,15


#### Groupby methodunda level ve axis parametrelerindeki değişiklikler ile elde edilecek farklı sonuçlar

In [127]:
# Mean per var1 label (column level 0) for every row.
# `DataFrame.groupby(..., axis=1)` is deprecated since pandas 2.1, so group the
# transposed frame on its index level instead and transpose the result back.
df.T.groupby(level=0).mean().T

Unnamed: 0_level_0,var1,A,B
ind1,ind2,Unnamed: 2_level_1,Unnamed: 3_level_1
Q,q,1.5,3.5
Q,t,5.5,7.5
T,q,4.5,11.5
T,t,13.5,15.5


In [128]:
# Mean per ind2 label (row level 1) for every column. Grouping the rows is the
# default, so the deprecated `axis` keyword is not needed.
df.groupby(level=1).mean()

var1,A,A,B,B
var2,a,b,a,b
ind2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
q,5,1,7,8
t,9,10,11,12


# Pivot tables
Yukarıda pivot methodu ile data frameleri yeniden şekillendirmeyi gördük. Şimdi ise pivot_table methodu ile istatistiksel yöntemler kullanarak özet tablolar oluşturmayı öğreneceğiz.
It takes a number of arguments:

data: a DataFrame object.

values: a column or a list of columns to aggregate.

index: a column, Grouper, array which has the same length as data, or list of them. Keys to group by on the pivot table index. If an array is passed, it is being used as the same manner as column values.

columns: a column, Grouper, array which has the same length as data, or list of them. Keys to group by on the pivot table column. If an array is passed, it is being used as the same manner as column values.

aggfunc: function to use for aggregation, defaulting to numpy.mean.

In [146]:
# Sample data: three categorical keys (var1..var3) and two integer measures (var4, var5).
df = pd.DataFrame(
    dict(
        var1=["X", "X", "Y", "Z"] * 2,
        var2=["A", "B", "B", "C"] * 2,
        var3=["a", "b"] * 4,
        var4=np.arange(8),        # 0..7
        var5=np.arange(10, 18),   # 10..17
    )
)
df

Unnamed: 0,var1,var2,var3,var4,var5
0,X,A,a,0,10
1,X,B,b,1,11
2,Y,B,a,2,12
3,Z,C,b,3,13
4,X,A,a,4,14
5,X,B,b,5,15
6,Y,B,a,6,16
7,Z,C,b,7,17


In [148]:
# One row per var1 value; var4 is aggregated with the default function (mean).
pd.pivot_table(
    df,
    values="var4",
    index="var1",
)

Unnamed: 0_level_0,var4
var1,Unnamed: 1_level_1
X,2.5
Y,4.0
Z,5.0


Yukarıdaki kod,
#### pd.pivot_table(df, values="var4", index="var1", aggfunc=np.mean)
ile aynı sonucu verecektir; aggfunc parametresinin varsayılan değeri ortalamadır (mean).

In [149]:
# Explicit aggregation, equivalent to the default. The string name is used because
# passing the NumPy callable (np.mean) raises a FutureWarning in pandas >= 2.1.
pd.pivot_table(df, values="var4", index="var1", aggfunc="mean")

Unnamed: 0_level_0,var4
var1,Unnamed: 1_level_1
X,2.5
Y,4.0
Z,5.0


#### numpy.mean yerine numpy.sum

In [150]:
# Sum var4 per var1 value. The string name is used because passing the NumPy
# callable (np.sum) raises a FutureWarning in pandas >= 2.1.
pd.pivot_table(df, values="var4", index="var1", aggfunc="sum")

Unnamed: 0_level_0,var4
var1,Unnamed: 1_level_1
X,10
Y,8
Z,10


In [137]:
# Re-display the input frame before the next pivot_table example.
df

Unnamed: 0,var1,var2,var3,var4,var5
0,X,A,a,0,10
1,X,B,b,1,11
2,Y,B,a,2,12
3,Z,C,b,3,13
4,X,A,a,4,14
5,X,B,b,5,15
6,Y,B,a,6,16
7,Z,C,b,7,17


In [152]:
# Mean of var4 for each (var1, var3) pair; pairs that never occur are NaN.
pd.pivot_table(
    df,
    values="var4",
    index='var1',
    columns='var3',
)

var3,a,b
var1,Unnamed: 1_level_1,Unnamed: 2_level_1
X,2.0,3.0
Y,4.0,
Z,,5.0


In [153]:
# Re-display the input frame before the next pivot_table example.
df

Unnamed: 0,var1,var2,var3,var4,var5
0,X,A,a,0,10
1,X,B,b,1,11
2,Y,B,a,2,12
3,Z,C,b,3,13
4,X,A,a,4,14
5,X,B,b,5,15
6,Y,B,a,6,16
7,Z,C,b,7,17


#### pd.pivot_table(df, values="var4", index=["var1", "var2"], columns=["var3"])

In [154]:
# Two grouping keys on the rows (var1, var2) and var3 across the columns.
pd.pivot_table(
    df,
    values="var4",
    index=["var1", "var2"],
    columns=["var3"],
)

Unnamed: 0_level_0,var3,a,b
var1,var2,Unnamed: 2_level_1,Unnamed: 3_level_1
X,A,2.0,
X,B,,3.0
Y,B,4.0,
Z,C,,5.0


In [155]:
# Re-display the input frame before the next pivot_table example.
df

Unnamed: 0,var1,var2,var3,var4,var5
0,X,A,a,0,10
1,X,B,b,1,11
2,Y,B,a,2,12
3,Z,C,b,3,13
4,X,A,a,4,14
5,X,B,b,5,15
6,Y,B,a,6,16
7,Z,C,b,7,17


In [158]:
# Sum of var4 per var2 row, with (var1, var3) pairs across the columns.
# The string name replaces np.sum, which raises a FutureWarning in pandas >= 2.1.
pd.pivot_table(
    df,
    values="var4",
    index=["var2"],
    columns=["var1", "var3"],
    aggfunc="sum",
)

var1,X,X,Y,Z
var3,a,b,a,b
var2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,4.0,,,
B,,6.0,8.0,
C,,,,10.0


In [164]:
# A different aggregation per value column: mean of var4, sum of var5.
# String names replace np.mean / np.sum, which raise a FutureWarning in pandas >= 2.1.
pd.pivot_table(
    df,
    values=["var4", 'var5'],
    index=["var2"],
    aggfunc={'var4': "mean", 'var5': "sum"},
)

Unnamed: 0_level_0,var4,var5
var2,Unnamed: 1_level_1,Unnamed: 2_level_1
A,2.0,24
B,3.5,54
C,5.0,30


## Uygulama 5:

In [167]:
# Exercise data: three categorical keys (A, B, C) and two integer measures (D, E).
df = pd.DataFrame(
    data={
        "A": ["foo"] * 5 + ["bar"] * 4,
        "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"],
        "C": ["small", "large", "large", "small", "small",
              "large", "small", "small", "large"],
        "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
        "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
    }
)
df

Unnamed: 0,A,B,C,D,E
0,foo,one,small,1,2
1,foo,one,large,2,4
2,foo,one,large,2,5
3,foo,two,small,3,5
4,foo,two,small,3,6
5,bar,one,large,4,6
6,bar,one,small,5,8
7,bar,two,small,6,9
8,bar,two,large,7,9


Bu data frame ile aşağıdaki data frameleri oluşturunuz
![image.png](attachment:image.png)
![image-2.png](attachment:image-2.png)
![image-3.png](attachment:image-3.png)

In [169]:
# Sum of D per (A, B) row and per C column; combinations with no rows are NaN.
# The string name replaces np.sum, which raises a FutureWarning in pandas >= 2.1.
table = pd.pivot_table(df, values='D', index=['A', 'B'],
                    columns=['C'], aggfunc="sum")
table

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4.0,5.0
bar,two,7.0,6.0
foo,one,4.0,1.0
foo,two,,6.0


In [170]:
# Same summary, but combinations with no rows are shown as 0 instead of NaN.
# The string name replaces np.sum, which raises a FutureWarning in pandas >= 2.1.
table = pd.pivot_table(df, values='D', index=['A', 'B'],
                    columns=['C'], aggfunc="sum", fill_value=0)
table

Unnamed: 0_level_0,C,large,small
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,4,5
bar,two,7,6
foo,one,4,1
foo,two,0,6


In [171]:
# Per (A, C) group: mean of D, and min / max / mean of E.
# String names replace the builtin min/max and np.mean callables, which raise a
# FutureWarning in pandas >= 2.1; the resulting column labels are unchanged.
table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'],
                    aggfunc={'D': "mean",
                             'E': ["min", "max", "mean"]})
table

Unnamed: 0_level_0,Unnamed: 1_level_0,D,E,E,E
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,max,mean,min
A,C,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,large,5.5,9.0,7.5,6.0
bar,small,5.5,9.0,8.5,8.0
foo,large,2.0,5.0,4.5,4.0
foo,small,2.333333,6.0,4.333333,2.0
