# Pandas

### Pandas Serileri

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Names used as index labels for the Series examples.
labels_list = [
    "Mustafa",
    "Kemal",
    "Murat",
    "Kadir",
    "Zeynep",
]

In [3]:
# Five integer values, one per label.
data_list = [10, 20, 30, 40, 50]

In [4]:
pd.Series(data=data_list,index=labels_list)

Mustafa    10
Kemal      20
Murat      30
Kadir      40
Zeynep     50
dtype: int64

In [5]:
pd.Series(data=data_list)

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [6]:
# The same five values, held in a one-dimensional NumPy array.
npArray = np.array([10, 20, 30, 40, 50])

In [7]:
# Build a Series from the NumPy array, labelling each value with a name.
# The first two positional parameters of pd.Series are data and index,
# so they are spelled out here as keywords.
pd.Series(data=npArray, index=labels_list)

Mustafa    10
Kemal      20
Murat      30
Kadir      40
Zeynep     50
dtype: int32

In [8]:
pd.Series(data=npArray,index=["A","B","C","D","E"])

A    10
B    20
C    30
D    40
E    50
dtype: int32

In [9]:
# Label -> value mapping; when passed to pd.Series the keys become the index.
dataDict = {
    "Kadir": 30,
    "Kemal": 80,
    "Kamuran": 60,
}

In [10]:
pd.Series(dataDict)

Kadir      30
Kemal      80
Kamuran    60
dtype: int64

In [11]:
# Values for 2017, indexed by product name.
ser2017 = pd.Series(
    data=[5, 10, 14, 20],
    index=["Buğday", "Mısır", "Kiraz", "Erik"],
)

In [12]:
ser2017

Buğday     5
Mısır     10
Kiraz     14
Erik      20
dtype: int64

In [13]:
# Values for 2018; note the index has "Çilek" where ser2017 has "Kiraz".
ser2018 = pd.Series(
    data=[2, 12, 12, 6],
    index=["Buğday", "Mısır", "Çilek", "Erik"],
)

In [14]:
ser2018

Buğday     2
Mısır     12
Çilek     12
Erik       6
dtype: int64

In [15]:
ser2017+ser2018

Buğday     7.0
Erik      26.0
Kiraz      NaN
Mısır     22.0
Çilek      NaN
dtype: float64

In [16]:
# Adding two Series aligns them on index labels. Labels present in only one
# operand ("Kiraz", "Çilek") yield NaN, and the result dtype becomes float64.
total=ser2017+ser2018

In [17]:
total

Buğday     7.0
Erik      26.0
Kiraz      NaN
Mısır     22.0
Çilek      NaN
dtype: float64

In [18]:
total["Erik"]

26.0

In [19]:
total["Çilek"]

nan

### DataFrame'lerin Tanımlanması

In [20]:
import numpy as np
import pandas as pd

In [21]:
from numpy.random import randn

In [22]:
randn(3)

array([ 0.00356649, -1.72418175,  0.5000077 ])

In [23]:
randn(3,3)

array([[ 1.11775335, -0.78077168,  0.88471855],
       [-1.2022778 , -0.78623697,  0.56596568],
       [ 0.09084937,  0.53320219, -0.77482167]])

In [24]:
# 3x3 DataFrame of random samples with row labels A-C.
# randn is not seeded here, so the values change on every run.
df = pd.DataFrame(
    data=randn(3, 3),
    index=["A", "B", "C"],
    columns=["Column1", "Column2", "Column3"],
)

In [25]:
df

Unnamed: 0,Column1,Column2,Column3
A,2.135064,0.663023,0.115105
B,-0.122282,1.47783,-0.258741
C,0.423948,0.150711,-0.0937


In [26]:
df["Column1"]

A    2.135064
B   -0.122282
C    0.423948
Name: Column1, dtype: float64

In [27]:
df[["Column1","Column2"]]

Unnamed: 0,Column1,Column2
A,2.135064,0.663023
B,-0.122282,1.47783
C,0.423948,0.150711


In [28]:
df.loc["A"]

Column1    2.135064
Column2    0.663023
Column3    0.115105
Name: A, dtype: float64

In [29]:
df

Unnamed: 0,Column1,Column2,Column3
A,2.135064,0.663023,0.115105
B,-0.122282,1.47783,-0.258741
C,0.423948,0.150711,-0.0937


In [30]:
# Add a new column from a Series; values are matched to rows by the
# index labels A, B, C.
df["Column4"] = pd.Series(data=randn(3), index=["A", "B", "C"])

In [31]:
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,2.135064,0.663023,0.115105,0.34929
B,-0.122282,1.47783,-0.258741,-1.629315
C,0.423948,0.150711,-0.0937,1.354057


In [32]:
# Element-wise (row by row) sum of the four existing columns, stored as Column5.
df["Column5"] = (
    df["Column1"]
    + df["Column2"]
    + df["Column3"]
    + df["Column4"]
)

In [33]:
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
A,2.135064,0.663023,0.115105,0.34929,3.262482
B,-0.122282,1.47783,-0.258741,-1.629315,-0.532509
C,0.423948,0.150711,-0.0937,1.354057,1.835017


In [34]:
df.drop("Column5",axis=1,inplace=True) 
# Shift+Tab on drop() shows that axis defaults to 0, which targets row (index) labels.
# To remove a column we must pass axis=1 (axis=0 -> rows/index, axis=1 -> columns).
# inplace=True modifies df itself instead of returning a new DataFrame.

In [35]:
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,2.135064,0.663023,0.115105,0.34929
B,-0.122282,1.47783,-0.258741,-1.629315
C,0.423948,0.150711,-0.0937,1.354057


In [36]:
df["Column1"] # [] with a column name selects a column; to select a row by its index label use loc[]

A    2.135064
B   -0.122282
C    0.423948
Name: Column1, dtype: float64

In [37]:
df.loc["A"]

Column1    2.135064
Column2    0.663023
Column3    0.115105
Column4    0.349290
Name: A, dtype: float64

In [38]:
df.iloc[0]         # selects the row at integer position 0 (the first row, labelled "A")

Column1    2.135064
Column2    0.663023
Column3    0.115105
Column4    0.349290
Name: A, dtype: float64

In [39]:
df.loc["A","Column1"] # returns the single value at row label "A", column "Column1"

2.135064422280131

In [40]:
df.loc[["A","B"],["Column1","Column2"]]

Unnamed: 0,Column1,Column2
A,2.135064,0.663023
B,-0.122282,1.47783


## Filtreleme işlemleri

In [41]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [42]:
# 4x3 DataFrame of random samples used for the filtering examples.
# randn is not seeded here, so the values change on every run.
df = pd.DataFrame(
    data=randn(4, 3),
    index=["A", "B", "C", "D"],
    columns=["Column1", "Column2", "Column3"],
)

In [43]:
df

Unnamed: 0,Column1,Column2,Column3
A,-1.262708,1.263249,-1.310027
B,-1.056042,-1.354443,-0.919764
C,-0.546322,-0.088691,0.425517
D,0.056451,-2.365515,-0.715259


In [44]:
df>0

Unnamed: 0,Column1,Column2,Column3
A,False,True,False
B,False,False,False
C,False,False,True
D,True,False,False


In [45]:
# Element-wise comparison: a DataFrame of True/False with the same index and columns as df.
booleanDf=df>0

In [46]:
booleanDf

Unnamed: 0,Column1,Column2,Column3
A,False,True,False
B,False,False,False
C,False,False,True
D,True,False,False


In [47]:
# Indexing with a boolean DataFrame keeps the shape; cells where the mask is False become NaN.
df[booleanDf]

Unnamed: 0,Column1,Column2,Column3
A,,1.263249,
B,,,
C,,,0.425517
D,0.056451,,


In [48]:
df[df>0] # same result as df[booleanDf], with the mask built inline

Unnamed: 0,Column1,Column2,Column3
A,,1.263249,
B,,,
C,,,0.425517
D,0.056451,,


In [49]:
df

Unnamed: 0,Column1,Column2,Column3
A,-1.262708,1.263249,-1.310027
B,-1.056042,-1.354443,-0.919764
C,-0.546322,-0.088691,0.425517
D,0.056451,-2.365515,-0.715259


In [50]:
df["Column1"]

A   -1.262708
B   -1.056042
C   -0.546322
D    0.056451
Name: Column1, dtype: float64

In [51]:
df["Column1"]>0 # element-wise comparison; in this run A, B and C are False and only D is True

A    False
B    False
C    False
D     True
Name: Column1, dtype: bool

In [52]:
df[df["Column1"]>0] # keeps only rows where the mask is True; A, B and C were False above, so only row D remains

Unnamed: 0,Column1,Column2,Column3
D,0.056451,-2.365515,-0.715259


In [53]:
df["Column1"]>0.5

A    False
B    False
C    False
D    False
Name: Column1, dtype: bool

In [58]:
df[df["Column1"]>-1]

Unnamed: 0,Column1,Column2,Column3
C,-0.546322,-0.088691,0.425517
D,0.056451,-2.365515,-0.715259


In [59]:
df[df["Column2"]>-1]

Unnamed: 0,Column1,Column2,Column3
A,-1.262708,1.263249,-1.310027
C,-0.546322,-0.088691,0.425517


In [62]:
df[(df["Column1"]>-1) & (df["Column2"]>-1)] # Python's `and` cannot combine boolean Series; use & with each comparison in parentheses

Unnamed: 0,Column1,Column2,Column3
C,-0.546322,-0.088691,0.425517


In [63]:
df[(df["Column1"]>-1) | (df["Column2"]>-1)] # Python's `or` cannot combine boolean Series either; use | for element-wise OR

Unnamed: 0,Column1,Column2,Column3
A,-1.262708,1.263249,-1.310027
C,-0.546322,-0.088691,0.425517
D,0.056451,-2.365515,-0.715259


In [64]:
df

Unnamed: 0,Column1,Column2,Column3
A,-1.262708,1.263249,-1.310027
B,-1.056042,-1.354443,-0.919764
C,-0.546322,-0.088691,0.425517
D,0.056451,-2.365515,-0.715259


In [66]:
# Add a fourth column; the Series index lines the values up with rows A-D.
df["Column4"] = pd.Series(data=randn(4), index=["A", "B", "C", "D"])
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,-1.262708,1.263249,-1.310027,-1.687638
B,-1.056042,-1.354443,-0.919764,0.811433
C,-0.546322,-0.088691,0.425517,0.414177
D,0.056451,-2.365515,-0.715259,-0.913467


In [68]:
# The easy way to add a column: assign an array directly, one value per row (4 here).
df["Column5"]=randn(4)
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
A,-1.262708,1.263249,-1.310027,-1.687638,-1.400855
B,-1.056042,-1.354443,-0.919764,0.811433,1.091082
C,-0.546322,-0.088691,0.425517,0.414177,0.824431
D,0.056451,-2.365515,-0.715259,-0.913467,1.083156


In [69]:
# A column of strings assigned from a plain list, one entry per row.
df["Column6"] = [
    "newValue1",
    "newValue2",
    "newValue3",
    "newValue4",
]

In [72]:
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
A,-1.262708,1.263249,-1.310027,-1.687638,-1.400855,newValue1
B,-1.056042,-1.354443,-0.919764,0.811433,1.091082,newValue2
C,-0.546322,-0.088691,0.425517,0.414177,0.824431,newValue3
D,0.056451,-2.365515,-0.715259,-0.913467,1.083156,newValue4


### set_index

In [73]:
df.set_index("Column6") # returns a new frame whose index is the Column6 strings instead of A, B, C, D; df itself is not modified

Unnamed: 0_level_0,Column1,Column2,Column3,Column4,Column5
Column6,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
newValue1,-1.262708,1.263249,-1.310027,-1.687638,-1.400855
newValue2,-1.056042,-1.354443,-0.919764,0.811433,1.091082
newValue3,-0.546322,-0.088691,0.425517,0.414177,0.824431
newValue4,0.056451,-2.365515,-0.715259,-0.913467,1.083156


In [74]:
df # still has the A-D index, because set_index was called without inplace=True

Unnamed: 0,Column1,Column2,Column3,Column4,Column5,Column6
A,-1.262708,1.263249,-1.310027,-1.687638,-1.400855,newValue1
B,-1.056042,-1.354443,-0.919764,0.811433,1.091082,newValue2
C,-0.546322,-0.088691,0.425517,0.414177,0.824431,newValue3
D,0.056451,-2.365515,-0.715259,-0.913467,1.083156,newValue4


In [75]:
# inplace=True replaces df's index with the Column6 values on df itself.
# Column6 stops being a regular column afterwards, so re-running this cell
# without recreating df raises a KeyError.
df.set_index("Column6",inplace=True)

In [77]:
df

Unnamed: 0_level_0,Column1,Column2,Column3,Column4,Column5
Column6,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
newValue1,-1.262708,1.263249,-1.310027,-1.687638,-1.400855
newValue2,-1.056042,-1.354443,-0.919764,0.811433,1.091082
newValue3,-0.546322,-0.088691,0.425517,0.414177,0.824431
newValue4,0.056451,-2.365515,-0.715259,-0.913467,1.083156


In [80]:
df.index.names # the index has a single level, so only one name ("Column6") is returned

FrozenList(['Column6'])