# Pandas Serisi Olusturma 

In [14]:
import pandas as pd

In [15]:
pd.Series([1,2,3,4,5])
#veriler indexleri ile beraber tutulurlar pandasta 

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [16]:
seri = pd.Series([1,2,3,4,5])

In [17]:
type(seri)
# tip bilgisi

pandas.core.series.Series

In [18]:
seri.axes
# serinin index bilgisine erisiyoruz stop=5'de 5 dahil degil

[RangeIndex(start=0, stop=5, step=1)]

In [19]:
seri.dtype
# serinin icerisindeki degerler ile ilgili bilgi

dtype('int64')

In [20]:
seri.size
# serinin icerisindeki eleman sayisini veriyor

5

In [21]:
seri.ndim
# serinin boyutunu degerlendirebiliyoruz yani burada 1 boyutlu oldugu anlaminda

1

In [22]:
seri.values
# array formunda sadece deger bilgisine esit

array([1, 2, 3, 4, 5], dtype=int64)

In [23]:
seri.head()
# ilk bes elemanini verir eger seri.head(3) dersek ilk 3 elemanini verir

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [24]:
seri.tail(3)
# sondan 3 elemanini verir

2    3
3    4
4    5
dtype: int64

In [25]:
# index isimlendirmesi

In [26]:
pd.Series([100,245,365,456,534], index = ["a","b","c","d","e"])

a    100
b    245
c    365
d    456
e    534
dtype: int64

In [27]:
seri = pd.Series([100,245,365,456,534], index = ["a","b","c","d","e"])

In [28]:
seri["a"]
# a'nin karsilik geldigi degere ulasmamizi saglar

100

In [29]:
seri["a":"c"]
# slicing islemi 

a    100
b    245
c    365
dtype: int64

# Sozluk Uzerinden Seri Olusturmak

In [30]:
sozluk = {"reg":10, "log":11, "cart":12}
# plain dict; the next cell converts it to a Series, where the keys become the index labels

In [31]:
seri = pd.Series(sozluk)

In [32]:
seri

reg     10
log     11
cart    12
dtype: int64

In [33]:
# pd.Series({"reg":10, "log":11, "cart":12}) -- passing the dict literal straight to pd.Series builds the same Series

In [34]:
pd.concat([seri,seri])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

# Eleman Islemleri

In [35]:
import numpy as np

In [36]:
a = np.array([1,2,33,44,546])
seri = pd.Series(a)
seri

0      1
1      2
2     33
3     44
4    546
dtype: int32

In [37]:
seri[0]
#0. indexteki elemani sececek 

1

In [38]:
seri[0:3]

0     1
1     2
2    33
dtype: int32

In [39]:
seri = pd.Series([121,234,4545,456], 
                 index = ["reg", "log", "cart", "rf"])

In [40]:
seri

reg      121
log      234
cart    4545
rf       456
dtype: int64

In [41]:
seri.index
# sadece index degerlerine erismek istiyorsak 

Index(['reg', 'log', 'cart', 'rf'], dtype='object')

In [42]:
seri.keys
# NOTE(review): `keys` is referenced without parentheses, so the cell displays the bound
# method object (see the "<bound method Series.keys of ...>" output) rather than the keys.
# Call seri.keys() to get the index labels.

<bound method Series.keys of reg      121
log      234
cart    4545
rf       456
dtype: int64>

In [43]:
list(seri.items())
# her bir key degerine karsilik gelen degerleri gelir

[('reg', 121), ('log', 234), ('cart', 4545), ('rf', 456)]

In [44]:
seri.values
#serinin sadece value'larina erisiriz 

array([ 121,  234, 4545,  456], dtype=int64)

In [45]:
"reg" in seri

True

In [46]:
"a" in seri

False

In [47]:
seri["reg"]

121

In [48]:
# fancy eleman

In [49]:
seri[["rf","reg"]]

rf     456
reg    121
dtype: int64

In [50]:
seri["reg"] = 132

In [51]:
seri["reg"]

132

In [52]:
seri["reg":"log"]

reg    132
log    234
dtype: int64

# Pandas DataFrame Olusturma

In [53]:
# Yapisal veri tipi

In [54]:
liste = [13,234,345,464,435]

In [55]:
liste

[13, 234, 345, 464, 435]

In [56]:
pd.DataFrame(liste, columns = ["degisken_adi"])

Unnamed: 0,degisken_adi
0,13
1,234
2,345
3,464
4,435


In [57]:
m = np.arange(1,10).reshape(3,3)
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [58]:
pd.DataFrame(m, columns = ["var1","var2","var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [59]:
df = pd.DataFrame(m, columns = ["var1","var2","var3"])
df.head(2)

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6


In [60]:
df.columns = ("degisken1","degisken2","degisken3")

In [61]:
df

Unnamed: 0,degisken1,degisken2,degisken3
0,1,2,3
1,4,5,6
2,7,8,9


In [62]:
type(df)

pandas.core.frame.DataFrame

In [63]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['degisken1', 'degisken2', 'degisken3'], dtype='object')]

In [64]:
df.shape

(3, 3)

In [65]:
df.ndim
# 2 boyutlu oldugu anlamina gelir

2

In [66]:
df.size
# eleman sayisini vermis oldu

9

In [67]:
df.values
# sadece degiskenlerin degerlerine erismek istersem 

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [68]:
type(df.values)

numpy.ndarray

In [69]:
df.head()

Unnamed: 0,degisken1,degisken2,degisken3
0,1,2,3
1,4,5,6
2,7,8,9


In [70]:
df.tail(1)

Unnamed: 0,degisken1,degisken2,degisken3
2,7,8,9


In [71]:
a = np.array([1,2,3,4,5])
#bir numpy array i uzerinden de DataFrame olusturabildik

In [72]:
pd.DataFrame(a, columns = ["deg1"])

Unnamed: 0,deg1
0,1
1,2
2,3
3,4
4,5


# Pandas DataFrame Eleman Islemleri

In [73]:
import numpy as np
import pandas as pd
s1 = np.random.randint(10, size = 5)
s2 = np.random.randint(10, size = 5)
s3 = np.random.randint(10, size = 5)

In [74]:
sozluk = {"var1":s1, "var2":s2, "var3":s3}

In [75]:
sozluk

{'var1': array([3, 6, 4, 4, 9]),
 'var2': array([5, 0, 3, 4, 9]),
 'var3': array([9, 4, 5, 9, 6])}

In [76]:
df = pd.DataFrame(sozluk)

In [77]:
df

Unnamed: 0,var1,var2,var3
0,3,5,9
1,6,0,4
2,4,3,5
3,4,4,9
4,9,9,6


In [78]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,3,5,9


In [79]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [80]:
df.index = ["a","b","c","d","e"]

In [81]:
df

Unnamed: 0,var1,var2,var3
a,3,5,9
b,6,0,4
c,4,3,5
d,4,4,9
e,9,9,6


In [82]:
df["c":"e"]

Unnamed: 0,var1,var2,var3
c,4,3,5
d,4,4,9
e,9,9,6


In [83]:
df.drop("a", axis = 0)
# axis = 0  demek satir'a gore sil demek 
# drop sil demek 

Unnamed: 0,var1,var2,var3
b,6,0,4
c,4,3,5
d,4,4,9
e,9,9,6


In [84]:
df
# tekrar df dedigimizde veri setinin ana yapisi uzerinde bi degisim olmadi 

Unnamed: 0,var1,var2,var3
a,3,5,9
b,6,0,4
c,4,3,5
d,4,4,9
e,9,9,6


In [85]:
df.drop("a", axis = 0, inplace = True)
# inplace True diyerek a yi tamamen sildik degisim kalici hale gelmis oldu
# Yani kalici bir sekilde DataFrame'e kayitli hale geldi

In [86]:
df

Unnamed: 0,var1,var2,var3
b,6,0,4
c,4,3,5
d,4,4,9
e,9,9,6


In [87]:
# fancy

In [88]:
l = ["c","e"]

In [89]:
df.drop(l, axis = 0)

Unnamed: 0,var1,var2,var3
b,6,0,4
d,4,4,9


In [90]:
# degiskenler icin

In [91]:
"var1" in df

True

In [92]:
l = ["var1", "var4", "var2"]

In [93]:
for i in l:
    print(i in df)

True
False
True


In [94]:
df

Unnamed: 0,var1,var2,var3
b,6,0,4
c,4,3,5
d,4,4,9
e,9,9,6


In [95]:
df["var1"]

b    6
c    4
d    4
e    9
Name: var1, dtype: int32

In [96]:
# element-wise division of two columns, stored as a new column "var4";
# a zero in var2 produces inf instead of raising (row b in the recorded output below)
df["var4"] = df["var1"] / df["var2"]

In [97]:
df

Unnamed: 0,var1,var2,var3,var4
b,6,0,4,inf
c,4,3,5,1.333333
d,4,4,9,1.0
e,9,9,6,1.0


In [98]:
# degisken silmek 

In [99]:
df.drop("var4", axis = 1, inplace = True)

In [100]:
df

Unnamed: 0,var1,var2,var3
b,6,0,4
c,4,3,5
d,4,4,9
e,9,9,6


In [101]:
l = ["var1", "var2"]

In [102]:
df.drop(l, axis = 1)
# degiskenleri listeye cevirip sildim

Unnamed: 0,var3
b,4
c,5
d,9
e,6


# Gozlem ve Degisken Secimi : loc & iloc

In [106]:
# eger bir seyin nasil kullanildigini hatirlayamazsak 
?np.random.randint

In [108]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (10, 3))
# 10 satir 3 sutundan olusan bir array olusturulmus
df = pd.DataFrame(m, columns = ["var1","var2","var3"])
df

Unnamed: 0,var1,var2,var3
0,27,27,14
1,6,19,6
2,4,19,15
3,23,12,15
4,9,21,5
5,25,25,22
6,4,7,18
7,19,24,20
8,28,9,10
9,24,12,11


In [109]:
# loc : tanimlandigi sekilde secim yapmak icin kullanilir

In [110]:
df.loc[0:3]

Unnamed: 0,var1,var2,var3
0,27,27,14
1,6,19,6
2,4,19,15
3,23,12,15


In [111]:
# iloc : alisik oldugumuz indeksleme mantigi ile secim yapar

In [112]:
df.iloc[0:3]

Unnamed: 0,var1,var2,var3
0,27,27,14
1,6,19,6
2,4,19,15


In [113]:
df.iloc[0,0]

27

In [114]:
df.iloc[:3,:2]

Unnamed: 0,var1,var2
0,27,27
1,6,19
2,4,19


In [115]:
df.loc[0:3,"var3"]

0    14
1     6
2    15
3    15
Name: var3, dtype: int32

In [116]:
df.iloc[0:3]["var3"]

0    14
1     6
2    15
Name: var3, dtype: int32

# Kosullu Eleman Islemleri 

In [117]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (10, 3))
# 10 satir 3 sutundan olusan bir array olusturulmus
df = pd.DataFrame(m, columns = ["var1","var2","var3"])
df

Unnamed: 0,var1,var2,var3
0,23,22,15
1,17,9,3
2,9,10,24
3,8,20,4
4,15,23,13
5,12,2,12
6,29,27,8
7,6,8,23
8,19,18,19
9,24,25,29


In [118]:
df["var1"]

0    23
1    17
2     9
3     8
4    15
5    12
6    29
7     6
8    19
9    24
Name: var1, dtype: int32

In [119]:
df["var1"][0:2]

0    23
1    17
Name: var1, dtype: int32

In [120]:
df[0:2][["var1","var2"]]

Unnamed: 0,var1,var2
0,23,22
1,17,9


In [121]:
df

Unnamed: 0,var1,var2,var3
0,23,22,15
1,17,9,3
2,9,10,24
3,8,20,4
4,15,23,13
5,12,2,12
6,29,27,8
7,6,8,23
8,19,18,19
9,24,25,29


In [122]:
df[df.var1 > 15]["var1"]
# kosulu calistirmak istedigimizde bu dataframe'i temsil eden df simgesinden sonra [] ile kosulu girerek gerceklestiririz
# burda sol taraf bir dataframe sag tarafta ise bu dataframeden  var1'i sec demek

0    23
1    17
6    29
8    19
9    24
Name: var1, dtype: int32

In [123]:
# birden fazla kosul girmek istersek 
df[(df.var1 > 15) & (df.var3 < 10)]

Unnamed: 0,var1,var2,var3
1,17,9,3
6,29,27,8


In [124]:
df.loc[(df.var1 > 15), ["var1","var2"]]
# with .loc the row condition comes first and the list of columns to keep comes second

Unnamed: 0,var1,var2
0,23,22
1,17,9
6,29,27
8,19,18
9,24,25


In [125]:
df.loc[(df.var1 > 15),["var1"]]
# tek degiskende bu sekilde olur

Unnamed: 0,var1
0,23
1,17
6,29
8,19
9,24


In [126]:
df[(df.var1 > 15)][["var1","var2"]]

Unnamed: 0,var1,var2
0,23,22
1,17,9
6,29,27
8,19,18
9,24,25


# Birlestirme (Join) Islemleri

In [127]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (5, 3))
# builds an array with 5 rows and 3 columns of random integers (low=1 inclusive, high=30 exclusive)
df1 = pd.DataFrame(m, columns = ["var1","var2","var3"])
df1

Unnamed: 0,var1,var2,var3
0,15,7,10
1,3,9,10
2,6,26,16
3,20,28,21
4,14,29,29


In [128]:
df2 = df1 + 99

In [129]:
df2

Unnamed: 0,var1,var2,var3
0,114,106,109
1,102,108,109
2,105,125,115
3,119,127,120
4,113,128,128


In [130]:
pd.concat([df1,df2])
# stacks the two DataFrames row-wise; each frame keeps its own index,
# so the labels 0-4 appear twice in the result

Unnamed: 0,var1,var2,var3
0,15,7,10
1,3,9,10
2,6,26,16
3,20,28,21
4,14,29,29
0,114,106,109
1,102,108,109
2,105,125,115
3,119,127,120
4,113,128,128


In [131]:
pd.concat([df1,df2], ignore_index = True)
#indexleri gormezden gelip 0 dan olusturduk

Unnamed: 0,var1,var2,var3
0,15,7,10
1,3,9,10
2,6,26,16
3,20,28,21
4,14,29,29
5,114,106,109
6,102,108,109
7,105,125,115
8,119,127,120
9,113,128,128


In [132]:
df1.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [133]:
df2.columns = ["var1","var2","deg3"]

In [134]:
df2

Unnamed: 0,var1,var2,deg3
0,114,106,109
1,102,108,109
2,105,125,115
3,119,127,120
4,113,128,128


In [135]:
df1

Unnamed: 0,var1,var2,var3
0,15,7,10
1,3,9,10
2,6,26,16
3,20,28,21
4,14,29,29


In [136]:
pd.concat([df1,df2])

Unnamed: 0,var1,var2,var3,deg3
0,15,7,10.0,
1,3,9,10.0,
2,6,26,16.0,
3,20,28,21.0,
4,14,29,29.0,
0,114,106,,109.0
1,102,108,,109.0
2,105,125,,115.0
3,119,127,,120.0
4,113,128,,128.0


In [137]:
pd.concat([df1,df2], join = "inner", ignore_index = True)
# burada kesisimlerine gore birlestirdi
# index problemi de goz ardi edildi 

Unnamed: 0,var1,var2
0,15,7
1,3,9
2,6,26
3,20,28
4,14,29
5,114,106
6,102,108
7,105,125
8,119,127
9,113,128


# Ileri Birlestirme Islemleri

In [138]:
import pandas as pd

In [139]:
df1 = pd.DataFrame({'calisanlar':['Ali','Veli','Ayse','Fatma'],
                   'grup':['Muhasebe','Muhendislik','Muhendislik','IK']})

In [140]:
df1

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muhendislik
2,Ayse,Muhendislik
3,Fatma,IK


In [141]:
df2 = pd.DataFrame({'calisanlar':['Ali','Veli','Ayse','Fatma'],
                   'ilk_giris':[2010,2012,2013,2019]})

In [142]:
df2

Unnamed: 0,calisanlar,ilk_giris
0,Ali,2010
1,Veli,2012
2,Ayse,2013
3,Fatma,2019


In [143]:
pd.merge(df1,df2)
# birebir birlestirme gerceklestirmis olduk
# merge fonksiyonu birlestirmelerin hangi degiskene gore yapilacagini kendisi anliyor

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Muhendislik,2012
2,Ayse,Muhendislik,2013
3,Fatma,IK,2019


In [144]:
pd.merge(df1,df2, on = "calisanlar")
#eger  birlestirmenin neyin uzerinde yapilmasi gerektigini biz kendimiz belirliyorsak "on" kullanilir

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Muhendislik,2012
2,Ayse,Muhendislik,2013
3,Fatma,IK,2019


In [145]:
# coktan teke

In [146]:
df3 = pd.merge(df1,df2)

In [147]:
df3

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Muhendislik,2012
2,Ayse,Muhendislik,2013
3,Fatma,IK,2019


In [148]:
df4 = pd.DataFrame({'grup':['Muhasebe','Muhendislik','IK'],
                   'mudur':['Caner','Mehmet','Berk']})

In [149]:
df4

Unnamed: 0,grup,mudur
0,Muhasebe,Caner
1,Muhendislik,Mehmet
2,IK,Berk


In [150]:
pd.merge(df3,df4)

Unnamed: 0,calisanlar,grup,ilk_giris,mudur
0,Ali,Muhasebe,2010,Caner
1,Veli,Muhendislik,2012,Mehmet
2,Ayse,Muhendislik,2013,Mehmet
3,Fatma,IK,2019,Berk


In [151]:
# coktan coga

In [152]:
df5 = pd.DataFrame({'grup':['Muhasebe','Muhasebe',
                           'Muhendislik','Muhendislik','IK','IK'],
                   'yetenekler':['matematik','excel','kodlama', 'linux','excel',
                                'yonetim']})

In [153]:
df5

Unnamed: 0,grup,yetenekler
0,Muhasebe,matematik
1,Muhasebe,excel
2,Muhendislik,kodlama
3,Muhendislik,linux
4,IK,excel
5,IK,yonetim


In [154]:
pd.merge(df1,df5)

Unnamed: 0,calisanlar,grup,yetenekler
0,Ali,Muhasebe,matematik
1,Ali,Muhasebe,excel
2,Veli,Muhendislik,kodlama
3,Veli,Muhendislik,linux
4,Ayse,Muhendislik,kodlama
5,Ayse,Muhendislik,linux
6,Fatma,IK,excel
7,Fatma,IK,yonetim


# Toplulastirma ve Gruplama (Aggregation & Grouping)

In [155]:
import seaborn as sns

In [156]:
df = sns.load_dataset("planets")
#sns icerisinden veri seti yukle diyorum.. planets veri setini yukledim
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [157]:
df.shape

(1035, 6)

In [158]:
df.mean(numeric_only=True)
# `method` is a text column; numeric_only=True restricts the mean to the numeric columns.
# Older pandas dropped non-numeric columns silently, pandas 2.x raises a TypeError without it.

number               1.785507
orbital_period    2002.917596
mass                 2.638161
distance           264.069282
year              2009.070531
dtype: float64

In [159]:
df["mass"].mean()
# belirli bir islem icin istiyorsak bu sekilde yapilir

2.6381605847953233

In [160]:
df["mass"].count()
# mass icerisindeki degerleri saymak icin kullanilir

513

In [161]:
df["mass"].min()

0.0036

In [162]:
df["mass"].max()

25.0

In [163]:
df["mass"].sum()

1353.37638

In [164]:
df["mass"].std()

3.8186166509616046

In [165]:
df["mass"].var()

14.58183312700122

In [166]:
df.describe()
# veriseti icerisindeki tum degiskenleri betimsel istatistikleri anlaminda ortaya koyduk 

Unnamed: 0,number,orbital_period,mass,distance,year
count,1035.0,992.0,513.0,808.0,1035.0
mean,1.785507,2002.917596,2.638161,264.069282,2009.070531
std,1.240976,26014.728304,3.818617,733.116493,3.972567
min,1.0,0.090706,0.0036,1.35,1989.0
25%,1.0,5.44254,0.229,32.56,2007.0
50%,1.0,39.9795,1.26,55.25,2010.0
75%,2.0,526.005,3.04,178.5,2012.0
max,7.0,730000.0,25.0,8500.0,2014.0


In [167]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [168]:
df.dropna().describe().T
# dropna() first removes every row that contains a missing value, so the descriptive
# statistics are computed on complete rows only (count is 498 for every variable)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,498.0,1.73494,1.17572,1.0,1.0,1.0,2.0,6.0
orbital_period,498.0,835.778671,1469.128259,1.3283,38.27225,357.0,999.6,17337.5
mass,498.0,2.50932,3.636274,0.0036,0.2125,1.245,2.8675,25.0
distance,498.0,52.068213,46.596041,1.35,24.4975,39.94,59.3325,354.0
year,498.0,2007.37751,4.167284,1989.0,2005.0,2009.0,2011.0,2014.0


# Gruplama Islemleri

In [169]:
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'veri':[10,23,45,54,23,34]}, columns = ['gruplar', 'veri'])
df

Unnamed: 0,gruplar,veri
0,A,10
1,B,23
2,C,45
3,A,54
4,B,23
5,C,34


In [170]:
df.groupby("gruplar")
# gruplari yakaladik
#gruplama islemi yapabilmemiz icin bize kategorik bir degisken lazim

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002325AE19430>

In [171]:
df.groupby("gruplar").mean()
# yakalamis oldugumuz gruplarin ortalamasini aldik

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,32.0
B,23.0
C,39.5


In [172]:
df.groupby("gruplar").sum()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,64
B,46
C,79


In [173]:
df = sns.load_dataset("planets")
#sns icerisinden veri seti yukle diyorum.. planets veri setini yukledim
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [174]:
df.groupby("method")["orbital_period"].mean()
# rows are grouped by `method`; out of all variables only `orbital_period` is selected
# and its mean is computed within each group

method
Astrometry                          631.180000
Eclipse Timing Variations          4751.644444
Imaging                          118247.737500
Microlensing                       3153.571429
Orbital Brightness Modulation         0.709307
Pulsar Timing                      7343.021201
Pulsation Timing Variations        1170.000000
Radial Velocity                     823.354680
Transit                              21.102073
Transit Timing Variations            79.783500
Name: orbital_period, dtype: float64

In [175]:
df.groupby("method")["orbital_period"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Astrometry,2.0,631.18,544.217663,246.36,438.77,631.18,823.59,1016.0
Eclipse Timing Variations,9.0,4751.644444,2499.130945,1916.25,2900.0,4343.5,5767.0,10220.0
Imaging,12.0,118247.7375,213978.177277,4639.15,8343.9,27500.0,94250.0,730000.0
Microlensing,7.0,3153.571429,1113.166333,1825.0,2375.0,3300.0,3550.0,5100.0
Orbital Brightness Modulation,3.0,0.709307,0.725493,0.240104,0.291496,0.342887,0.943908,1.544929
Pulsar Timing,5.0,7343.021201,16313.265573,0.090706,25.262,66.5419,98.2114,36525.0
Pulsation Timing Variations,1.0,1170.0,,1170.0,1170.0,1170.0,1170.0,1170.0
Radial Velocity,553.0,823.35468,1454.92621,0.73654,38.021,360.2,982.0,17337.5
Transit,397.0,21.102073,46.185893,0.355,3.16063,5.714932,16.1457,331.60059
Transit Timing Variations,3.0,79.7835,71.599884,22.3395,39.67525,57.011,108.5055,160.0


# Ileri Toplulastirma Islemleri(Aggregate, filter, transform, apply)

In [176]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'degisken1':[10,23,33,22,11,99],
                  'degisken2':[100,253,333,262,111,969]},
                 columns = ['gruplar', 'degisken1', 'degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [177]:
# aggregate

In [178]:
df.groupby("gruplar").mean()
# verisetini gruplara gore bol 

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,16,181
B,17,182
C,66,651


In [179]:
df.groupby("gruplar").aggregate(["min", "median", "max"])
# aggregate() lets us pick exactly which statistics to compute for each group.
# Built-in reductions are given by their string names ("min", "median", "max").
# A callable such as np.median is also accepted, but recent pandas versions emit a
# FutureWarning for it and recommend the string name, which yields the same result.

Unnamed: 0_level_0,degisken1,degisken1,degisken1,degisken2,degisken2,degisken2
Unnamed: 0_level_1,min,median,max,min,median,max
gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,16,22,100,181,262
B,11,17,23,111,182,253
C,33,66,99,333,651,969


In [180]:
df.groupby("gruplar").aggregate({'degisken1':'min', 'degisken2':'max'})
# burada degisken 1 icin min degerlerini, degisken 2 icin max degerlerini hesapladim

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,262
B,11,253
C,33,969


# Filter

In [181]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'degisken1':[10,23,33,22,11,99],
                  'degisken2':[100,253,333,262,111,969]},
                 columns = ['gruplar', 'degisken1', 'degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [182]:
def filter_func(x):
    """Tell whether a group's ``degisken1`` values are spread widely enough to keep.

    Used as the predicate for ``DataFrameGroupBy.filter``: ``x`` is the
    sub-frame of a single group, and the result is truthy when the standard
    deviation of that group's ``degisken1`` column is greater than 9.
    """
    spread = x["degisken1"].std()
    return spread > 9

In [183]:
df.groupby("gruplar").filter(filter_func)

Unnamed: 0,gruplar,degisken1,degisken2
2,C,33,333
5,C,99,969


# Transform

In [184]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'degisken1':[10,23,33,22,11,99],
                  'degisken2':[100,253,333,262,111,969]},
                 columns = ['gruplar', 'degisken1', 'degisken2'])
df 

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [185]:
df["degisken1"]*9

0     90
1    207
2    297
3    198
4     99
5    891
Name: degisken1, dtype: int64

In [186]:
df_a = df.iloc[:,1:3]

In [187]:
df_a.transform(lambda x: x-x.mean())
# kisisel bir fonk ile bunu gerceklestirdik
# lambda -> bir fonksiyon isimlendirmesi yapmadan fonksiyon olusturmaya yariyor
#transform fonksiyonunu belirledigimiz bir donusturme fonksiyonu olarak degiskenlerin uzerinde kullanabiliyoruz

Unnamed: 0,degisken1,degisken2
0,-23.0,-238.0
1,-10.0,-85.0
2,0.0,-5.0
3,-11.0,-76.0
4,-22.0,-227.0
5,66.0,631.0


In [188]:
df_a.transform(lambda x: (x-x.mean()) / x.std())

Unnamed: 0,degisken1,degisken2
0,-0.687871,-0.738461
1,-0.299074,-0.263736
2,0.0,-0.015514
3,-0.328982,-0.235811
4,-0.657963,-0.704331
5,1.97389,1.957853


# Apply

In [189]:
import pandas as pd
df = pd.DataFrame({
                  'degisken1':[10,23,33,22,11,99],
                  'degisken2':[100,253,333,262,111,969]},
                 columns = ['degisken1', 'degisken2'])
df 

Unnamed: 0,degisken1,degisken2
0,10,100
1,23,253
2,33,333
3,22,262
4,11,111
5,99,969


In [190]:
df.apply(np.sum)
#sutun bazinda islem gerceklestirdi

degisken1     198
degisken2    2028
dtype: int64

In [191]:
df.apply(np.mean)

degisken1     33.0
degisken2    338.0
dtype: float64

In [192]:
?df.apply
#hakkinda bilgi almak istersem bu sekilde bir kullanim gerceklestirebiliyorum 

# Pivot Tablolar

In [193]:
# veri setleri uzerinde bazi satir ve sutun islemleri yaparak veri setine amaca uygun hale getirmek 
# icin kullanilan yapilardir..

In [194]:
import pandas as pd
import seaborn as sns
titanic =  sns.load_dataset('titanic')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [195]:
titanic.groupby("sex")[["survived"]].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [196]:
titanic.groupby(["sex","class"])[["survived"]].aggregate("mean").unstack()
# burada ilk olarak bir gruplama islemi yapildi daha sonra bir degisken secti
# ve mean()'i uygula dedim

Unnamed: 0_level_0,survived,survived,survived
class,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [197]:
titanic.groupby(["sex","class"])[["survived"]].aggregate("mean")
# without unstack() the result keeps the hierarchical (sex, class) row index;
# unstack() is what moved the `class` level into the columns in the previous cell

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,class,Unnamed: 2_level_1
female,First,0.968085
female,Second,0.921053
female,Third,0.5
male,First,0.368852
male,Second,0.157407
male,Third,0.135447


In [198]:
# pivot ile pivot table

In [199]:
titanic.pivot_table("survived", index = "sex", columns = "class")
#pivot_table fonksiyonu odaklandigin degisken ve indexini girerek ve baska degiskenler de varsa 
# onlari da ekleyerek olustururuz
# index olark girilen yeri satir olarak degerlendirdi

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [200]:
 titanic.age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [201]:
age = pd.cut(titanic["age"],[0, 18,90])
age.head(10)
# burada surekli bir degisken olan age i kategorik bir degisken haline getirdik 

0    (18.0, 90.0]
1    (18.0, 90.0]
2    (18.0, 90.0]
3    (18.0, 90.0]
4    (18.0, 90.0]
5             NaN
6    (18.0, 90.0]
7     (0.0, 18.0]
8    (18.0, 90.0]
9     (0.0, 18.0]
Name: age, dtype: category
Categories (2, interval[int64]): [(0, 18] < (18, 90]]

In [202]:
titanic.pivot_table("survived", ["sex", age], "class")
# birinci degisken = sex ve age kategorik degiskenini aldik age disardan geldigi icin "" icine almadik

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.511628
female,"(18, 90]",0.972973,0.9,0.423729
male,"(0, 18]",0.8,0.6,0.215686
male,"(18, 90]",0.375,0.071429,0.133663


# Dis Kaynakli Veri Okumak 

In [205]:
import pandas as pd

In [210]:
# csv okuma
pd.read_csv("../Pandas Jupyter/reading_data/reading_data/ornekcsv.csv", sep = ";")
# ayrim ifadesine gore okutmak icin sep = ; dedim

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [212]:
# reading a plain-text file
pd.read_csv("../Pandas Jupyter/reading_data/reading_data/duz_metin.txt")
# no `sep` is passed here, so read_csv falls back to its default "," delimiter;
# the recorded output has a single column named "1 2", i.e. the two values per line were not split
# NOTE(review): the file looks whitespace-separated -- probably wants sep=r"\s+"
# (and header=None if the first line is data rather than a header); confirm against the file

Unnamed: 0,1 2
0,2 2
1,3 2
2,4 2
3,5 2
4,6 2
5,7 2
6,8 2
7,9 2
8,10 2


In [216]:
# excel okuma
pd.read_excel("../Pandas Jupyter/reading_data/reading_data/ornekx.xlsx")
# excel formundaki veriyi okuduk

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [217]:
df = pd.read_excel("../Pandas Jupyter/reading_data/reading_data/ornekx.xlsx")

In [218]:
type(df)

pandas.core.frame.DataFrame

In [219]:
df.head()

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0


In [220]:
df.columns = ("A","B","C")

In [221]:
df

Unnamed: 0,A,B,C
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [222]:
# NOT: Eger okumamiz gereken dosya html formatinda ise veya baska formatlarda ise o dosyayi ya excel ya
# txt formatina donusturmeliyiz