# Pandas Serisi Oluşturma

In [1]:
import pandas as pd

In [2]:
pd.Series([1,2,3,4,5])

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
seri = pd.Series([1,2,3,4,5])

In [4]:
type(seri)

pandas.core.series.Series

In [5]:
seri.axes

[RangeIndex(start=0, stop=5, step=1)]

In [6]:
seri.dtype

dtype('int64')

In [7]:
seri.size

5

In [8]:
seri.ndim

1

In [9]:
seri.values

array([1, 2, 3, 4, 5], dtype=int64)

In [11]:
seri.head(3)

0    1
1    2
2    3
dtype: int64

In [12]:
seri.tail(3)

2    3
3    4
4    5
dtype: int64

In [13]:
# index isimlendirmesi

In [14]:
pd.Series([99,64,121,356,78])

0     99
1     64
2    121
3    356
4     78
dtype: int64

In [15]:
pd.Series([99,64,121,356,78], index = [1,3,5,7,9])

1     99
3     64
5    121
7    356
9     78
dtype: int64

In [18]:
a = pd.Series([99,64,121,356,78], index = ["a","b","c","d","e"])

In [19]:
a["a"]

99

In [21]:
a["a":"c"]

a     99
b     64
c    121
dtype: int64

In [22]:
# sozluk uzerinden liste olusturmak

In [25]:
sozluk = {"reg":10,
          "log":11,
          "cart":12}

In [26]:
seri = pd.Series(sozluk)

In [27]:
seri

reg     10
log     11
cart    12
dtype: int64

In [28]:
# iki seriyi birleştirmek

In [30]:
pd.concat([seri,seri])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

# Eleman İşlemleri

In [5]:
import numpy as np
import pandas as pd
a = np.array([1,2,33,444,75])
seri = pd.Series(a)
seri

0      1
1      2
2     33
3    444
4     75
dtype: int32

In [6]:
seri[0]

1

In [7]:
seri[0:3]

0     1
1     2
2    33
dtype: int32

In [9]:
seri = pd.Series([123,56,86,432,123] , index = ["a","b","c","d","e"])

In [10]:
seri

a    123
b     56
c     86
d    432
e    123
dtype: int64

In [11]:
seri.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [12]:
seri.keys

<bound method Series.keys of a    123
b     56
c     86
d    432
e    123
dtype: int64>

In [13]:
list(seri.items())

[('a', 123), ('b', 56), ('c', 86), ('d', 432), ('e', 123)]

In [14]:
seri.values

array([123,  56,  86, 432, 123], dtype=int64)

In [15]:
# eleman sorgulama
"a" in seri

True

In [16]:
"reg" in seri

False

In [17]:
seri["a"]

123

In [18]:
# fancy eleman
seri[["a","c"]]

a    123
c     86
dtype: int64

# Pandas DataFrame Oluşturma

In [19]:
import pandas as pd

In [20]:
l = [1,2,43,56,76]

In [21]:
l

[1, 2, 43, 56, 76]

In [22]:
pd.DataFrame(l,columns = ["degisken_ismi"])

Unnamed: 0,degisken_ismi
0,1
1,2
2,43
3,56
4,76


In [23]:
import numpy as np
m = np.arange(1,10).reshape((3,3))

In [24]:
pd.DataFrame(m,columns = ["var1","var2","var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [None]:
# df isimlendirme

In [25]:
df = pd.DataFrame(m,columns = ["var1","var2","var3"])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [26]:
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [27]:
df.columns = ("deg1","deg2","deg3")

In [28]:
df

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [29]:
type(df)

pandas.core.frame.DataFrame

In [30]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['deg1', 'deg2', 'deg3'], dtype='object')]

In [31]:
df.shape

(3, 3)

In [32]:
df.ndim

2

In [33]:
df.size

9

In [34]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [35]:
type(df.values)

numpy.ndarray

In [36]:
df.tail()

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [37]:
a = np.array([1,2,3,4,5])

In [38]:
pd.DataFrame(a,columns = ["deg1"])

Unnamed: 0,deg1
0,1
1,2
2,3
3,4
4,5


# Eleman İşlemleri

In [1]:
import numpy as np
s1 = np.random.randint(10, size = 5)
s2 = np.random.randint(10, size = 5)
s3 = np.random.randint(10, size = 5)

In [2]:
sozluk = {"var1":s1,"var2":s2,"var3":s3}

In [3]:
sozluk

{'var1': array([2, 9, 0, 9, 4]),
 'var2': array([5, 0, 1, 2, 9]),
 'var3': array([6, 1, 3, 7, 4])}

In [4]:
import pandas as pd
df = pd.DataFrame(sozluk)

In [5]:
df

Unnamed: 0,var1,var2,var3
0,2,5,6
1,9,0,1
2,0,1,3
3,9,2,7
4,4,9,4


In [6]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,2,5,6


In [9]:
df.index = ["a","b","c","d","e"]

In [10]:
df

Unnamed: 0,var1,var2,var3
a,2,5,6
b,9,0,1
c,0,1,3
d,9,2,7
e,4,9,4


In [11]:
df["c":"e"]

Unnamed: 0,var1,var2,var3
c,0,1,3
d,9,2,7
e,4,9,4


In [12]:
#silme

In [13]:
df.drop("a", axis = 0)

Unnamed: 0,var1,var2,var3
b,9,0,1
c,0,1,3
d,9,2,7
e,4,9,4


In [14]:
df

Unnamed: 0,var1,var2,var3
a,2,5,6
b,9,0,1
c,0,1,3
d,9,2,7
e,4,9,4


In [15]:
df.drop("a", axis = 0, inplace = True)

In [16]:
df

Unnamed: 0,var1,var2,var3
b,9,0,1
c,0,1,3
d,9,2,7
e,4,9,4


In [17]:
# fancy

In [19]:
l = ["c","e"]
df.drop(l, axis = 0)

Unnamed: 0,var1,var2,var3
b,9,0,1
d,9,2,7


In [20]:
"var1" in df

True

In [22]:
l = ["var1","var4","var2"]
for item in l:
    print(item in df)

True
False
True


In [23]:
df

Unnamed: 0,var1,var2,var3
b,9,0,1
c,0,1,3
d,9,2,7
e,4,9,4


In [24]:
df["var4"] = df["var1"] / df["var2"]

In [25]:
df

Unnamed: 0,var1,var2,var3,var4
b,9,0,1,inf
c,0,1,3,0.0
d,9,2,7,4.5
e,4,9,4,0.444444


In [26]:
# degisken silmek

# Gözlem Ve Değişken Seçimi : loc & iloc

In [29]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (10,3))
df = pd.DataFrame(m , columns = ["var1","var2","var3"])
df

Unnamed: 0,var1,var2,var3
0,9,6,5
1,27,5,20
2,8,4,4
3,3,12,23
4,28,25,5
5,6,28,12
6,24,8,21
7,16,28,7
8,3,19,6
9,7,9,26


In [30]:
# loc : tanımlandığı şekli ile seçim yapmak için kullanılır.

In [32]:
df.loc[0:3]

Unnamed: 0,var1,var2,var3
0,9,6,5
1,27,5,20
2,8,4,4
3,3,12,23


In [33]:
# iloc : indexleme ile seçim yapar bildiğimiz.

In [34]:
df.iloc[0:3]

Unnamed: 0,var1,var2,var3
0,9,6,5
1,27,5,20
2,8,4,4


# Koşullu Eleman İşlemleri

In [1]:
import pandas as pd
import numpy as np

In [2]:
arr = np.random.randint(10, size = (10,3))

In [3]:
arr

array([[3, 4, 7],
       [0, 3, 5],
       [1, 4, 4],
       [2, 5, 3],
       [4, 2, 9],
       [5, 8, 4],
       [8, 3, 8],
       [7, 3, 5],
       [5, 1, 7],
       [0, 6, 3]])

In [6]:
df = pd.DataFrame(arr, columns = ["var1","var2","var3"])

In [7]:
df

Unnamed: 0,var1,var2,var3
0,3,4,7
1,0,3,5
2,1,4,4
3,2,5,3
4,4,2,9
5,5,8,4
6,8,3,8
7,7,3,5
8,5,1,7
9,0,6,3


In [13]:
df[(df.var1 > 3) & (df.var2 > 5)][["var1","var2"]]

Unnamed: 0,var1,var2
5,5,8


In [14]:
df.loc[(df.var1 > 3),["var1","var2"]]

Unnamed: 0,var1,var2
4,4,2
5,5,8
6,8,3
7,7,3
8,5,1


In [15]:
df

Unnamed: 0,var1,var2,var3
0,3,4,7
1,0,3,5
2,1,4,4
3,2,5,3
4,4,2,9
5,5,8,4
6,8,3,8
7,7,3,5
8,5,1,7
9,0,6,3


# Birleştirme (Join) İşlemleri

In [18]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30,size = (10,3))
df1 = pd.DataFrame(m,columns = ["var1","var2","var3"])
df1

Unnamed: 0,var1,var2,var3
0,22,23,23
1,1,8,8
2,19,28,27
3,10,26,20
4,29,17,29
5,7,2,5
6,17,16,18
7,21,1,28
8,24,27,2
9,23,24,10


In [19]:
df2 = df1 + 99

In [21]:
df2

Unnamed: 0,var1,var2,var3
0,121,122,122
1,100,107,107
2,118,127,126
3,109,125,119
4,128,116,128
5,106,101,104
6,116,115,117
7,120,100,127
8,123,126,101
9,122,123,109


In [25]:
pd.concat([df1,df2], ignore_index = True)
# ignore index yazmazsak indexler karışır hatalı olur.

Unnamed: 0,var1,var2,var3
0,22,23,23
1,1,8,8
2,19,28,27
3,10,26,20
4,29,17,29
5,7,2,5
6,17,16,18
7,21,1,28
8,24,27,2
9,23,24,10


In [26]:
df2.columns = ["var1","var2","deg3"]

In [27]:
df2

Unnamed: 0,var1,var2,deg3
0,121,122,122
1,100,107,107
2,118,127,126
3,109,125,119
4,128,116,128
5,106,101,104
6,116,115,117
7,120,100,127
8,123,126,101
9,122,123,109


In [28]:
# hata alacağız.sütun isimleri farklı çünkü

In [29]:
pd.concat([df1,df2])

Unnamed: 0,var1,var2,var3,deg3
0,22,23,23.0,
1,1,8,8.0,
2,19,28,27.0,
3,10,26,20.0,
4,29,17,29.0,
5,7,2,5.0,
6,17,16,18.0,
7,21,1,28.0,
8,24,27,2.0,
9,23,24,10.0,


In [30]:
# hatayı düzeltmek için kesişimleri alınabilir
pd.concat([df1,df2], join = "inner")

Unnamed: 0,var1,var2
0,22,23
1,1,8
2,19,28
3,10,26
4,29,17
5,7,2
6,17,16
7,21,1
8,24,27
9,23,24


In [33]:
df1

Unnamed: 0,var1,var2,var3
0,22,23,23
1,1,8,8
2,19,28,27
3,10,26,20
4,29,17,29
5,7,2,5
6,17,16,18
7,21,1,28
8,24,27,2
9,23,24,10


In [34]:
df2

Unnamed: 0,var1,var2,deg3
0,121,122,122
1,100,107,107
2,118,127,126
3,109,125,119
4,128,116,128
5,106,101,104
6,116,115,117
7,120,100,127
8,123,126,101
9,122,123,109


In [37]:
?pd.concat

[1;31mSignature:[0m
[0mpd[0m[1;33m.[0m[0mconcat[0m[1;33m([0m[1;33m
[0m    [0mobjs[0m[1;33m:[0m [1;34m'Iterable[NDFrame] | Mapping[Hashable, NDFrame]'[0m[1;33m,[0m[1;33m
[0m    [0maxis[0m[1;33m:[0m [1;34m'Axis'[0m [1;33m=[0m [1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mjoin[0m[1;33m:[0m [1;34m'str'[0m [1;33m=[0m [1;34m'outer'[0m[1;33m,[0m[1;33m
[0m    [0mignore_index[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mkeys[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mlevels[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mnames[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mverify_integrity[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0msort[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mcopy[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[

In [38]:
# ya da birleştirirken bir sütunu tercih edebilirsiniz.
# kaldırılmış ?
#pd.concat([df1,df2], join_axes = [df2.columns])

# İleri Birleştirme İşlemleri

In [1]:
import pandas as pd
# birebir birleştirme

In [2]:
df1 = pd.DataFrame({"calisanlar":["Ali","Veli","Fatma","Ahmet"],
                   "grup":["Muhasebe","Mühendislik","Mühendislik","İK"]})
df2 = pd.DataFrame({"calisanlar":["Ali","Veli","Fatma","Ahmet"],
                   "ilk_giris":[2010,2005,2003,2002]})

In [4]:
birlesik = pd.merge(df1,df2, on = "calisanlar")

In [5]:
birlesik

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Mühendislik,2005
2,Fatma,Mühendislik,2003
3,Ahmet,İK,2002


In [6]:
# coktan teke

In [7]:
df3 = pd.merge(df1,df2)

In [8]:
df3

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Mühendislik,2005
2,Fatma,Mühendislik,2003
3,Ahmet,İK,2002


In [10]:
df4 = pd.DataFrame({"grup":["Muhasebe","Mühendislik","İK"],
                   "mudur":["Caner","Melis","Berkcan"]})

In [11]:
pd.merge(df3,df4)

Unnamed: 0,calisanlar,grup,ilk_giris,mudur
0,Ali,Muhasebe,2010,Caner
1,Veli,Mühendislik,2005,Melis
2,Fatma,Mühendislik,2003,Melis
3,Ahmet,İK,2002,Berkcan


# Toplulaştırma Ve Gruplama

In [12]:
import pandas as pd
import seaborn as sns

In [13]:
df = sns.load_dataset("planets")
df

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.300000,7.10,77.40,2006
1,Radial Velocity,1,874.774000,2.21,56.95,2008
2,Radial Velocity,1,763.000000,2.60,19.84,2011
3,Radial Velocity,1,326.030000,19.40,110.62,2007
4,Radial Velocity,1,516.220000,10.50,119.47,2009
...,...,...,...,...,...,...
1030,Transit,1,3.941507,,172.00,2006
1031,Transit,1,2.615864,,148.00,2007
1032,Transit,1,3.191524,,174.00,2007
1033,Transit,1,4.125083,,293.00,2008


In [14]:
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [15]:
df.shape

(1035, 6)

In [16]:
df.size

6210

In [18]:
df.mean()

  df.mean()


number               1.785507
orbital_period    2002.917596
mass                 2.638161
distance           264.069282
year              2009.070531
dtype: float64

In [19]:
df["mass"].mean()

2.6381605847953233

In [21]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [23]:
# eksik gözlemleri silerek

In [22]:
df.dropna().describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,498.0,1.73494,1.17572,1.0,1.0,1.0,2.0,6.0
orbital_period,498.0,835.778671,1469.128259,1.3283,38.27225,357.0,999.6,17337.5
mass,498.0,2.50932,3.636274,0.0036,0.2125,1.245,2.8675,25.0
distance,498.0,52.068213,46.596041,1.35,24.4975,39.94,59.3325,354.0
year,498.0,2007.37751,4.167284,1989.0,2005.0,2009.0,2011.0,2014.0


# Gruplama İşlemleri

In [4]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'veri':[10,11,52,23,43,45]},columns=['gruplar','veri'])

In [5]:
df

Unnamed: 0,gruplar,veri
0,A,10
1,B,11
2,C,52
3,A,23
4,B,43
5,C,45


In [7]:
df.groupby('gruplar')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002962CA3D370>

In [8]:
df.groupby('gruplar').mean()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,16.5
B,27.0
C,48.5


In [9]:
df.groupby('gruplar').sum()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,33
B,54
C,97


In [10]:
import seaborn as sns

In [12]:
df = sns.load_dataset("planets")
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [15]:
df.groupby('method')["orbital_period"]

<pandas.core.groupby.generic.SeriesGroupBy object at 0x0000029630415970>

In [16]:
df.groupby('method')["orbital_period"].mean()

method
Astrometry                          631.180000
Eclipse Timing Variations          4751.644444
Imaging                          118247.737500
Microlensing                       3153.571429
Orbital Brightness Modulation         0.709307
Pulsar Timing                      7343.021201
Pulsation Timing Variations        1170.000000
Radial Velocity                     823.354680
Transit                              21.102073
Transit Timing Variations            79.783500
Name: orbital_period, dtype: float64

In [17]:
df.groupby('method')["mass"].mean()

method
Astrometry                            NaN
Eclipse Timing Variations        5.125000
Imaging                               NaN
Microlensing                          NaN
Orbital Brightness Modulation         NaN
Pulsar Timing                         NaN
Pulsation Timing Variations           NaN
Radial Velocity                  2.630699
Transit                          1.470000
Transit Timing Variations             NaN
Name: mass, dtype: float64

In [18]:
df.groupby('method')["orbital_period"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Astrometry,2.0,631.18,544.217663,246.36,438.77,631.18,823.59,1016.0
Eclipse Timing Variations,9.0,4751.644444,2499.130945,1916.25,2900.0,4343.5,5767.0,10220.0
Imaging,12.0,118247.7375,213978.177277,4639.15,8343.9,27500.0,94250.0,730000.0
Microlensing,7.0,3153.571429,1113.166333,1825.0,2375.0,3300.0,3550.0,5100.0
Orbital Brightness Modulation,3.0,0.709307,0.725493,0.240104,0.291496,0.342887,0.943908,1.544929
Pulsar Timing,5.0,7343.021201,16313.265573,0.090706,25.262,66.5419,98.2114,36525.0
Pulsation Timing Variations,1.0,1170.0,,1170.0,1170.0,1170.0,1170.0,1170.0
Radial Velocity,553.0,823.35468,1454.92621,0.73654,38.021,360.2,982.0,17337.5
Transit,397.0,21.102073,46.185893,0.355,3.16063,5.714932,16.1457,331.60059
Transit Timing Variations,3.0,79.7835,71.599884,22.3395,39.67525,57.011,108.5055,160.0


# İleri Aggregation İşlemleri(Aggregate,Filter,Transform)

In [20]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'degisken1':[10,15,25,76,43,22],
                  'degisken2':[3,6,9,45,2,8]},columns = ['gruplar','degisken1','degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,3
1,B,15,6
2,C,25,9
3,A,76,45
4,B,43,2
5,C,22,8


In [21]:
# aggregate

In [23]:
df.groupby('gruplar').mean()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,43.0,24.0
B,29.0,4.0
C,23.5,8.5


In [24]:
import numpy as np

In [26]:
df.groupby('gruplar').aggregate(["min",np.median,"max"])

Unnamed: 0_level_0,degisken1,degisken1,degisken1,degisken2,degisken2,degisken2
Unnamed: 0_level_1,min,median,max,min,median,max
gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,43.0,76,3,24.0,45
B,15,29.0,43,2,4.0,6
C,22,23.5,25,8,8.5,9


In [27]:
df.groupby('gruplar').aggregate({'degisken1':"min",'degisken2':"max"})

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,45
B,15,6
C,22,9


In [28]:
# filter

In [29]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'degisken1':[10,15,25,76,43,22],
                  'degisken2':[335,665,999,458,356,100]},columns = ['gruplar','degisken1','degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,335
1,B,15,665
2,C,25,999
3,A,76,458
4,B,43,356
5,C,22,100


In [32]:
def filter_func(x):
    return x['degisken1'].std()>9

In [33]:
df.groupby('gruplar').std()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,46.669048,86.974134
B,19.79899,218.495995
C,2.12132,635.688996


In [31]:
df.groupby('gruplar').filter(filter_func)

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,335
1,B,15,665
3,A,76,458
4,B,43,356


In [None]:
# transform

In [34]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'degisken1':[10,15,25,76,43,22],
                  'degisken2':[335,665,999,458,356,100]},columns = ['gruplar','degisken1','degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,335
1,B,15,665
2,C,25,999
3,A,76,458
4,B,43,356
5,C,22,100


In [35]:
df['degisken1']*9

0     90
1    135
2    225
3    684
4    387
5    198
Name: degisken1, dtype: int64

In [41]:
df_a = df.iloc[:,1:3]

In [43]:
df_a.transform(lambda x:(x-x.mean())-x.std())

Unnamed: 0,degisken1,degisken2
0,-46.241665,-461.848519
1,-41.241665,-131.848519
2,-31.241665,202.151481
3,19.758335,-338.848519
4,-13.241665,-440.848519
5,-34.241665,-696.848519


In [44]:
# apply

In [49]:
import pandas as pd
df = pd.DataFrame({'gruplar':['A','B','C','A','B','C'],
                  'degisken1':[10,15,25,76,43,22],
                  'degisken2':[335,665,999,458,356,100]},columns = ['gruplar','degisken1','degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,335
1,B,15,665
2,C,25,999
3,A,76,458
4,B,43,356
5,C,22,100


In [47]:
df.apply(np.sum)

degisken1     191
degisken2    2913
dtype: int64

In [48]:
df.apply(np.mean)

degisken1     31.833333
degisken2    485.500000
dtype: float64

In [52]:
df.groupby('gruplar').apply(np.sum)
# kategorik verileri de topluyor.

Unnamed: 0_level_0,gruplar,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,AA,86,793
B,BB,58,1021
C,CC,47,1099


# Pivot Tablolar

In [5]:
import pandas as pd
import seaborn as sns
titanic = sns.load_dataset('titanic')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [7]:
titanic.groupby('sex')[['survived']].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [9]:
titanic.groupby(['sex','class'])[['survived']].aggregate('mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,class,Unnamed: 2_level_1
female,First,0.968085
female,Second,0.921053
female,Third,0.5
male,First,0.368852
male,Second,0.157407
male,Third,0.135447


In [10]:
titanic.groupby(['sex','class'])[['survived']].aggregate('mean').unstack()

Unnamed: 0_level_0,survived,survived,survived
class,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [11]:
# pivot table

In [12]:
titanic.pivot_table('survived',index = 'sex',columns = 'class')

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [14]:
titanic.age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [15]:
age = pd.cut(titanic['age'],[0,18,90])

In [17]:
age.head(10)

0    (18.0, 90.0]
1    (18.0, 90.0]
2    (18.0, 90.0]
3    (18.0, 90.0]
4    (18.0, 90.0]
5             NaN
6    (18.0, 90.0]
7     (0.0, 18.0]
8    (18.0, 90.0]
9     (0.0, 18.0]
Name: age, dtype: category
Categories (2, interval[int64, right]): [(0, 18] < (18, 90]]

In [18]:
titanic.pivot_table('survived',index =['sex',age],columns='class')

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.511628
female,"(18, 90]",0.972973,0.9,0.423729
male,"(0, 18]",0.8,0.6,0.215686
male,"(18, 90]",0.375,0.071429,0.133663
