# Pandas Serisi Oluşturmak

In [1]:
import pandas as pd

In [2]:
pd.Series([10,22,35,43,511])

0     10
1     22
2     35
3     43
4    511
dtype: int64

In [3]:
seri = pd.Series([10,22,35,43,511])

In [4]:
type(seri)

pandas.core.series.Series

In [5]:
seri.axes

[RangeIndex(start=0, stop=5, step=1)]

In [6]:
seri.dtype

dtype('int64')

In [7]:
seri.size

5

In [8]:
seri.ndim

1

In [9]:
seri.values

array([ 10,  22,  35,  43, 511], dtype=int64)

In [10]:
seri.head(3)

0    10
1    22
2    35
dtype: int64

In [11]:
seri.tail(3)

2     35
3     43
4    511
dtype: int64

## İndex İsimlendirmesi

In [12]:
pd.Series([11,32,45,67,89])

0    11
1    32
2    45
3    67
4    89
dtype: int64

In [13]:
pd.Series([11,32,45,67,89], index = [1,3,5,7,9])

1    11
3    32
5    45
7    67
9    89
dtype: int64

In [14]:
seri = pd.Series([11,32,45,67,89], index = ["a", "b", "c", "d", "e"])

In [15]:
seri["a"]

11

In [16]:
seri["a":"c"]

a    11
b    32
c    45
dtype: int64

## Sözlük Üzerinden Liste Oluşturmak

In [17]:
sozluk = pd.Series({"reg":10, "log":11,"cart":12})
sozluk

reg     10
log     11
cart    12
dtype: int64

## İki Seriyi Birleştirerek Seri Oluşturma

In [18]:
pd.concat([sozluk, sozluk])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

# Eleman İşlemleri

In [19]:
import numpy as np

a = np.array([1,2,33,444,75])
seri = pd.Series(a)
seri

0      1
1      2
2     33
3    444
4     75
dtype: int32

In [20]:
seri[0]

1

In [21]:
seri[0:3]

0     1
1     2
2    33
dtype: int32

In [22]:
seri = pd.Series([121,200,150,99], ["reg","loj","cart","rf"])
seri

reg     121
loj     200
cart    150
rf       99
dtype: int64

In [23]:
seri.index

Index(['reg', 'loj', 'cart', 'rf'], dtype='object')

In [24]:
seri.keys

<bound method Series.keys of reg     121
loj     200
cart    150
rf       99
dtype: int64>

In [25]:
list(seri.items())

[('reg', 121), ('loj', 200), ('cart', 150), ('rf', 99)]

In [26]:
seri.values

array([121, 200, 150,  99], dtype=int64)

## Eleman Sorgulama

In [27]:
"reg" in seri

True

In [28]:
"a" in seri

False

In [29]:
seri["reg"]

121

## fancy Eleman

In [30]:
seri[["rf","reg"]]

rf      99
reg    121
dtype: int64

In [31]:
seri["reg"] = 130
seri["reg"]

130

In [32]:
seri["reg":"loj"]

reg    130
loj    200
dtype: int64

# Pandas DataFrame Oluşturma

In [33]:
l = [1,2,39,67,90]
l

[1, 2, 39, 67, 90]

In [34]:
pd.DataFrame(l, columns = ["değişken_ismi"])

Unnamed: 0,değişken_ismi
0,1
1,2
2,39
3,67
4,90


In [35]:
m = np.arange(1,10).reshape((3,3))
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [36]:
pd.DataFrame(m, columns = ["var1","var2","var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


## DataFrame İsimlendirme

In [37]:
df = pd.DataFrame(m, columns = ["var1", "var2", "var3"])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [38]:
df.columns = ("değ1", "değ2", "değ3")
df

Unnamed: 0,değ1,değ2,değ3
0,1,2,3
1,4,5,6
2,7,8,9


In [39]:
type(df)

pandas.core.frame.DataFrame

In [40]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['değ1', 'değ2', 'değ3'], dtype='object')]

In [41]:
df.shape

(3, 3)

In [42]:
df.ndim

2

In [43]:
df.size

9

In [44]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [45]:
type(df.values)

numpy.ndarray

In [46]:
df.head(2)

Unnamed: 0,değ1,değ2,değ3
0,1,2,3
1,4,5,6


In [47]:
df.tail(2)

Unnamed: 0,değ1,değ2,değ3
1,4,5,6
2,7,8,9


In [48]:
a = np.array([1,2,3,4,5])
pd.DataFrame(a, columns = ["değ1"])

Unnamed: 0,değ1
0,1
1,2
2,3
3,4
4,5


# Eleman İşlemleri

In [49]:
s1 = np.random.randint(10, size = 5)
s2 = np.random.randint(10, size = 5)
s3 = np.random.randint(10, size = 5)

In [50]:
sozluk = {"var1":s1, "var2":s2, "var3":s3}
sozluk

{'var1': array([6, 2, 0, 8, 4]),
 'var2': array([7, 6, 3, 4, 0]),
 'var3': array([5, 6, 5, 2, 8])}

In [51]:
df = pd.DataFrame(sozluk)
df

Unnamed: 0,var1,var2,var3
0,6,7,5
1,2,6,6
2,0,3,5
3,8,4,2
4,4,0,8


In [52]:
df[0:2]

Unnamed: 0,var1,var2,var3
0,6,7,5
1,2,6,6


In [53]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [54]:
df.index = ["a", "b", "c", "d", "e"]
df

Unnamed: 0,var1,var2,var3
a,6,7,5
b,2,6,6
c,0,3,5
d,8,4,2
e,4,0,8


In [55]:
df["b":"d"]

Unnamed: 0,var1,var2,var3
b,2,6,6
c,0,3,5
d,8,4,2


## Silme

In [56]:
df.drop("a", axis = 0)

Unnamed: 0,var1,var2,var3
b,2,6,6
c,0,3,5
d,8,4,2
e,4,0,8


In [57]:
df

Unnamed: 0,var1,var2,var3
a,6,7,5
b,2,6,6
c,0,3,5
d,8,4,2
e,4,0,8


In [58]:
df.drop("a", axis = 0, inplace = True)

In [59]:
df

Unnamed: 0,var1,var2,var3
b,2,6,6
c,0,3,5
d,8,4,2
e,4,0,8


## fancy İle Silme

In [60]:
df.drop(["c","e"], axis = 0)

Unnamed: 0,var1,var2,var3
b,2,6,6
d,8,4,2


## Değişkenler İçin

In [61]:
df

Unnamed: 0,var1,var2,var3
b,2,6,6
c,0,3,5
d,8,4,2
e,4,0,8


In [62]:
"var1" in df

True

In [63]:
l = ["var1", "var4", "var2"]

In [64]:
for i in l:
    print(i in df)

True
False
True


In [65]:
df

Unnamed: 0,var1,var2,var3
b,2,6,6
c,0,3,5
d,8,4,2
e,4,0,8


In [66]:
df["var4"] = df["var1"] / df["var2"]
df

Unnamed: 0,var1,var2,var3,var4
b,2,6,6,0.333333
c,0,3,5,0.0
d,8,4,2,2.0
e,4,0,8,inf


In [67]:
df.drop("var4", axis = 1, inplace=True)
df

Unnamed: 0,var1,var2,var3
b,2,6,6
c,0,3,5
d,8,4,2
e,4,0,8


In [68]:
l = ["var1", "var2"]

In [69]:
df.drop(l, axis = 1)

Unnamed: 0,var3
b,6
c,5
d,2
e,8


# Gözlem ve Değişken Seçimi: loc & iloc

In [70]:
m = np.random.randint(1, 30, size = (10, 3))
df = pd.DataFrame(m, columns = ["var1", "var2", "var3"])
df

Unnamed: 0,var1,var2,var3
0,17,15,24
1,22,1,18
2,7,12,1
3,18,9,24
4,14,7,3
5,26,28,28
6,27,12,9
7,28,6,19
8,19,5,5
9,10,14,7


## loc: tanımlandığı şekli ile seçim yapmak için kullanılır.

In [71]:
df.loc[0:3]

Unnamed: 0,var1,var2,var3
0,17,15,24
1,22,1,18
2,7,12,1
3,18,9,24


## iloc: alışık olduğumuz indeksleme mantığı ile seçim yapar.

In [72]:
df.iloc[0:3]

Unnamed: 0,var1,var2,var3
0,17,15,24
1,22,1,18
2,7,12,1


In [73]:
df.iloc[0,0]

17

In [74]:
df.iloc[:3,:2]

Unnamed: 0,var1,var2
0,17,15
1,22,1
2,7,12


In [75]:
df.loc[0:3,"var3"]

0    24
1    18
2     1
3    24
Name: var3, dtype: int32

# Koşullu Eleman İşlemleri

In [76]:
df["var1"]

0    17
1    22
2     7
3    18
4    14
5    26
6    27
7    28
8    19
9    10
Name: var1, dtype: int32

In [77]:
df[0:2][["var1","var2"]]

Unnamed: 0,var1,var2
0,17,15
1,22,1


In [78]:
df

Unnamed: 0,var1,var2,var3
0,17,15,24
1,22,1,18
2,7,12,1
3,18,9,24
4,14,7,3
5,26,28,28
6,27,12,9
7,28,6,19
8,19,5,5
9,10,14,7


In [79]:
df[df.var1 > 15]["var1"]

0    17
1    22
3    18
5    26
6    27
7    28
8    19
Name: var1, dtype: int32

In [80]:
df[(df.var1 > 15) & (df.var3 < 5)]

Unnamed: 0,var1,var2,var3


In [81]:
df.loc[(df.var1 > 15), ["var1", "var2"]]

Unnamed: 0,var1,var2
0,17,15
1,22,1
3,18,9
5,26,28
6,27,12
7,28,6
8,19,5


In [82]:
df[(df.var1 > 15)] [["var1", "var2"]]

Unnamed: 0,var1,var2
0,17,15
1,22,1
3,18,9
5,26,28
6,27,12
7,28,6
8,19,5


# Birleştirme (Join) İşlemleri

In [83]:
m = np.random.randint(1, 30, size = (5,3))
df1 = pd.DataFrame(m, columns = ["var1", "var2", "Var3"])
df1

Unnamed: 0,var1,var2,Var3
0,13,25,20
1,14,21,14
2,14,2,12
3,3,26,10
4,13,8,22


In [84]:
df2 = df1 + 99
df2

Unnamed: 0,var1,var2,Var3
0,112,124,119
1,113,120,113
2,113,101,111
3,102,125,109
4,112,107,121


In [85]:
pd.concat([df1,df2])

Unnamed: 0,var1,var2,Var3
0,13,25,20
1,14,21,14
2,14,2,12
3,3,26,10
4,13,8,22
0,112,124,119
1,113,120,113
2,113,101,111
3,102,125,109
4,112,107,121


In [86]:
pd.concat([df1,df2], ignore_index=True)

Unnamed: 0,var1,var2,Var3
0,13,25,20
1,14,21,14
2,14,2,12
3,3,26,10
4,13,8,22
5,112,124,119
6,113,120,113
7,113,101,111
8,102,125,109
9,112,107,121


In [87]:
df1.columns

Index(['var1', 'var2', 'Var3'], dtype='object')

In [88]:
df2.columns = ["var1", "var2", "değ3"]
df2

Unnamed: 0,var1,var2,değ3
0,112,124,119
1,113,120,113
2,113,101,111
3,102,125,109
4,112,107,121


In [89]:
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,var1,var2,Var3,değ3
0,13,25,20.0,
1,14,21,14.0,
2,14,2,12.0,
3,3,26,10.0,
4,13,8,22.0,
5,112,124,,119.0
6,113,120,,113.0
7,113,101,,111.0
8,102,125,,109.0
9,112,107,,121.0


In [90]:
pd.concat([df1, df2], join = "inner", ignore_index=True) # kesişimlerine göre birleştirme.

Unnamed: 0,var1,var2
0,13,25
1,14,21
2,14,2
3,3,26
4,13,8
5,112,124
6,113,120
7,113,101
8,102,125
9,112,107


# İleri Birleştirme İşlemleri

## Birebir Birleştirme

In [91]:
df1 = pd.DataFrame({'Çalışanlar' : ['Ali', 'Veli', 'Ayşe', 'Fatma'],
                    'Grup' : ['Muhasebe', 'Mühendislik', 'Mühendislik', 'İK']})
df1

Unnamed: 0,Çalışanlar,Grup
0,Ali,Muhasebe
1,Veli,Mühendislik
2,Ayşe,Mühendislik
3,Fatma,İK


In [92]:
df2 = pd.DataFrame({'Çalışanlar' : ['Ali', 'Veli', 'Ayşe', 'Fatma'],
                    'İlk Giriş' : [2010, 2009, 2014, 2019]})
df2

Unnamed: 0,Çalışanlar,İlk Giriş
0,Ali,2010
1,Veli,2009
2,Ayşe,2014
3,Fatma,2019


In [93]:
pd.merge(df1,df2)

Unnamed: 0,Çalışanlar,Grup,İlk Giriş
0,Ali,Muhasebe,2010
1,Veli,Mühendislik,2009
2,Ayşe,Mühendislik,2014
3,Fatma,İK,2019


In [94]:
pd.merge(df1, df2, on = "Çalışanlar")

Unnamed: 0,Çalışanlar,Grup,İlk Giriş
0,Ali,Muhasebe,2010
1,Veli,Mühendislik,2009
2,Ayşe,Mühendislik,2014
3,Fatma,İK,2019


## Çok'tan-Tek'e

In [95]:
df3 = pd.merge(df1, df2)
df3

Unnamed: 0,Çalışanlar,Grup,İlk Giriş
0,Ali,Muhasebe,2010
1,Veli,Mühendislik,2009
2,Ayşe,Mühendislik,2014
3,Fatma,İK,2019


In [96]:
df4 = pd.DataFrame({'Grup' : ['Muhasebe', 'Mühendislik', 'İK'],
                    'Müdür' : ['Caner', 'Mustafa', 'Berkcan']})
df4

Unnamed: 0,Grup,Müdür
0,Muhasebe,Caner
1,Mühendislik,Mustafa
2,İK,Berkcan


In [97]:
pd.merge(df3,df4)

Unnamed: 0,Çalışanlar,Grup,İlk Giriş,Müdür
0,Ali,Muhasebe,2010,Caner
1,Veli,Mühendislik,2009,Mustafa
2,Ayşe,Mühendislik,2014,Mustafa
3,Fatma,İK,2019,Berkcan


## Çok'tan-Çok'a

In [98]:
df5 = pd.DataFrame({'Grup' : ['Muhasebe', 'Muhasebe', 'Mühendislik', 'Mühendislik', 'İK', 'İK'],
                    'Yetenekler' : ['Matematik', 'Excel', 'Kodlama', 'linux', 'Excel', 'Yönetim']})
df5

Unnamed: 0,Grup,Yetenekler
0,Muhasebe,Matematik
1,Muhasebe,Excel
2,Mühendislik,Kodlama
3,Mühendislik,linux
4,İK,Excel
5,İK,Yönetim


In [99]:
pd.merge(df1, df5)

Unnamed: 0,Çalışanlar,Grup,Yetenekler
0,Ali,Muhasebe,Matematik
1,Ali,Muhasebe,Excel
2,Veli,Mühendislik,Kodlama
3,Veli,Mühendislik,linux
4,Ayşe,Mühendislik,Kodlama
5,Ayşe,Mühendislik,linux
6,Fatma,İK,Excel
7,Fatma,İK,Yönetim


# Toplulaştırma ve Gruplama (Aggregation & Grouping)

Basit toplulaştırma fonksiyonları:
* count()
* first()
* last()
* mean()
* median()
* min()
* max()
* std()
* var()
* sum()

In [100]:
import seaborn as sns

In [101]:
df = sns.load_dataset("planets")
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [102]:
df.shape

(1035, 6)

In [103]:
df["mass"].mean()

2.6381605847953216

In [104]:
df["mass"].count()

513

In [105]:
df["mass"].max()

25.0

In [106]:
df["mass"].min()

0.0036

In [107]:
df["mass"].sum()

1353.37638

In [108]:
df["mass"].std()

3.8186166509616046

In [109]:
df["mass"].var()

14.58183312700122

In [110]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [111]:
df.dropna().describe().T # Eksik değerler

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,498.0,1.73494,1.17572,1.0,1.0,1.0,2.0,6.0
orbital_period,498.0,835.778671,1469.128259,1.3283,38.27225,357.0,999.6,17337.5
mass,498.0,2.50932,3.636274,0.0036,0.2125,1.245,2.8675,25.0
distance,498.0,52.068213,46.596041,1.35,24.4975,39.94,59.3325,354.0
year,498.0,2007.37751,4.167284,1989.0,2005.0,2009.0,2011.0,2014.0


# Gruplama İşlemleri

In [112]:
df = pd.DataFrame({'Gruplar' : ['A', 'B', 'C', 'A', 'B', 'C'], 
                   'Veri' : [10,11,52,23,43,55]}, columns = ['Gruplar', 'Veri'])
df

Unnamed: 0,Gruplar,Veri
0,A,10
1,B,11
2,C,52
3,A,23
4,B,43
5,C,55


In [113]:
df.groupby("Gruplar")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000024A110E0410>

In [114]:
df.groupby("Gruplar").mean()

Unnamed: 0_level_0,Veri
Gruplar,Unnamed: 1_level_1
A,16.5
B,27.0
C,53.5


In [115]:
df.groupby("Gruplar").sum()

Unnamed: 0_level_0,Veri
Gruplar,Unnamed: 1_level_1
A,33
B,54
C,107


In [116]:
df = sns.load_dataset("planets")
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [117]:
df.groupby("method")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000024A156DC090>

In [118]:
df.groupby("method")["orbital_period"].mean()

method
Astrometry                          631.180000
Eclipse Timing Variations          4751.644444
Imaging                          118247.737500
Microlensing                       3153.571429
Orbital Brightness Modulation         0.709307
Pulsar Timing                      7343.021201
Pulsation Timing Variations        1170.000000
Radial Velocity                     823.354680
Transit                              21.102073
Transit Timing Variations            79.783500
Name: orbital_period, dtype: float64

In [119]:
df.groupby("method")["mass"].mean()

method
Astrometry                            NaN
Eclipse Timing Variations        5.125000
Imaging                               NaN
Microlensing                          NaN
Orbital Brightness Modulation         NaN
Pulsar Timing                         NaN
Pulsation Timing Variations           NaN
Radial Velocity                  2.630699
Transit                          1.470000
Transit Timing Variations             NaN
Name: mass, dtype: float64

In [120]:
df.groupby("method")["orbital_period"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Astrometry,2.0,631.18,544.217663,246.36,438.77,631.18,823.59,1016.0
Eclipse Timing Variations,9.0,4751.644444,2499.130945,1916.25,2900.0,4343.5,5767.0,10220.0
Imaging,12.0,118247.7375,213978.177277,4639.15,8343.9,27500.0,94250.0,730000.0
Microlensing,7.0,3153.571429,1113.166333,1825.0,2375.0,3300.0,3550.0,5100.0
Orbital Brightness Modulation,3.0,0.709307,0.725493,0.240104,0.291496,0.342887,0.943908,1.544929
Pulsar Timing,5.0,7343.021201,16313.265573,0.090706,25.262,66.5419,98.2114,36525.0
Pulsation Timing Variations,1.0,1170.0,,1170.0,1170.0,1170.0,1170.0,1170.0
Radial Velocity,553.0,823.35468,1454.92621,0.73654,38.021,360.2,982.0,17337.5
Transit,397.0,21.102073,46.185893,0.355,3.16063,5.714932,16.1457,331.60059
Transit Timing Variations,3.0,79.7835,71.599884,22.3395,39.67525,57.011,108.5055,160.0


# İleri Toplulaştırma İşlemleri (Aggregate, filter, transform, apply)

In [121]:
df = pd.DataFrame({'Gruplar' : ['A', 'B', 'C', 'A', 'B', 'C'], 
                   'Değişken1' : [10, 23, 33, 22, 11, 99], 
                   'Değişken2' : [100, 253, 333, 262, 111, 969]},
                   columns = ['Gruplar', 'Değişken1', 'Değişken2'])
df

Unnamed: 0,Gruplar,Değişken1,Değişken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


## aggregate

In [122]:
df.groupby("Gruplar").mean()

Unnamed: 0_level_0,Değişken1,Değişken2
Gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,16.0,181.0
B,17.0,182.0
C,66.0,651.0


In [123]:
df.groupby("Gruplar").aggregate([min, np.median, max])

Unnamed: 0_level_0,Değişken1,Değişken1,Değişken1,Değişken2,Değişken2,Değişken2
Unnamed: 0_level_1,min,median,max,min,median,max
Gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,16.0,22,100,181.0,262
B,11,17.0,23,111,182.0,253
C,33,66.0,99,333,651.0,969


In [124]:
df.groupby("Gruplar").aggregate({"Değişken1" : [min, np.median], "Değişken2": [np.median, max]})

Unnamed: 0_level_0,Değişken1,Değişken1,Değişken2,Değişken2
Unnamed: 0_level_1,min,median,median,max
Gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,10,16.0,181.0,262
B,11,17.0,182.0,253
C,33,66.0,651.0,969


# Filter

In [125]:
def filter_func(x):
    return x["Değişken1"].std() > 9

In [126]:
df.groupby("Gruplar").std()

Unnamed: 0_level_0,Değişken1,Değişken2
Gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,8.485281,114.551299
B,8.485281,100.409163
C,46.669048,449.719913


In [127]:
df.groupby("Gruplar").filter(filter_func)

Unnamed: 0,Gruplar,Değişken1,Değişken2
2,C,33,333
5,C,99,969


# Transform

In [128]:
df["Değişken1"]*9 

0     90
1    207
2    297
3    198
4     99
5    891
Name: Değişken1, dtype: int64

In [129]:
df_a = df.iloc[:,1:3]
df_a

Unnamed: 0,Değişken1,Değişken2
0,10,100
1,23,253
2,33,333
3,22,262
4,11,111
5,99,969


In [130]:
df_a.transform(lambda x : (x-x.mean()) / x.std())

Unnamed: 0,Değişken1,Değişken2
0,-0.687871,-0.738461
1,-0.299074,-0.263736
2,0.0,-0.015514
3,-0.328982,-0.235811
4,-0.657963,-0.704331
5,1.97389,1.957853


# apply

In [131]:
df = pd.DataFrame({'Gruplar' : ['A', 'B', 'C', 'A', 'B', 'C'], 
                   'Değişken1' : [10, 23, 33, 22, 11, 99], 
                   'Değişken2' : [100, 253, 333, 262, 111, 969]},
                   columns = ['Gruplar', 'Değişken1', 'Değişken2'])
df

Unnamed: 0,Gruplar,Değişken1,Değişken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [132]:
df.apply(np.sum)

Gruplar      ABCABC
Değişken1       198
Değişken2      2028
dtype: object

In [133]:
df.groupby("Gruplar").apply(np.sum)

Unnamed: 0_level_0,Gruplar,Değişken1,Değişken2
Gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,AA,32,362
B,BB,34,364
C,CC,132,1302


# Pivot Tablolar

In [134]:
titanic = sns.load_dataset('titanic')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [135]:
titanic.groupby("sex")[["survived"]].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [136]:
titanic.groupby(["sex", "class"])[["survived"]].aggregate("mean").unstack()

Unnamed: 0_level_0,survived,survived,survived
class,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


## Pivot Tablo İle

In [137]:
titanic.pivot_table("survived", index = "sex", columns = "class")

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [138]:
titanic.age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [139]:
age = pd.cut(titanic["age"], [0, 18, 90])
age.head(10)

0    (18.0, 90.0]
1    (18.0, 90.0]
2    (18.0, 90.0]
3    (18.0, 90.0]
4    (18.0, 90.0]
5             NaN
6    (18.0, 90.0]
7     (0.0, 18.0]
8    (18.0, 90.0]
9     (0.0, 18.0]
Name: age, dtype: category
Categories (2, interval[int64, right]): [(0, 18] < (18, 90]]

In [140]:
titanic.pivot_table("survived", ["sex", age], "class")

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.511628
female,"(18, 90]",0.972973,0.9,0.423729
male,"(0, 18]",0.8,0.6,0.215686
male,"(18, 90]",0.375,0.071429,0.133663


# Dış Kaynaklı Veri Okumak

In [143]:
# csv okuma
pd.read_csv("Okunacak_Dosyalar/ornekcsv.csv", sep = ";")

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [147]:
# txt okuma
pd.read_csv("Okunacak_Dosyalar/duz_metin.txt")

Unnamed: 0,1 2
0,2 2
1,3 2
2,4 2
3,5 2
4,6 2
5,7 2
6,8 2
7,9 2
8,10 2


In [150]:
# excel okuma
df = pd.read_excel("Okunacak_Dosyalar/ornekx.xlsx")
df

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [151]:
type(df)

pandas.core.frame.DataFrame

In [152]:
df.head()

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0


In [154]:
df.columns = ("A","B","C")
df

Unnamed: 0,A,B,C
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0
