## PANDAS

* Panel Data
* Veri manipulasyonu ve veri analizi icin yazılmıs acık kaynak kodlu bir Python kutuphanesidir.
* Ekonometrik ve finansal calısmalar icin dogmustur.
* Temeli 2008 yılında atılmıstır.
* R icinde DataFrame yapısını Python dunyasına tasımıs ve DataFrame'ler uzerinde hızlı ve etkili calısabilme imkanı  
  saglamıştır.
* Bir cok farklı veri tipini okuma ve yazma imkanı saglar.

## Pandas Serisi Olusturmak

In [1]:
import pandas as pd

In [2]:
pd.Series([10,88,3,4,5])

0    10
1    88
2     3
3     4
4     5
dtype: int64

In [3]:
seri = pd.Series([10,88,3,4,5])

In [4]:
type(seri)

pandas.core.series.Series

In [5]:
seri.axes

[RangeIndex(start=0, stop=5, step=1)]

In [6]:
seri.dtype

dtype('int64')

In [7]:
seri.size

5

In [8]:
seri.ndim

1

In [9]:
seri.shape

(5,)

In [10]:
seri.values

array([10, 88,  3,  4,  5], dtype=int64)

In [11]:
seri.head(3)                  #ilk 5 harfi istiyor

0    10
1    88
2     3
dtype: int64

In [12]:
seri.tail(3)                  #sondan bakmaya yarıyor

2    3
3    4
4    5
dtype: int64

In [13]:
#index isimlendirmesi

In [14]:
pd.Series([99,22,332,94,5])

0     99
1     22
2    332
3     94
4      5
dtype: int64

In [15]:
pd.Series([99,22,332,94,5], index = [1,3,5,7,9])

1     99
3     22
5    332
7     94
9      5
dtype: int64

In [16]:
pd.Series([99,22,332,94,5], index = ["a","b","c","d","e"])

a     99
b     22
c    332
d     94
e      5
dtype: int64

In [17]:
seri = pd.Series([99,22,332,94,5], index = ["a","b","c","d","e"])

In [18]:
seri["a"]

99

In [19]:
seri["a":"c"]

a     99
b     22
c    332
dtype: int64

In [20]:
#sozluk uzerinden liste olusturmak

In [21]:
sozluk = pd.Series({"reg":10, "log":11, "cart":12})

In [22]:
sozluk

reg     10
log     11
cart    12
dtype: int64

In [23]:
sozluk = {"reg":10, "log":11, "cart":12}

In [24]:
seri = pd.Series(sozluk)

In [25]:
seri

reg     10
log     11
cart    12
dtype: int64

In [26]:
#iki seriyi birlestirerek seri olusturma

In [27]:
pd.concat([seri,seri])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

## Eleman Islemleri

In [28]:
import numpy as np
a = np.array([1,2,33,444,75])
seri = pd.Series(a)
seri

0      1
1      2
2     33
3    444
4     75
dtype: int32

In [29]:
seri[0]

1

In [30]:
seri[0:3]

0     1
1     2
2    33
dtype: int32

In [31]:
seri = pd.Series([121,200,150,99], index= ["reg","loj","cart","rf"])

In [32]:
seri

reg     121
loj     200
cart    150
rf       99
dtype: int64

In [33]:
seri.index

Index(['reg', 'loj', 'cart', 'rf'], dtype='object')

In [34]:
seri.keys

<bound method Series.keys of reg     121
loj     200
cart    150
rf       99
dtype: int64>

In [35]:
list(seri.items())

[('reg', 121), ('loj', 200), ('cart', 150), ('rf', 99)]

In [36]:
seri.values

array([121, 200, 150,  99], dtype=int64)

In [37]:
#eleman sorgulama

In [38]:
"reg" in seri

True

In [39]:
"a" in seri

False

In [40]:
seri["reg"]

121

In [41]:
#fancy eleman

In [42]:
seri[["rf","reg"]]

rf      99
reg    121
dtype: int64

In [43]:
seri["reg"] = 130

In [44]:
seri["reg"]

130

In [45]:
seri["reg":"loj"]

reg    130
loj    200
dtype: int64

## Pandas DataFrame Olusturma

In [46]:
l = [1,2,39,67,90]

In [47]:
l

[1, 2, 39, 67, 90]

In [48]:
pd.DataFrame(l, columns = ["degisken_ismi"])

Unnamed: 0,degisken_ismi
0,1
1,2
2,39
3,67
4,90


In [49]:
m = np.arange(1,10).reshape((3,3))
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [50]:
pd.DataFrame(m, columns = ["var1", "var2", "var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [51]:
#df isimlendirme

In [52]:
df = pd.DataFrame(m, columns =  ["var1", "var2", "var3"])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [53]:
df.columns = ["deg1","deg2","deg3"]

In [54]:
df

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [55]:
type(df)

pandas.core.frame.DataFrame

In [56]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['deg1', 'deg2', 'deg3'], dtype='object')]

In [57]:
df.shape

(3, 3)

In [58]:
df.ndim

2

In [59]:
df.size

9

In [60]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [61]:
type(df.values)

numpy.ndarray

In [62]:
df.head()

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [63]:
df.tail(1)

Unnamed: 0,deg1,deg2,deg3
2,7,8,9


In [64]:
a = np.array([1,2,3,4,5])

In [65]:
pd.DataFrame(a, columns = ["deg1"])

Unnamed: 0,deg1
0,1
1,2
2,3
3,4
4,5


## DataFrame eleman islemleri

In [66]:
s1 = np.random.randint(10, size = 5)
s2 = np.random.randint(10, size = 5)
s3 = np.random.randint(10, size = 5)

In [67]:
sozluk = {"var1": s1, "var2": s2, "var3": s3}

In [68]:
sozluk

{'var1': array([3, 8, 1, 5, 6]),
 'var2': array([5, 0, 9, 0, 6]),
 'var3': array([1, 7, 8, 1, 0])}

In [69]:
df = pd.DataFrame(sozluk)

In [70]:
df

Unnamed: 0,var1,var2,var3
0,3,5,1
1,8,0,7
2,1,9,8
3,5,0,1
4,6,6,0


In [71]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,3,5,1


In [72]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [73]:
df.index = ["a","b","c","d","e"]

In [74]:
df

Unnamed: 0,var1,var2,var3
a,3,5,1
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [75]:
df["c":"e"]

Unnamed: 0,var1,var2,var3
c,1,9,8
d,5,0,1
e,6,6,0


In [76]:
#silme

In [77]:
df.drop("a", axis = 0)

Unnamed: 0,var1,var2,var3
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [78]:
df

Unnamed: 0,var1,var2,var3
a,3,5,1
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [79]:
df.drop("a", axis = 0, inplace = True)     #insplace: kalıcı bir sekilde siliyor.

In [80]:
df

Unnamed: 0,var1,var2,var3
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [81]:
#fancy

In [82]:
l = ["c","e"]

In [83]:
df.drop(l, axis = 0)

Unnamed: 0,var1,var2,var3
b,8,0,7
d,5,0,1


In [84]:
#degiskenler icin

In [85]:
df

Unnamed: 0,var1,var2,var3
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [86]:
"var1" in df

True

In [87]:
l = ["var1","var4","var2"]

In [88]:
for i in l:
    print(i in df)

True
False
True


In [89]:
df

Unnamed: 0,var1,var2,var3
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [90]:
df["var1"]

b    8
c    1
d    5
e    6
Name: var1, dtype: int32

In [91]:
df["var4"] = df["var1"] / df["var2"]

In [92]:
df

Unnamed: 0,var1,var2,var3,var4
b,8,0,7,inf
c,1,9,8,0.111111
d,5,0,1,inf
e,6,6,0,1.0


In [93]:
df.drop("var4", axis = 1)

Unnamed: 0,var1,var2,var3
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [94]:
df

Unnamed: 0,var1,var2,var3,var4
b,8,0,7,inf
c,1,9,8,0.111111
d,5,0,1,inf
e,6,6,0,1.0


In [95]:
df.drop("var4", axis = 1, inplace = True)

In [96]:
df

Unnamed: 0,var1,var2,var3
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


In [97]:
l = ["var1","var2"]

In [98]:
df.drop(l, axis = 1)

Unnamed: 0,var3
b,7
c,8
d,1
e,0


In [99]:
df

Unnamed: 0,var1,var2,var3
b,8,0,7
c,1,9,8
d,5,0,1
e,6,6,0


# Gözlem Ve Degisken Secimi: loc & iloc

In [116]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (10,3))
df = pd.DataFrame(m, columns = ["var1","var2","var3"])


In [117]:
df

Unnamed: 0,var1,var2,var3
0,25,14,1
1,23,16,19
2,14,5,18
3,27,21,3
4,20,7,10
5,8,29,27
6,5,2,5
7,9,23,15
8,20,17,10
9,8,29,11


In [118]:
?np.random.randint

In [119]:
#loc: tanımlandıgı sekli ile secim yapmak icin kullanılır.

In [120]:
df.loc[0:3]

Unnamed: 0,var1,var2,var3
0,25,14,1
1,23,16,19
2,14,5,18
3,27,21,3


In [121]:
#iloc: alısık oldugumuz indeksleme mantıgı ile secim yapar.

In [122]:
df.iloc[0:3]

Unnamed: 0,var1,var2,var3
0,25,14,1
1,23,16,19
2,14,5,18


In [123]:
df.iloc[0,0]

25

In [124]:
df

Unnamed: 0,var1,var2,var3
0,25,14,1
1,23,16,19
2,14,5,18
3,27,21,3
4,20,7,10
5,8,29,27
6,5,2,5
7,9,23,15
8,20,17,10
9,8,29,11


In [125]:
df.iloc[:3,:2]

Unnamed: 0,var1,var2
0,25,14
1,23,16
2,14,5


In [126]:
df.loc[0:3, "var3"]

0     1
1    19
2    18
3     3
Name: var3, dtype: int32

In [127]:
df.iloc[0:3]["var3"]

0     1
1    19
2    18
Name: var3, dtype: int32