## PANDAS

* Panel Data
* Veri manipulasyonu ve veri analizi icin yazılmıs acık kaynak kodlu bir Python kutuphanesidir.
* Ekonometrik ve finansal calısmalar icin dogmustur.
* Temeli 2008 yılında atılmıstır.
* R icindeki DataFrame yapısını Python dunyasına tasımıs ve DataFrame'ler uzerinde hızlı ve etkili calısabilme imkanı  
  saglamıstır.
* Bircok farklı veri tipini okuma ve yazma imkanı saglar.

## Pandas Serisi Olusturmak

In [388]:
import pandas as pd

In [389]:
pd.Series([10,88,3,4,5])

0    10
1    88
2     3
3     4
4     5
dtype: int64

In [390]:
seri = pd.Series([10,88,3,4,5])

In [391]:
type(seri)

pandas.core.series.Series

In [392]:
seri.axes

[RangeIndex(start=0, stop=5, step=1)]

In [393]:
seri.dtype

dtype('int64')

In [394]:
seri.size

5

In [395]:
seri.ndim

1

In [396]:
seri.shape

(5,)

In [397]:
seri.values

array([10, 88,  3,  4,  5], dtype=int64)

In [398]:
seri.head(3)                  # shows the first 3 elements of the Series

0    10
1    88
2     3
dtype: int64

In [399]:
seri.tail(3)                  # shows the last 3 elements of the Series

2    3
3    4
4    5
dtype: int64

In [400]:
#index isimlendirmesi

In [401]:
pd.Series([99,22,332,94,5])

0     99
1     22
2    332
3     94
4      5
dtype: int64

In [402]:
pd.Series([99,22,332,94,5], index = [1,3,5,7,9])

1     99
3     22
5    332
7     94
9      5
dtype: int64

In [403]:
pd.Series([99,22,332,94,5], index = ["a","b","c","d","e"])

a     99
b     22
c    332
d     94
e      5
dtype: int64

In [404]:
seri = pd.Series([99,22,332,94,5], index = ["a","b","c","d","e"])

In [405]:
seri["a"]

99

In [406]:
seri["a":"c"]

a     99
b     22
c    332
dtype: int64

In [407]:
#sozluk uzerinden seri olusturmak

In [408]:
sozluk = pd.Series({"reg":10, "log":11, "cart":12})

In [409]:
sozluk

reg     10
log     11
cart    12
dtype: int64

In [410]:
sozluk = {"reg":10, "log":11, "cart":12}

In [411]:
seri = pd.Series(sozluk)

In [412]:
seri

reg     10
log     11
cart    12
dtype: int64

In [413]:
#iki seriyi birlestirerek seri olusturma

In [414]:
pd.concat([seri,seri])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

## Eleman Islemleri

In [415]:
import numpy as np
a = np.array([1,2,33,444,75])
seri = pd.Series(a)
seri

0      1
1      2
2     33
3    444
4     75
dtype: int32

In [416]:
seri[0]

1

In [417]:
seri[0:3]

0     1
1     2
2    33
dtype: int32

In [418]:
seri = pd.Series([121,200,150,99], index= ["reg","loj","cart","rf"])

In [419]:
seri

reg     121
loj     200
cart    150
rf       99
dtype: int64

In [420]:
seri.index

Index(['reg', 'loj', 'cart', 'rf'], dtype='object')

In [421]:
seri.keys()  # keys is a method: call it to get the index labels rather than the bound-method repr

Index(['reg', 'loj', 'cart', 'rf'], dtype='object')

In [422]:
list(seri.items())

[('reg', 121), ('loj', 200), ('cart', 150), ('rf', 99)]

In [423]:
seri.values

array([121, 200, 150,  99], dtype=int64)

In [424]:
#eleman sorgulama

In [425]:
"reg" in seri

True

In [426]:
"a" in seri

False

In [427]:
seri["reg"]

121

In [428]:
#fancy eleman

In [429]:
seri[["rf","reg"]]

rf      99
reg    121
dtype: int64

In [430]:
seri["reg"] = 130

In [431]:
seri["reg"]

130

In [432]:
seri["reg":"loj"]

reg    130
loj    200
dtype: int64

## Pandas DataFrame Olusturma

In [433]:
l = [1,2,39,67,90]

In [434]:
l

[1, 2, 39, 67, 90]

In [435]:
pd.DataFrame(l, columns = ["degisken_ismi"])

Unnamed: 0,degisken_ismi
0,1
1,2
2,39
3,67
4,90


In [436]:
m = np.arange(1,10).reshape((3,3))
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [437]:
pd.DataFrame(m, columns = ["var1", "var2", "var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [438]:
#df isimlendirme

In [439]:
df = pd.DataFrame(m, columns =  ["var1", "var2", "var3"])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [440]:
df.columns = ["deg1","deg2","deg3"]

In [441]:
df

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [442]:
type(df)

pandas.core.frame.DataFrame

In [443]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['deg1', 'deg2', 'deg3'], dtype='object')]

In [444]:
df.shape

(3, 3)

In [445]:
df.ndim

2

In [446]:
df.size

9

In [447]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [448]:
type(df.values)

numpy.ndarray

In [449]:
df.head()

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [450]:
df.tail(1)

Unnamed: 0,deg1,deg2,deg3
2,7,8,9


In [451]:
a = np.array([1,2,3,4,5])

In [452]:
pd.DataFrame(a, columns = ["deg1"])

Unnamed: 0,deg1
0,1
1,2
2,3
3,4
4,5


## DataFrame eleman islemleri

In [453]:
# Draw three independent arrays of 5 random integers each, with values in [0, 10).
# No seed is set here, so re-running this cell yields different numbers.
s1 = np.random.randint(low=0, high=10, size=5)
s2 = np.random.randint(low=0, high=10, size=5)
s3 = np.random.randint(low=0, high=10, size=5)

In [454]:
sozluk = {"var1": s1, "var2": s2, "var3": s3}

In [455]:
sozluk

{'var1': array([3, 5, 7, 4, 2]),
 'var2': array([2, 3, 9, 8, 5]),
 'var3': array([5, 7, 2, 4, 4])}

In [456]:
df = pd.DataFrame(sozluk)

In [457]:
df

Unnamed: 0,var1,var2,var3
0,3,2,5
1,5,3,7
2,7,9,2
3,4,8,4
4,2,5,4


In [458]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,3,2,5


In [459]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [460]:
df.index = ["a","b","c","d","e"]

In [461]:
df

Unnamed: 0,var1,var2,var3
a,3,2,5
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [462]:
df["c":"e"]

Unnamed: 0,var1,var2,var3
c,7,9,2
d,4,8,4
e,2,5,4


In [463]:
#silme

In [464]:
df.drop("a", axis = 0)

Unnamed: 0,var1,var2,var3
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [465]:
df

Unnamed: 0,var1,var2,var3
a,3,2,5
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [466]:
df.drop("a", axis = 0, inplace = True)     # inplace=True removes row "a" from df itself instead of returning a new DataFrame

In [467]:
df

Unnamed: 0,var1,var2,var3
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [468]:
#fancy

In [469]:
l = ["c","e"]

In [470]:
df.drop(l, axis = 0)

Unnamed: 0,var1,var2,var3
b,5,3,7
d,4,8,4


In [471]:
#degiskenler icin

In [472]:
df

Unnamed: 0,var1,var2,var3
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [473]:
"var1" in df

True

In [474]:
l = ["var1","var4","var2"]

In [475]:
# Print, for each candidate name, whether it is a column of df.
for column_name in l:
    print(column_name in df)

True
False
True


In [476]:
df

Unnamed: 0,var1,var2,var3
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [477]:
df["var1"]

b    5
c    7
d    4
e    2
Name: var1, dtype: int32

In [478]:
df["var4"] = df["var1"] / df["var2"]

In [479]:
df

Unnamed: 0,var1,var2,var3,var4
b,5,3,7,1.666667
c,7,9,2,0.777778
d,4,8,4,0.5
e,2,5,4,0.4


In [480]:
df.drop("var4", axis = 1)

Unnamed: 0,var1,var2,var3
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [481]:
df

Unnamed: 0,var1,var2,var3,var4
b,5,3,7,1.666667
c,7,9,2,0.777778
d,4,8,4,0.5
e,2,5,4,0.4


In [482]:
df.drop("var4", axis = 1, inplace = True)

In [483]:
df

Unnamed: 0,var1,var2,var3
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4


In [484]:
l = ["var1","var2"]

In [485]:
df.drop(l, axis = 1)

Unnamed: 0,var3
b,7
c,2
d,4
e,4


In [487]:
df

Unnamed: 0,var1,var2,var3
b,5,3,7
c,7,9,2
d,4,8,4
e,2,5,4
