## Pandas Endeks Objesi

In [1]:
import numpy as np
import pandas as pd
ind = pd.Index([2, 3, 5, 7, 11])
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

## Değiştirilemez bir dizi gibi endeks

In [2]:
ind[1]

3

In [3]:
ind[::2]

Int64Index([2, 5, 11], dtype='int64')

In [4]:
print(ind.size, ind.shape, ind.ndim, ind.dtype)

5 (5,) 1 int64


In [5]:
ind[1] = 0
## dönen bilgi Index 
## endekler değiştirilemez.

TypeError: Index does not support mutable operations

## Sıralı bir set gibi endeks

In [6]:
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])

In [7]:
## & ve işaretidir. Kesişimini verir
indA & indB  

Int64Index([3, 5, 7], dtype='int64')

In [8]:
## | veya işaretidir. Birleşimi verir  
indA | indB  

Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [9]:
## Simetric Fark
indA ^ indB  

Int64Index([1, 2, 9, 11], dtype='int64')

## Veri Endeksleme ve Seçme

In [None]:
## Sözlük olarak serileri

In [10]:
import pandas as pd
data = pd.Series([0.25, 0.5, 0.75, 1.0],
                 index=['a', 'b', 'c', 'd'])
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [11]:
data['b']

0.5

In [13]:
'e' in data

False

In [14]:
data.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [15]:
list(data.items())

[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]

In [16]:
data['e'] = 1.25
data

a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64

## Bir boyutlu dizi olarak Data Frameler.

In [17]:
# açık endeksle parçalama
data['a':'c']

a    0.25
b    0.50
c    0.75
dtype: float64

In [18]:
# açık tamsayılı endeksle parçalama
data[0:2]

a    0.25
b    0.50
dtype: float64

In [20]:
data > 0.3

a    False
b     True
c     True
d     True
e     True
dtype: bool

In [21]:
data < 0.8

a     True
b     True
c     True
d    False
e    False
dtype: bool

In [22]:
(data > 0.3)   &   (data < 0.8) 

a    False
b     True
c     True
d    False
e    False
dtype: bool

In [19]:
# maskeleme
data[   (data > 0.3)   &   (data < 0.8)    ]

b    0.50
c    0.75
dtype: float64

In [23]:
# fantezi endeksleme
data[    ['a', 'e']   ]

a    0.25
e    1.25
dtype: float64

## Endeksleyiciler : loc, iloc, ve ix

In [26]:
data = pd.Series(['a', 'b', 'c'], index=[1, 3, 5])
data

1    a
3    b
5    c
dtype: object

In [27]:
# endekslemeli açık endeksleme 
data[1]

'a'

In [28]:
# parçalamalı açık endeksleme 
data[1:3]

3    b
5    c
dtype: object

In [29]:
data.loc[1]

'a'

In [30]:
data.loc[1:3]

1    a
3    b
dtype: object

In [31]:
data.iloc[1]

'b'

In [32]:
data.iloc[1:3]

3    b
5    c
dtype: object

## Data Framelerede Veri Seçimi


## Bir sözlük olarak Data Frameler

In [33]:
alan = pd.Series({'California': 423967, 'Texas': 695662,
                  'New York': 141297, 'Florida': 170312,
                  'Illinois': 149995})
nufus = pd.Series({'California': 38332521, 'Texas': 26448193,
                 'New York': 19651127, 'Florida': 19552860,
                 'Illinois': 12882135})
data = pd.DataFrame({'alan':alan, 'nufus':nufus})
data

Unnamed: 0,alan,nufus
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [34]:
data['alan']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: alan, dtype: int64

In [35]:
data.alan

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: alan, dtype: int64

In [36]:
data.alan is data['alan']

True

In [37]:
data.nufus is data['nufus']

True

In [38]:
data['yogunluk'] = data['nufus'] / data['alan']
data

Unnamed: 0,alan,nufus,yogunluk
California,423967,38332521,90.413926
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


## İki boyutlu dizi olarak Data Frameler

In [39]:
data.values

array([[4.23967000e+05, 3.83325210e+07, 9.04139261e+01],
       [6.95662000e+05, 2.64481930e+07, 3.80187404e+01],
       [1.41297000e+05, 1.96511270e+07, 1.39076746e+02],
       [1.70312000e+05, 1.95528600e+07, 1.14806121e+02],
       [1.49995000e+05, 1.28821350e+07, 8.58837628e+01]])

In [40]:
data.T

Unnamed: 0,California,Texas,New York,Florida,Illinois
alan,423967.0,695662.0,141297.0,170312.0,149995.0
nufus,38332520.0,26448190.0,19651130.0,19552860.0,12882140.0
yogunluk,90.41393,38.01874,139.0767,114.8061,85.88376


In [41]:
data.values[0]

array([4.23967000e+05, 3.83325210e+07, 9.04139261e+01])

In [42]:
data['alan']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: alan, dtype: int64

In [43]:
data.iloc[:3, :2]

Unnamed: 0,alan,nufus
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127


In [44]:
data.loc[:'Texas', :'nufus']

Unnamed: 0,alan,nufus
California,423967,38332521
Texas,695662,26448193


In [45]:
data.ix[:3, :'nufus']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  retval = getattr(retval, self.name)._getitem_axis(key, axis=i)


Unnamed: 0,alan,nufus
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127


In [47]:
data.yogunluk > 100,

(California    False
 Texas         False
 New York       True
 Florida        True
 Illinois      False
 Name: yogunluk, dtype: bool,)

In [46]:
data.loc[data.yogunluk > 100, ['nufus', 'yogunluk']]

Unnamed: 0,nufus,yogunluk
New York,19651127,139.076746
Florida,19552860,114.806121


In [48]:
data.iloc[0, 2] = 90
data

Unnamed: 0,alan,nufus,yogunluk
California,423967,38332521,90.0
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


## Additional indexing conventions

In [49]:
data['Florida':'Illinois']

Unnamed: 0,alan,nufus,yogunluk
Florida,170312,19552860,114.806121
Illinois,149995,12882135,85.883763


In [50]:
data[1:3]

Unnamed: 0,alan,nufus,yogunluk
Texas,695662,26448193,38.01874
New York,141297,19651127,139.076746


In [51]:
data[data.yogunluk > 100]

Unnamed: 0,alan,nufus,yogunluk
New York,141297,19651127,139.076746
Florida,170312,19552860,114.806121


## Evrensel Fonksiyonlar Endeks önrezervasyonu

In [52]:
## Pandas NumPy ile çalışacak şekilde tasarlanmıştır. 
## Herhangi bir NumPy evrensel fonksiyonu Pandas Serilerleve Data Frame objeleri ile öalışır. 

In [53]:
import pandas as pd
import numpy as np

In [54]:
rng = np.random.RandomState(42)
ser = pd.Series(rng.randint(0, 10, 4))
ser

0    6
1    3
2    7
3    4
dtype: int64

In [55]:
df = pd.DataFrame(rng.randint(0, 10, (3, 4)),
                  columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [56]:
np.exp(ser)

0     403.428793
1      20.085537
2    1096.633158
3      54.598150
dtype: float64

In [57]:
np.sin(df * np.pi / 4)

Unnamed: 0,A,B,C,D
0,-1.0,0.7071068,1.0,-1.0
1,-0.707107,1.224647e-16,0.707107,-0.7071068
2,-0.707107,1.0,-0.707107,1.224647e-16
