___


<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="Rossum"></p>

___

## Creating a Pandas Series

In [1]:
import numpy as np
import pandas as pd

### Creating a Pandas Series with Basic Format

In [2]:
ser=pd.Series([5,10,15,20,25])

In [3]:
type(ser)

pandas.core.series.Series

In [4]:
ser.dtype

dtype('int64')

In [5]:
ser

0     5
1    10
2    15
3    20
4    25
dtype: int64

### Basic Attributes of Series

In [6]:
type(ser)

pandas.core.series.Series

In [7]:
ser.dtype

dtype('int64')

In [16]:
ser.size

5

In [8]:
ser.ndim

1

In [9]:
ser

0     5
1    10
2    15
3    20
4    25
dtype: int64

In [10]:
ser.values

array([ 5, 10, 15, 20, 25], dtype=int64)

In [14]:
ser.index

RangeIndex(start=0, stop=5, step=1)

In [15]:
ser

0     5
1    10
2    15
3    20
4    25
dtype: int64

In [17]:
ser.head(2)

0     5
1    10
dtype: int64

In [18]:
ser.tail(3)

2    15
3    20
4    25
dtype: int64

In [20]:
ser

0     5
1    10
2    15
3    20
4    25
dtype: int64

In [29]:
ser.value_counts()

25    1
10    1
20    1
5     1
15    1
dtype: int64

### Creating a Pandas Series from a ``list``, a NumPy array, or a dictionary (``dict``)

In [30]:
# Sample inputs reused by the Series-construction examples that follow.
labels = list('python')           # one label per character: ['p', 'y', 't', 'h', 'o', 'n']
my_list = list(range(6))          # plain Python ints, so the list and dict display as 0..5 on every NumPy version
d = dict(zip(labels, my_list))    # maps each label to its position

arr = np.array([10, 20, 30, 40, 50, 60])


In [37]:
labels

['p', 'y', 't', 'h', 'o', 'n']

In [34]:
my_list

[0, 1, 2, 3, 4, 5]

In [35]:
d

{'p': 0, 'y': 1, 't': 2, 'h': 3, 'o': 4, 'n': 5}

### **Using Lists**

In [38]:
pd.Series(labels)

0    p
1    y
2    t
3    h
4    o
5    n
dtype: object

In [41]:
d

{'p': 0, 'y': 1, 't': 2, 'h': 3, 'o': 4, 'n': 5}

In [40]:
pd.Series(d)

p    0
y    1
t    2
h    3
o    4
n    5
dtype: int64

### **Using NumPy Arrays**

In [42]:
arr

array([10, 20, 30, 40, 50, 60])

In [43]:
labels

['p', 'y', 't', 'h', 'o', 'n']

In [46]:
pd.Series( data = labels, index = arr)  # data (the values) comes first, then index

10    p
20    y
30    t
40    h
50    o
60    n
dtype: object

### **Using Dictionary**

In [47]:
d

{'p': 0, 'y': 1, 't': 2, 'h': 3, 'o': 4, 'n': 5}

In [48]:
sam = {"11":"emir","12":"tommy","13":"hüseyin"}

In [56]:
pd.Series(sam)

11       emir
12      tommy
13    hüseyin
dtype: object

In [57]:
pd.Series(d)

p    0
y    1
t    2
h    3
o    4
n    5
dtype: int64

In [60]:
d

{'p': 0, 'y': 1, 't': 2, 'h': 3, 'o': 4, 'n': 5}

In [64]:
pd.Series(data = d, index= ['q', 'o', 'y','t','k','p'])

q    NaN
o    4.0
y    1.0
t    2.0
k    NaN
p    0.0
dtype: float64

### Data in a Series

A pandas Series can hold a variety of object types:

In [65]:
# A set has no order, so pandas refuses to build a Series from it.
# The error is the point of this example: catch it and show its message so the
# notebook still runs to the end under "Restart Kernel -> Run All".
try:
    pd.Series({1,2,3,6})
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: 'set' type is unordered

In [66]:
pd.Series(set)

0    <class 'set'>
dtype: object

In [71]:
pd.Series(['pandas', 5, False,np.median , len])

0                                     pandas
1                                          5
2                                      False
3    <function median at 0x000001F58756CA60>
4                    <built-in function len>
dtype: object

***



## Indexing Pandas Series

The key to using a Series is understanding its index. Pandas uses these index labels (names or numbers) to provide fast lookup of information.

Let's see some examples of how to grab information from a Series. Let us create two Series, ser1 and ser2:

In [86]:
ser1 = pd.Series(data = [1,2,3,4,5],index = ['numpy', 'pandas','sql', 'gss','scipy'])                      

In [85]:
ser2 = pd.Series([1,2,5,4,6,7],index = ['numpy', 'pandas','tableau', 'seaborn','matplotlib',"huseyin"])

In [75]:
ser1

numpy     1
pandas    2
sql       3
gss       4
scipy     5
dtype: int64

In [77]:
ser2

numpy         1
pandas        2
tableau       5
seaborn       4
matplotlib    6
huseyin       7
dtype: int64

In [83]:
ser1['numpy']

1

In [78]:
ser2['matplotlib']

6

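Arithmetic between Series is also aligned on the index: values are combined label by label, and a label that appears in only one of the Series yields NaN: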

In [87]:
ser1

numpy     1
pandas    2
sql       3
gss       4
scipy     5
dtype: int64

In [88]:
ser2

numpy         1
pandas        2
tableau       5
seaborn       4
matplotlib    6
huseyin       7
dtype: int64

In [89]:
(ser1 + ser2)

gss           NaN
huseyin       NaN
matplotlib    NaN
numpy         2.0
pandas        4.0
scipy         NaN
seaborn       NaN
sql           NaN
tableau       NaN
dtype: float64

In [90]:
ser1.add(ser2, fill_value=0)

gss           4.0
huseyin       7.0
matplotlib    6.0
numpy         2.0
pandas        4.0
scipy         5.0
seaborn       4.0
sql           3.0
tableau       5.0
dtype: float64

### Indexing Examples

In [94]:
# Draw six reproducible integers from [5, 12) and wrap them in a Series
# (no index is given, so the Series gets the default 0..5 index).
np.random.seed(101)
a = np.random.randint(low=5, high=12, size=6)   # randint already returns an ndarray
panser = pd.Series(data=a)
panser

0     8
1     6
2    11
3    10
4     8
5     6
dtype: int32

In [96]:
panser[3:]

3    10
4     8
5     6
dtype: int32

### pandas_series[index] | pandas_series[[indices, indices...]]

In [97]:
panser = pd.Series(data = [25, 32, 35, 33], index = ["terry", "micheal", "orion", "jason"])
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [104]:
# Select by position with .iloc: integer keys inside plain [] on a string-labelled
# Series are deprecated positional access. The list keeps the result a Series.
panser.iloc[[2]]

orion    35
dtype: int64

In [99]:
index1 = ['terry', 'micheal', 'jason']

In [103]:
# Select several entries by label using the list defined above; this returns the same
# three rows as positions 0, 1 and 3 without relying on deprecated integer keys in [].
panser[index1]

terry      25
micheal    32
jason      33
dtype: int64

In [105]:
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [102]:
panser['terry':'orion']

terry      25
micheal    32
orion      35
dtype: int64

In [49]:
panser[0:3]

terry      25
micheal    32
orion      35
dtype: int64

### Several Selecting Attributes

In [106]:
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [107]:
panser.keys()

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

In [108]:
panser.index

Index(['terry', 'micheal', 'orion', 'jason'], dtype='object')

In [109]:
panser.values

array([25, 32, 35, 33], dtype=int64)

In [114]:
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [117]:
panser.items()

<zip at 0x1f58b6300c0>

In [107]:
list(panser.items())

[('terry', 25), ('micheal', 32), ('orion', 35), ('jason', 33)]

In [118]:
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [129]:
'jason' in panser #(panser.index, panser.keys())

True

In [131]:
d = {"1":"emir","2":"recep"}

In [136]:
"1" in d

True

In [126]:
25 in panser.values

True

In [137]:
panser

terry      25
micheal    32
orion      35
jason      33
dtype: int64

In [138]:
panser['terry'] = 99

In [139]:
panser

terry      99
micheal    32
orion      35
jason      33
dtype: int64

In [143]:
panser

terry      99
micheal    32
orion      35
jason      33
dtype: int64

In [149]:
panser[~(panser>32)]  # ~ inverts the boolean mask, so this keeps the entries where the condition is False

micheal    32
dtype: int64

## DataFrames

In [150]:
data = np.arange(1,21,4)
data

array([ 1,  5,  9, 13, 17])

In [152]:
pd.Series(data = data)

0     1
1     5
2     9
3    13
4    17
dtype: int32

In [156]:
pd.DataFrame(data = data, columns=['column1'])

Unnamed: 0,column1
0,1
1,5
2,9
3,13
4,17


In [157]:
pd.DataFrame(data = data)  # when columns is omitted, the column label defaults to 0

Unnamed: 0,0
0,1
1,5
2,9
3,13
4,17


In [158]:
data2 = np.arange(1,18,2).reshape(3,3)
data2

array([[ 1,  3,  5],
       [ 7,  9, 11],
       [13, 15, 17]])

In [164]:
df3=pd.DataFrame(data = data2, columns = ['var1', 'var2', 'var3'], index = ["a","b","c"])
df3

Unnamed: 0,var1,var2,var3
a,1,3,5
b,7,9,11
c,13,15,17


In [166]:
df3=pd.DataFrame(data2,["a","b","c"],['var1', 'var2', 'var3'])  # positional argument order: data, index, columns
df3

Unnamed: 0,var1,var2,var3
a,1,3,5
b,7,9,11
c,13,15,17


In [167]:
df3.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [168]:
df3.index

Index(['a', 'b', 'c'], dtype='object')

In [171]:
df3

Unnamed: 0,var1,var2,var3
a,1,3,5
b,7,9,11
c,13,15,17


In [176]:
(df3.var1.value_counts()).sort_index()  # value_counts() puts the column's distinct values in the index and how often each occurs in the values

1     1
7     1
13    1
Name: var1, dtype: int64

In [177]:
df3

Unnamed: 0,var1,var2,var3
a,1,3,5
b,7,9,11
c,13,15,17


In [178]:
df3.columns = ['new1', 'new2','new3']

In [179]:
df3

Unnamed: 0,new1,new2,new3
a,1,3,5
b,7,9,11
c,13,15,17


In [180]:
df3.index = ["recep", "hocam", "adamdır"]

In [181]:
df3

Unnamed: 0,new1,new2,new3
recep,1,3,5
hocam,7,9,11
adamdır,13,15,17


for your info:

* 1 dim --> vector 
* 2 dim --> matrix
* 3 dim --> tensor

 - ### Creating a DataFrame using a ``NumPy Array``

In [182]:
m=np.arange(1,50,5).reshape(5,2)
m

array([[ 1,  6],
       [11, 16],
       [21, 26],
       [31, 36],
       [41, 46]])

In [184]:
df=pd.DataFrame(m, columns=['col1','col2'])
df

Unnamed: 0,col1,col2
0,1,6
1,11,16
2,21,26
3,31,36
4,41,46


In [198]:
df.sample(3)

Unnamed: 0,col1,col2
0,1,6
2,21,26
1,11,16


 - ### Creating a DataFrame using a ``dict``

In [201]:
# Three reproducible 4-element integer arrays, drawn one after another from the
# half-open ranges [10, 18), [19, 27) and [28, 35).
np.random.seed(101)
s1, s2, s3 = (
    np.random.randint(lo, hi, size=4)
    for lo, hi in ((10, 18), (19, 27), (28, 35))
)

In [202]:
s1

array([17, 13, 11, 16])

In [203]:
s2

array([26, 26, 24, 22])

In [204]:
s3

array([29, 33, 28, 32])

In [206]:
myDict= {'var1':s1,'var2':s2,'var3':s3}

In [207]:
df1 = pd.DataFrame(myDict)

In [155]:
df1

Unnamed: 0,var1,var2,var3
0,17,26,29
1,13,26,33
2,11,24,28
3,16,22,32


- Simple indexing and slicing the ``DataFrames``

In [158]:
df1[1:3]

Unnamed: 0,var1,var2,var3
1,13,26,33
2,11,24,28


In [208]:
df1.index

RangeIndex(start=0, stop=4, step=1)

In [209]:
df1.index = ["a", "b", "c", "d"]

In [210]:
df1

Unnamed: 0,var1,var2,var3
a,17,26,29
b,13,26,33
c,11,24,28
d,16,22,32


In [211]:
df1['b':'d']

Unnamed: 0,var1,var2,var3
b,13,26,33
c,11,24,28
d,16,22,32


In [212]:
# we can check any column name whether it belongs to the DataFrame or not
"var2" in df1

True

In [213]:
'var5' in df1

False

### Now, let's examine again the ***indexing, selection*** and ***slicing*** methods and several ***attributes*** using a different DataFrame

In [214]:
from numpy.random import randn
np.random.seed(101)

In [215]:
randn(5,4)

array([[ 2.70684984,  0.62813271,  0.90796945,  0.50382575],
       [ 0.65111795, -0.31931804, -0.84807698,  0.60596535],
       [-2.01816824,  0.74012206,  0.52881349, -0.58900053],
       [ 0.18869531, -0.75887206, -0.93323722,  0.95505651],
       [ 0.19079432,  1.97875732,  2.60596728,  0.68350889]])

In [216]:
'w x y z'.split()

['w', 'x', 'y', 'z']

In [217]:
'a b c d e'.split()

['a', 'b', 'c', 'd', 'e']

In [269]:
# creating a DataFrame by "keyword arguments"
df3=pd.DataFrame(randn(5,4),columns='w x y z'.split(), index='a b c d e'.split())
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


## Selection and Indexing

Let's learn the various methods to grab data from a DataFrame

In [220]:
df3["y"]

a   -1.706086
b    0.166905
c    0.638787
d   -0.943406
e    0.238127
Name: y, dtype: float64

In [222]:
df3.y

a   -1.706086
b    0.166905
c    0.638787
d   -0.943406
e    0.238127
Name: y, dtype: float64

#### DataFrame Columns are just Series

In [223]:
df3[['y']]

Unnamed: 0,y
a,-1.706086
b,0.166905
c,0.638787
d,-0.943406
e,0.238127


In [243]:
# Pass a list of column names  (row .loc ya da .iloc)
df3[['w',"z"]]

Unnamed: 0,w,z
a,0.302665,-1.159119
b,-0.134841,0.184502
c,0.807706,0.329646
d,-0.497104,0.484752
e,-0.116773,1.996652


In [232]:
df3

Unnamed: 0,w,x,y,z
a,0.302665,1.693723,-1.706086,-1.159119
b,-0.134841,0.390528,0.166905,0.184502
c,0.807706,0.07296,0.638787,0.329646
d,-0.497104,-0.75407,-0.943406,0.484752
e,-0.116773,1.901755,0.238127,1.996652


In [249]:
df3[2:5]

Unnamed: 0,w,x,y,z
c,0.807706,0.07296,0.638787,0.329646
d,-0.497104,-0.75407,-0.943406,0.484752
e,-0.116773,1.901755,0.238127,1.996652


In [250]:
df3.iloc[2:5]

Unnamed: 0,w,x,y,z
c,0.807706,0.07296,0.638787,0.329646
d,-0.497104,-0.75407,-0.943406,0.484752
e,-0.116773,1.901755,0.238127,1.996652


In [251]:
df3.loc["c":"e"]

Unnamed: 0,w,x,y,z
c,0.807706,0.07296,0.638787,0.329646
d,-0.497104,-0.75407,-0.943406,0.484752
e,-0.116773,1.901755,0.238127,1.996652


In [248]:
df3[["w","z"]]

Unnamed: 0,w,z
a,0.302665,-1.159119
b,-0.134841,0.184502
c,0.807706,0.329646
d,-0.497104,0.484752
e,-0.116773,1.996652


In [359]:
df3["c":"e"]  # a slice inside [] selects rows, so .loc/.iloc is not needed for row slicing

Unnamed: 0,w,x,y,z
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845


In [362]:
df4=pd.DataFrame(randn(5,4),columns='w x y z'.split(), index='a b w d e'.split())
df4

Unnamed: 0,w,x,y,z
a,0.062083,0.265864,-0.095195,0.633114
b,-0.214138,1.436661,-0.285115,0.263288
w,1.510803,1.698778,-0.396821,-2.898352
d,0.769996,0.546213,1.168932,0.988046
e,0.987614,0.44744,1.088018,-0.601886


In [363]:
df4[["w"]]  

Unnamed: 0,w
a,0.062083
b,-0.214138
w,1.510803
d,0.769996
e,0.987614


In [364]:
df4["w":"e"]

Unnamed: 0,w,x,y,z
w,1.510803,1.698778,-0.396821,-2.898352
d,0.769996,0.546213,1.168932,0.988046
e,0.987614,0.44744,1.088018,-0.601886


In [258]:
df4.loc[["w"]]

Unnamed: 0,w,x,y,z
w,2.154846,-0.610259,-0.755325,-0.346419


**Creating a new column:**

In [270]:
df3['w+z']=df3['w']+df3['z']
df3

Unnamed: 0,w,x,y,z,w+z
a,0.38603,2.084019,-0.376519,0.230336,0.616367
b,0.681209,1.035125,-0.03116,1.939932,2.621142
c,-1.005187,-0.74179,0.187125,-0.732845,-1.738032
d,-1.38292,1.482495,0.961458,-2.141212,-3.524132
e,0.992573,1.192241,-1.04678,1.292765,2.285338


 - ### Removing Columns

In [271]:
# Not inplace unless specified!
df3.drop('w+z', axis=1, inplace=True)

In [272]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


 - ### Removing rows

In [273]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [301]:
# the default value of axis is 0 (axis = 0)
df4=df3.drop('c',axis=0)  # if a row and a column share the same label, drop() without axis removes the row, because axis defaults to 0

In [290]:
#df3

In [302]:
df4

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [303]:
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to add rows.
# The double brackets keep the third row of df3 as a one-row DataFrame, so its
# row label and column names carry over unchanged.
df4 = pd.concat([df4, df3.iloc[[2]]])

In [304]:
df4

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765
c,-1.005187,-0.74179,0.187125,-0.732845


In [305]:
df4 = df4.sort_index()

In [306]:
df4

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


### Selecting Rows

- ### First, let's take a quick look at [`.loc[]`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.loc.html) | [`.iloc[]`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html)

#### `.loc[]` → allows us to select data using **labels** (names) of rows (index) & columns

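#### `.iloc[]` → allows us to select data using **integer positions** of rows (index) & columns. It works like classic Python indexing, so the end of a slice is excluded (a `.loc[]` label slice, by contrast, includes its end label)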

In [307]:
# Reproducible 5x4 table of integers in [1, 20), with columns var1..var4
# (no index is given, so rows get the default 0..4 labels).
np.random.seed(101)
m = np.random.randint(low=1, high=20, size=(5, 4))
df4 = pd.DataFrame(data=m, columns=[f"var{k}" for k in range(1, 5)])
df4

Unnamed: 0,var1,var2,var3,var4
0,12,18,7,12
1,16,10,14,9
2,5,9,1,15
3,6,13,9,18
4,16,9,3,13


In [201]:
df4.loc[4]

var1    16
var2     9
var3     3
var4    13
Name: 4, dtype: int32

In [203]:
# Slicing produces the same type of the data. Here, DataFrame
df4.loc[2:4]

Unnamed: 0,var1,var2,var3,var4
2,5,9,1,15
3,6,13,9,18
4,16,9,3,13


In [204]:
df4.iloc[2:4]

Unnamed: 0,var1,var2,var3,var4
2,5,9,1,15
3,6,13,9,18


In [308]:
df4.index='a b c d e'.split()
df4

Unnamed: 0,var1,var2,var3,var4
a,12,18,7,12
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


In [209]:
df4.iloc[1:4]

Unnamed: 0,var1,var2,var3,var4
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18


In [210]:
df4.loc['b':'e']

Unnamed: 0,var1,var2,var3,var4
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


In [211]:
df4

Unnamed: 0,var1,var2,var3,var4
a,12,18,7,12
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


In [309]:
df4

Unnamed: 0,var1,var2,var3,var4
a,12,18,7,12
b,16,10,14,9
c,5,9,1,15
d,6,13,9,18
e,16,9,3,13


In [214]:
df4.loc['b':'e','var3']

b    14
c     1
d     9
e     3
Name: var3, dtype: int32

In [318]:
# how can we select these data as a DataFrame not a series
df4.loc['b':'d']['var2']

b    10
c     9
d    13
Name: var2, dtype: int32

In [216]:
df4.iloc[2:5,2]

c    1
d    9
e    3
Name: var3, dtype: int32

In [218]:
df4.iloc[2:5][['var3']]

Unnamed: 0,var3
c,1
d,9
e,3


In [222]:
df3.loc['c']

w   -1.005187
x   -0.741790
y    0.187125
z   -0.732845
Name: c, dtype: float64

In [331]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [338]:
df3.loc["c"]

w   -1.005187
x   -0.741790
y    0.187125
z   -0.732845
Name: c, dtype: float64

In [336]:
df3.loc[["c"],["y","z"]].T

Unnamed: 0,c
y,0.187125
z,-0.732845


In [339]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [345]:
# Keep a copy of the original row "e" under the new label 3 before overwriting it.
# The outputs shown from here on contain this extra row, and the 6-entry label list
# assigned to df3 later requires six rows; no other cell in the notebook creates it.
# NOTE(review): reconstructed from the displayed outputs - confirm this matches the cell that was deleted.
# The guard keeps the cell re-runnable (a second run would otherwise copy the overwritten row).
if 3 not in df3.index:
    df3.loc[3] = df3.loc["e"]
df3.loc["e"] = df3.iloc[2]   # overwrite row "e" with the values of the third row ("c")

In [344]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765
3,0.992573,1.192241,-1.04678,1.292765


In [350]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [356]:
df3.loc[["a"],["w"]]

Unnamed: 0,w
a,0.38603


### Selecting subset of rows and columns

 - ### `.loc[[row labels|names], [column labels|names]]`

 - ### `.iloc[[row index numbers], [column index numbers]]`

In [368]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [369]:
df3.loc[["c"],["y"]]

Unnamed: 0,y
c,0.187125


In [366]:
df3.loc['c','y']

0.1871245217641948

In [371]:
df3.loc[["c"]][["y"]]

Unnamed: 0,y
c,0.187125


In [227]:
# let's select the same data as a DataFrame
df3.loc[['c'],['w']]

Unnamed: 0,w
c,-1.005187


In [373]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [377]:
df3.loc[['c','d'],['w','z']]

Unnamed: 0,w,z
c,-1.005187,-0.732845
d,-1.38292,-2.141212


In [381]:
df3.loc["c":"e"][["w","z"]]   # data.loc[slicing][column]

Unnamed: 0,w,z
c,-1.005187,-0.732845
d,-1.38292,-2.141212
e,-1.005187,-0.732845


In [388]:
df3.iloc[[0,2],[0,3]]

Unnamed: 0,w,z
a,0.38603,0.230336
c,-1.005187,-0.732845


### Conditional Selection

An important feature of pandas is conditional selection using bracket notation, very similar to numpy:

In [389]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [399]:
df3[df3>0.2]

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,,0.230336
b,0.681209,1.035125,,1.939932
c,,,,
d,,1.482495,0.961458,
e,,,,
3,0.992573,1.192241,,1.292765


In [400]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [392]:
# It returns based on rows.
df3[(df3['y']<0.5)]

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [402]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [405]:
df3[df3['w']<0.8][['z','y']]  # df3[df3['w']<0.8] is a separate, filtered DataFrame; columns z and y are then selected from it

Unnamed: 0,z,y
a,0.230336,-0.376519
b,1.939932,-0.03116
c,-0.732845,0.187125
d,-2.141212,0.961458
e,-0.732845,0.187125


In [411]:
df3[df3[['w']]<0.8]#[['z','y']]  # a boolean Series mask drops the rows where it is False; a boolean DataFrame mask keeps every row but shows NaN in the cells that do not match

Unnamed: 0,w,x,y,z
a,0.38603,,,
b,0.681209,,,
c,-1.005187,,,
d,-1.38292,,,
e,-1.005187,,,
3,,,,


#### To combine conditions, use **|** → `or` and **&** → `and`, wrapping each condition in parentheses:

df[(cond1) | (cond2)]   or   df[(cond1) & (cond2)]

In [412]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,-1.005187,-0.74179,0.187125,-0.732845
3,0.992573,1.192241,-1.04678,1.292765


In [425]:
df3[(df3['w']>0) & (df3['z']<1)]

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336


#### Conditional selection using ``.loc[]`` and ``.iloc[]``

In [238]:
df3

Unnamed: 0,w,x,y,z
a,0.38603,2.084019,-0.376519,0.230336
b,0.681209,1.035125,-0.03116,1.939932
c,-1.005187,-0.74179,0.187125,-0.732845
d,-1.38292,1.482495,0.961458,-2.141212
e,0.992573,1.192241,-1.04678,1.292765


In [423]:
# Rows where column w is negative, restricted to columns w and z.
# The original cell had no row selector (a SyntaxError); this condition reproduces
# the rows c, d and e shown in the output.
df3.loc[df3['w'] < 0, ['w','z']]

Unnamed: 0,w,z
c,-1.005187,-0.732845
d,-1.38292,-2.141212
e,-1.005187,-0.732845


In [429]:
df3.loc[((df3.x>1) | (df3.y<1)), ["y","z"]]

Unnamed: 0,y,z
a,-0.376519,0.230336
b,-0.03116,1.939932
c,0.187125,-0.732845
d,0.961458,-2.141212
e,0.187125,-0.732845
3,-1.04678,1.292765


## More Index Details

Let's discuss some more features of indexing, including resetting the index or setting it to something else. We'll also talk about index hierarchy!

In [459]:
df3

Unnamed: 0,index,w,x,y,z
0,CA,0.38603,2.084019,-0.376519,0.230336
1,NY,0.681209,1.035125,-0.03116,1.939932
2,WY,-1.005187,-0.74179,0.187125,-0.732845
3,OR,-1.38292,1.482495,0.961458,-2.141212
4,CO,-1.005187,-0.74179,0.187125,-0.732845
5,eb,0.992573,1.192241,-1.04678,1.292765


In [460]:
# Reset to default 0,1...n index
df3.reset_index()

Unnamed: 0,level_0,index,w,x,y,z
0,0,CA,0.38603,2.084019,-0.376519,0.230336
1,1,NY,0.681209,1.035125,-0.03116,1.939932
2,2,WY,-1.005187,-0.74179,0.187125,-0.732845
3,3,OR,-1.38292,1.482495,0.961458,-2.141212
4,4,CO,-1.005187,-0.74179,0.187125,-0.732845
5,5,eb,0.992573,1.192241,-1.04678,1.292765


In [461]:
df3.reset_index(drop=True)

Unnamed: 0,index,w,x,y,z
0,CA,0.38603,2.084019,-0.376519,0.230336
1,NY,0.681209,1.035125,-0.03116,1.939932
2,WY,-1.005187,-0.74179,0.187125,-0.732845
3,OR,-1.38292,1.482495,0.961458,-2.141212
4,CO,-1.005187,-0.74179,0.187125,-0.732845
5,eb,0.992573,1.192241,-1.04678,1.292765


In [450]:
index='CA NY WY OR CO eb'.split()
index

['CA', 'NY', 'WY', 'OR', 'CO', 'eb']

In [451]:
# Store the label list built above as a regular column named 'index'.
# (The original referenced `newindx`, which is never defined and raised NameError.)
df3['index'] = index

In [452]:
df3

Unnamed: 0,w,x,y,z,index
0,0.38603,2.084019,-0.376519,0.230336,CA
1,0.681209,1.035125,-0.03116,1.939932,NY
2,-1.005187,-0.74179,0.187125,-0.732845,WY
3,-1.38292,1.482495,0.961458,-2.141212,OR
4,-1.005187,-0.74179,0.187125,-0.732845,CO
5,0.992573,1.192241,-1.04678,1.292765,eb


In [454]:
df3.set_index('index',inplace=True)

In [455]:
df3

Unnamed: 0_level_0,w,x,y,z
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,0.38603,2.084019,-0.376519,0.230336
NY,0.681209,1.035125,-0.03116,1.939932
WY,-1.005187,-0.74179,0.187125,-0.732845
OR,-1.38292,1.482495,0.961458,-2.141212
CO,-1.005187,-0.74179,0.187125,-0.732845
eb,0.992573,1.192241,-1.04678,1.292765


In [456]:
df3.reset_index(inplace=True)

In [457]:
df3

Unnamed: 0,index,w,x,y,z
0,CA,0.38603,2.084019,-0.376519,0.230336
1,NY,0.681209,1.035125,-0.03116,1.939932
2,WY,-1.005187,-0.74179,0.187125,-0.732845
3,OR,-1.38292,1.482495,0.961458,-2.141212
4,CO,-1.005187,-0.74179,0.187125,-0.732845
5,eb,0.992573,1.192241,-1.04678,1.292765


In [458]:
df3.reset_index(drop=True)

Unnamed: 0,index,w,x,y,z
0,CA,0.38603,2.084019,-0.376519,0.230336
1,NY,0.681209,1.035125,-0.03116,1.939932
2,WY,-1.005187,-0.74179,0.187125,-0.732845
3,OR,-1.38292,1.482495,0.961458,-2.141212
4,CO,-1.005187,-0.74179,0.187125,-0.732845
5,eb,0.992573,1.192241,-1.04678,1.292765


In [445]:
# Remove the helper column again. It is named 'index' (created earlier and restored by
# reset_index); a column called 'newidx' does not exist and raised KeyError.
df3.drop("index", axis = 1,  inplace=True)

In [446]:
df3

Unnamed: 0,w,x,y,z
0,0.38603,2.084019,-0.376519,0.230336
1,0.681209,1.035125,-0.03116,1.939932
2,-1.005187,-0.74179,0.187125,-0.732845
3,-1.38292,1.482495,0.961458,-2.141212
4,-1.005187,-0.74179,0.187125,-0.732845
5,0.992573,1.192241,-1.04678,1.292765


## Multi-Index and Index Hierarchy

Let us go over how to work with a Multi-Index. First, we'll create a quick example of what a Multi-Indexed DataFrame looks like:

In [255]:
# Index levels: the outer label (group) is repeated three times each,
# and the inner label is the number within that group.
outside = [group for group in ('M1', 'M2', 'M3') for _ in range(3)]
inside = [1, 2, 3] * 2 + [5, 6, 7]
# Pair the two lists position by position into (outer, inner) tuples, one per row.
multi_index = [(outer, inner) for outer, inner in zip(outside, inside)]
multi_index

[('M1', 1),
 ('M1', 2),
 ('M1', 3),
 ('M2', 1),
 ('M2', 2),
 ('M2', 3),
 ('M3', 5),
 ('M3', 6),
 ('M3', 7)]

In [256]:
hier_index=pd.MultiIndex.from_tuples(multi_index)

In [257]:
hier_index

MultiIndex([('M1', 1),
            ('M1', 2),
            ('M1', 3),
            ('M2', 1),
            ('M2', 2),
            ('M2', 3),
            ('M3', 5),
            ('M3', 6),
            ('M3', 7)],
           )

In [259]:
df5=pd.DataFrame(np.random.randn(9,4), index = hier_index, columns=['A','B','C','D'])
df5

Unnamed: 0,Unnamed: 1,A,B,C,D
M1,1,-2.288978,0.441018,0.656973,0.136999
M1,2,-0.451982,-1.094163,-0.006354,-1.403832
M1,3,-0.202026,-1.089143,1.406601,1.577691
M2,1,0.545812,-0.693707,-0.389968,-0.170756
M2,2,-0.703198,2.067214,0.455452,-1.123753
M2,3,-1.088266,-0.609936,-0.635511,-0.728053
M3,5,-0.594951,-0.593371,0.911416,-1.768073
M3,6,0.205247,0.211945,-1.240039,1.0493
M3,7,-0.410414,0.340754,0.805027,0.528188


Now let's show how to index this! For an index hierarchy we use ``df.loc[]``; if the hierarchy were on the columns axis, you would just use normal bracket notation ``df[]``. Selecting one label of the outer level returns the sub-DataFrame:

In [273]:
df5.loc['M3']

Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,-0.594951,-0.593371,0.911416,-1.768073
6,0.205247,0.211945,-1.240039,1.0493
7,-0.410414,0.340754,0.805027,0.528188


In [274]:
df5.loc['M2'].loc[2]

A   -0.703198
B    2.067214
C    0.455452
D   -1.123753
Name: 2, dtype: float64

In [275]:
df5.loc['M2'].loc[[2]]

Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,-0.703198,2.067214,0.455452,-1.123753


In [262]:
df5.index.names

FrozenList([None, None])

In [263]:
df5.index.names = ['Group','Num']

In [264]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M1,1,-2.288978,0.441018,0.656973,0.136999
M1,2,-0.451982,-1.094163,-0.006354,-1.403832
M1,3,-0.202026,-1.089143,1.406601,1.577691
M2,1,0.545812,-0.693707,-0.389968,-0.170756
M2,2,-0.703198,2.067214,0.455452,-1.123753
M2,3,-1.088266,-0.609936,-0.635511,-0.728053
M3,5,-0.594951,-0.593371,0.911416,-1.768073
M3,6,0.205247,0.211945,-1.240039,1.0493
M3,7,-0.410414,0.340754,0.805027,0.528188


### Let's take a quick look at [``.xs()``](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.xs.html)

In [276]:
df5.xs('M3')

Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,-0.594951,-0.593371,0.911416,-1.768073
6,0.205247,0.211945,-1.240039,1.0493
7,-0.410414,0.340754,0.805027,0.528188


In [277]:
df5.loc['M3']

Unnamed: 0_level_0,A,B,C,D
Num,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
5,-0.594951,-0.593371,0.911416,-1.768073
6,0.205247,0.211945,-1.240039,1.0493
7,-0.410414,0.340754,0.805027,0.528188


In [267]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C,D
Group,Num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M1,1,-2.288978,0.441018,0.656973,0.136999
M1,2,-0.451982,-1.094163,-0.006354,-1.403832
M1,3,-0.202026,-1.089143,1.406601,1.577691
M2,1,0.545812,-0.693707,-0.389968,-0.170756
M2,2,-0.703198,2.067214,0.455452,-1.123753
M2,3,-1.088266,-0.609936,-0.635511,-0.728053
M3,5,-0.594951,-0.593371,0.911416,-1.768073
M3,6,0.205247,0.211945,-1.240039,1.0493
M3,7,-0.410414,0.340754,0.805027,0.528188


In [278]:
# Address one row of the two-level index with a tuple (outer label, inner label).
# Passing a list as the key is deprecated and rejected by pandas 2.
df5.xs(('M2', 2))

A   -0.703198
B    2.067214
C    0.455452
D   -1.123753
Name: (M2, 2), dtype: float64

In [279]:
df5.xs(3, level = 'Num')

Unnamed: 0_level_0,A,B,C,D
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1,-0.202026,-1.089143,1.406601,1.577691
M2,-1.088266,-0.609936,-0.635511,-0.728053


In [271]:
df5.xs(3, level = 1)

Unnamed: 0_level_0,A,B,C,D
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1,-0.202026,-1.089143,1.406601,1.577691
M2,-1.088266,-0.609936,-0.635511,-0.728053


In [272]:
df5.xs('A',axis=1)

Group  Num
M1     1     -2.288978
       2     -0.451982
       3     -0.202026
M2     1      0.545812
       2     -0.703198
       3     -1.088266
M3     5     -0.594951
       6      0.205247
       7     -0.410414
Name: A, dtype: float64