# Pandas

- Series
- DataFrame

## Series  
A Series is similar to a one-dimensional array. It is made of two parts:

- values
- index

In [1]:
from pandas import Series

In [3]:
# Mixed int/str values are stored with the generic `object` dtype;
# with no index given, the labels default to 0..n-1.
s = Series([1, 2, 3, "four"])
s

0       1
1       2
2       3
3    four
dtype: object

In [4]:
# Explicit index labels make the data more readable than the default 0..n-1.
s = Series(data=[1, 2, 3, "four"], index=list("abcd"))
s

a       1
b       2
c       3
d    four
dtype: object

In [11]:
# Look up a single value by its index label.
s['d']

'four'

In [8]:
# Building a Series from a dict: the keys become the index labels,
# the values become the data.
dic = dict(math=100, History=60, chemistry=20)
a = Series(dic)
a

math         100
History       60
chemistry     20
dtype: int64

In [12]:
# A label that is a valid Python identifier can also be read as an attribute;
# this returns the same value as a['math'].
# NOTE: attribute access cannot reach labels that collide with existing Series
# attributes/methods, so a['math'] is the safer spelling in real code.
a.math

100

In [13]:
# An integer slice inside [] is positional here: it returns the first two
# entries (the end position is excluded). a.iloc[0:2] is the explicit spelling.
a[0:2]

math       100
History     60
dtype: int64

## Common properties

In [15]:
# Dimensions as a tuple; a Series is 1-D, so this is (number_of_elements,).
s.shape

(4,)

In [16]:
# Total number of elements.
s.size

4

In [17]:
# The index labels of the Series.
s.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [18]:
# dtype of the values; 'O' (object) because this Series mixes ints and a string.
s.dtype

dtype('O')

In [21]:
import numpy as np

# Seed NumPy's global generator so the "random" scores are identical on every
# Restart & Run All; later cells that call np.random.randint become reproducible too.
np.random.seed(42)

# Ten integer scores drawn from [60, 100).
s = Series(data=np.random.randint(60, 100, size=(10,)))
s

0    72
1    79
2    88
3    94
4    83
5    92
6    82
7    85
8    79
9    92
dtype: int64

In [22]:
# First entries of the Series; 5 are returned when no count is given.
s.head()

0    72
1    79
2    88
3    94
4    83
dtype: int64

In [23]:
# First 3 entries only.
s.head(3)

0    72
1    79
2    88
dtype: int64

In [24]:
# Last entries of the Series; 5 are returned when no count is given.
s.tail()

5    92
6    82
7    85
8    79
9    92
dtype: int64

In [25]:
# Distinct values as a NumPy array, in order of first appearance.
s.unique()

array([72, 79, 88, 94, 83, 92, 82, 85])

In [26]:
# Element-wise missing-value test: True where a value is missing (none are here).
s.isnull()

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

In [27]:
# Inverse of isnull(): True where a value is present.
s.notnull()

0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
9    True
dtype: bool

In [28]:
# Arithmetic aligns on index labels: labels present in only one operand
# ('b' and 'd') produce NaN, which also makes the result float64.
s1 = Series([1, 2, 3], index=list("abc"))
s2 = Series([1, 2, 3], index=list("adc"))
s = s1.add(s2)
s

a    2.0
b    NaN
c    6.0
d    NaN
dtype: float64

## DataFrame

In [29]:
from pandas import DataFrame

In [31]:
# A nested list is read row by row; row and column labels default to 0..n-1.
df = DataFrame([[1, 2, 3], [4, 5, 6]])
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


In [36]:
# A 2-D NumPy array works as input too: 2 rows x 10 columns of integers in [10, 60).
df = DataFrame(data=np.random.randint(low=10, high=60, size=(2, 10)))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,55,28,49,28,26,12,54,20,16,28
1,27,52,23,44,42,13,28,14,49,30


In [38]:
# Dict of equal-length lists: each key becomes a column name and
# each list supplies that column's values.
dic = dict(name=list("ABC"), salary=[1000, 2000, 3000])
df = DataFrame(dic)
df

Unnamed: 0,name,salary
0,A,1000
1,B,2000
2,C,3000


### DataFrame properties

In [39]:
# Underlying data as a 2-D NumPy array; mixing a str column with an int column
# yields dtype=object.
df.values

array([['A', 1000],
       ['B', 2000],
       ['C', 3000]], dtype=object)

In [40]:
# Column labels.
df.columns

Index(['name', 'salary'], dtype='object')

In [41]:
# Row labels; no index was passed when df was built, so a default RangeIndex is used.
df.index

RangeIndex(start=0, stop=3, step=1)

In [42]:
# (number of rows, number of columns).
df.shape

(3, 2)

In [45]:
# Each dict key is a column; `index` replaces the default 0..n-1 row labels
# with one subject name per row.
dic = dict(A=[100, 60, 20, 50], B=[50, 40, 70, 90])
df = DataFrame(dic, index="math English history chemistry".split())
df

Unnamed: 0,A,B
math,100,50
English,60,40
history,20,70
chemistry,50,90


### DataFrame Indexing and Slicing

In [55]:
# 8 rows x 4 columns of random integer scores in [60, 100), with named columns.
df = DataFrame(np.random.randint(low=60, high=100, size=(8, 4)), columns=list("abcd"))
df

Unnamed: 0,a,b,c,d
0,67,98,89,86
1,74,75,61,80
2,66,64,78,77
3,62,75,98,76
4,75,68,77,70
5,68,75,70,83
6,60,75,94,69
7,80,66,62,87


In [59]:
# Select a single column by its label; the result is a Series named after the column.
df['a']

0    67
1    74
2    66
3    62
4    75
5    68
6    60
7    80
Name: a, dtype: int64

In [62]:
# A list of labels selects several columns at once and returns a DataFrame.
df[['a','c']]

Unnamed: 0,a,c
0,67,89
1,74,61
2,66,78
3,62,98
4,75,77
5,68,70
6,60,94
7,80,62


In [60]:
# Select a row by integer position (0 = first row); the row comes back as a
# Series indexed by the column labels.
df.iloc[0]

a    67
b    98
c    89
d    86
Name: 0, dtype: int64

In [63]:
# A list of positions selects several rows and returns a DataFrame.
df.iloc[[0,3,5]]

Unnamed: 0,a,b,c,d
0,67,98,89,86
3,62,75,98,76
5,68,75,70,83


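- iloc: select by integer position (the implicit 0, 1, 2, ... numbering of rows and columns)
- loc: select by label (the index and column labels that are displayed)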

In [64]:
# iloc with (row position, column position) returns a single scalar.
df.iloc[0,0]

67

In [65]:
# loc with (row label, column label); this is the same cell as df.iloc[0,0]
# because the default row labels coincide with the row positions.
df.loc[0,'a']

67

In [66]:
# Rows at positions 0 and 1 of the first column; a single column comes back as a Series.
df.iloc[[0,1],0]

0    67
1    74
Name: a, dtype: int64

In [67]:
# A slice inside [] selects ROWS (here the first two), unlike df['a'], which
# selects a column. df.iloc[0:2] is the explicit equivalent.
df[0:2]

Unnamed: 0,a,b,c,d
0,67,98,89,86
1,74,75,61,80


In [68]:
# All rows, columns at positions 0 and 1 (the end of the slice is excluded).
df.iloc[:,0:2]

Unnamed: 0,a,b
0,67,98
1,74,75
2,66,64
3,62,75
4,75,68
5,68,75
6,60,75
7,80,66
