# 創建 Series, DataFrame
### 可以用列表或字典的形式創建 Series 與 DataFrame
### 常用屬性: df.index, df.columns

In [52]:
import pandas as pd
import numpy as np
# --- Series -----------------------------------------------------------------
# Two equivalent constructions: values plus an explicit index, or a dict whose
# keys become the index labels.
s = pd.Series(data=[11, 22, 33], index=list("abc"))
s1 = pd.Series(dict(a=11, b=22, c=33))
for _title, _series in (("series:\n", s), ("series s1:\n", s1)):
    print(_title, _series)
print("-" * 90)

# --- DataFrame --------------------------------------------------------------
# d1: nested list (one inner list per row) with explicit row/column labels.
d1 = pd.DataFrame(data=[[1, 2], [3, 4]], index=list("ab"), columns=list("AB"))
# d2: 2-D NumPy array of ones with the same labels.
d2 = pd.DataFrame(data=np.ones(shape=(2, 2)), index=list("ab"), columns=list("AB"))
# d3: dict of lists -- each key is a column name; the row index defaults to 0..n-1.
d3 = pd.DataFrame(dict(col1=[1, 3], col2=[2, 4]))
# d4: dict of Series -- same content as d3, columns supplied as Series objects.
d4 = pd.DataFrame(dict(col1=pd.Series([1, 3]), col2=pd.Series([2, 4])))
for _title, _frame in (("d1:\n", d1), ("d2:\n", d2), ("d3:\n", d3), ("d4:\n", d4)):
    print(_title, _frame)

series:
 a    11
b    22
c    33
dtype: int64
series s1:
 a    11
b    22
c    33
dtype: int64
------------------------------------------------------------------------------------------
d1:
    A  B
a  1  2
b  3  4
d2:
      A    B
a  1.0  1.0
b  1.0  1.0
d3:
    col1  col2
0     1     2
1     3     4
d4:
    col1  col2
0     1     2
1     3     4


## df.index:
## df.columns:
### df.columns.get_indexer(["A", "B"])  由欄位名稱取得其整數位置 (position)

In [53]:
# Demo frame: the integers -12..11 laid out as 6 rows x 4 columns,
# rows labelled 'a'..'f' and columns labelled 'A'..'D'.
data = np.arange(-12, 12).reshape((6, 4))
df = pd.DataFrame(data, index=list("abcdef"), columns=list("ABCD"))

# Position -> label: indexing the column Index with a list of positions
# returns the labels at those positions (here 0 and 3 -> 'A' and 'D').
df1 = df.columns[[0, 3]]
# Label -> position: get_indexer returns an integer array with the position
# of each requested label (pandas marks labels that are not found with -1).
df2 = df.columns.get_indexer(["A", "B"])

print('df:\n', df)
# Labels name the object actually printed (df, not a non-existent d2).
print('df.index:\n', df.index)
print('df.columns:\n', df.columns)
print('df1:\n', df1)
print('df2:\n', df2)

df:
     A   B   C   D
a -12 -11 -10  -9
b  -8  -7  -6  -5
c  -4  -3  -2  -1
d   0   1   2   3
e   4   5   6   7
f   8   9  10  11
d2.index:
 Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')
d2.columns:
 Index(['A', 'B', 'C', 'D'], dtype='object')
df1:
 Index(['A', 'D'], dtype='object')
df2:
 [0 1]


# 數據選取
### df.loc: 依標籤 (label) 切片
### df.iloc: 依整數位置 (position) 切片

In [54]:
import pandas as pd
import numpy as np

# Demo frame used by the selection examples: the integers -12..11 in a
# 6 x 4 grid, rows labelled 'a'..'f', columns labelled 'A'..'D'.
data = np.arange(-12, 12).reshape(6, 4)
df = pd.DataFrame(
    data=data,
    index=["a", "b", "c", "d", "e", "f"],
    columns=["A", "B", "C", "D"],
)

## df.loc[]:

In [55]:
# Plain [] indexing selects columns: a single label gives a Series,
# a list of labels gives a DataFrame with the columns in the order requested.
df1 = df["B"]
df2 = df[["C", "B"]]
# .loc slices by label and includes BOTH endpoints (rows c..d, columns B..D).
df3 = df.loc["c":"d", "B":"D"]
# A list of row labels returns those rows in the listed order, all columns kept.
df4 = df.loc[["d", "b"]]

for _title, _obj in (
    ("df:\n", df),
    ("df1:\n", df1),
    ("df2:\n", df2),
    ("df3:\n", df3),
    ("df4:\n", df4),
):
    print(_title, _obj)

df:
     A   B   C   D
a -12 -11 -10  -9
b  -8  -7  -6  -5
c  -4  -3  -2  -1
d   0   1   2   3
e   4   5   6   7
f   8   9  10  11
df1:
 a   -11
b    -7
c    -3
d     1
e     5
f     9
Name: B, dtype: int32
df2:
     C   B
a -10 -11
b  -6  -7
c  -2  -3
d   2   1
e   6   5
f  10   9
df3:
    B  C  D
c -3 -2 -1
d  1  2  3
df4:
    A  B  C  D
d  0  1  2  3
b -8 -7 -6 -5


## df.iloc[]:

In [56]:
# .iloc slices by integer position and excludes the stop position:
# rows [2, 3) -> 'c', columns [1, 3) -> 'B' and 'C'.
df1 = df.iloc[2:3, 1:3]
print('df:\n', df)
# Print the stored selection instead of recomputing the same slice.
print("df1:\n", df1)

df:
     A   B   C   D
a -12 -11 -10  -9
b  -8  -7  -6  -5
c  -4  -3  -2  -1
d   0   1   2   3
e   4   5   6   7
f   8   9  10  11
df1:
    B  C
c -3 -2


## 條件篩選:

In [57]:
import pandas as pd
import numpy as np

# Rebuild the demo frame so this cell runs on its own.
data = np.arange(-12, 12).reshape(6, 4)
df = pd.DataFrame(data, index=list("abcdef"), columns=list("ABCD"))

# Row filter: a boolean mask over the rows keeps those where column A is negative.
df1 = df.loc[df["A"] < 0]
# Column filter: the mask comes from the first row (position 0), one boolean
# per column; keep columns whose first-row value is not below -10 (i.e. >= -10).
df2 = df.loc[:, df.iloc[0] >= -10]
# Masks combine element-wise with |: first-row value >= -10 OR < -11.
df3 = df.loc[:, (df.iloc[0] >= -10) | (df.iloc[0] < -11)]

for _title, _frame in (
    ("df:\n", df),
    ("df1:\n", df1),
    ("df2:\n", df2),
    ("df3:\n", df3),
):
    print(_title, _frame)

df:
     A   B   C   D
a -12 -11 -10  -9
b  -8  -7  -6  -5
c  -4  -3  -2  -1
d   0   1   2   3
e   4   5   6   7
f   8   9  10  11
df1:
     A   B   C  D
a -12 -11 -10 -9
b  -8  -7  -6 -5
c  -4  -3  -2 -1
df2:
     C   D
a -10  -9
b  -6  -5
c  -2  -1
d   2   3
e   6   7
f  10  11
df3:
     A   C   D
a -12 -10  -9
b  -8  -6  -5
c  -4  -2  -1
d   0   2   3
e   4   6   7
f   8  10  11


# 類型轉換
### 可在 NumPy 陣列、list 與 Series 之間自由轉換
### Series 也可以轉換為 DataFrame,藉此方便拼接

In [58]:
# The alias must be `pd`: every pandas call in this cell goes through `pd.`.
import pandas as pd

# Start from a plain Python list and wrap it in a labelled Series.
l = [11, 22, 33]
s = pd.Series(l, index=["a", "b", "c"])

# Series -> NumPy array and Series -> Python list (index labels are dropped).
s_np = s.to_numpy()
s_list = s.tolist()
print("array:\n", s_np, type(s_np))
print("list:\n", s_list, type(s_list))

# Series -> DataFrame: to_frame() yields a single column whose rows keep the
# Series index; transposing turns it into a single row with one column per label.
f = s.to_frame()
f_T = f.T
print(f, '\n', type(f))
print(f_T, '\n', type(f_T))

array:
 [11 22 33] <class 'numpy.ndarray'>
list:
 [11, 22, 33] <class 'list'>
    0
a  11
b  22
c  33 
 <class 'pandas.core.frame.DataFrame'>
    a   b   c
0  11  22  33 
 <class 'pandas.core.frame.DataFrame'>
