# 範例
***

# [教學目標]

* 能夠使用不同的方法初始化一個陣列
* 知道固定大小對於陣列的意義
* 了解不同的亂數陣列有什麼差異



In [1]:
# 載入 NumPy, Pandas 套件
import numpy as np
import pandas as pd

# Confirm both packages loaded correctly and show their versions
for module in (np, pd):
    print(module)
    print(module.__version__)

<module 'numpy' from '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/numpy/__init__.py'>
1.20.1
<module 'pandas' from '/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/pandas/__init__.py'>
1.2.4


In [2]:
# Series built from a plain list; the index defaults to 0, 1, 2, ...

s = pd.Series(data=[1, 2, 3])
print(s)
s

0    1
1    2
2    3
dtype: int64


0    1
1    2
2    3
dtype: int64

In [3]:
# Series with explicit string labels in place of the default integer index
s = pd.Series(
    data=[1, 2, 3],
    index=['Amy', 'Bob', 'Tom'],
)
print(s)
s

Amy    1
Bob    2
Tom    3
dtype: int64


Amy    1
Bob    2
Tom    3
dtype: int64

In [4]:
# DataFrame built from a flat list: one column, default row and column labels

df = pd.DataFrame(data=[1, 2, 3])
print(df)
df

   0
0  1
1  2
2  3


Unnamed: 0,0
0,1
1,2
2,3


In [5]:
# Same single-column data, now with explicit row labels and a column name
df = pd.DataFrame(
    data=[1, 2, 3],
    index=['a', 'b', 'c'],
    columns=['No'],
)
print(df)
df

   No
a   1
b   2
c   3


Unnamed: 0,No
a,1
b,2
c,3


In [6]:
# Nested lists: each inner list becomes one row
df = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
    ]
)

print(df)
df

   0  1  2
0  1  2  3
1  4  5  6


Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


In [7]:
# Two rows of data with explicit row labels and column names
df = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
    ],
    index=['a', 'b'],
    columns=['A', 'B', 'C'],
)
print(df)
df

   A  B  C
a  1  2  3
b  4  5  6


Unnamed: 0,A,B,C
a,1,2,3
b,4,5,6


In [8]:
# Dict of lists: each key becomes a column, each list holds that column's values
df = pd.DataFrame(dict(Name=['Alice', 'Bob'], Age=[18, 20]))
print(df)
df

    Name  Age
0  Alice   18
1    Bob   20


Unnamed: 0,Name,Age
0,Alice,18
1,Bob,20


In [14]:
# List of dicts: each dict is one row, its keys are the column names
df = pd.DataFrame(
    data=[
        dict(Name='Alice', Age=18),
        dict(Name='Bob', Age=20),
    ]
)
print(df)
df

    Name  Age
0  Alice   18
1    Bob   20


Unnamed: 0,Name,Age
0,Alice,18
1,Bob,20


In [9]:
# A DataFrame is composed of Series: selecting one column returns a Series

df = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6]],
    index=['a', 'b'],
    columns=['A', 'B', 'C'],
)
print(df['B'], type(df['B']), sep='\n')


a    2
b    5
Name: B, dtype: int64
<class 'pandas.core.series.Series'>


In [10]:
# Commonly used DataFrame attributes, printed one per line:
# shape, size, index, columns, values

print(df.shape, df.size, df.index, df.columns, df.values, sep='\n')


(2, 3)
6
Index(['a', 'b'], dtype='object')
Index(['A', 'B', 'C'], dtype='object')
[[1 2 3]
 [4 5 6]]


In [14]:
# Inspecting the data

# head(): first rows (five by default); tail(): last rows (five by default);
# describe(): descriptive statistics. A blank line separates each report.
print(df.head(), df.tail(), df.describe(), sep='\n\n')
print()
# info() writes its summary itself and returns None, so wrapping it in
# print() also emits a trailing "None" line.
print(df.info())


   A  B  C
a  1  2  3
b  4  5  6

   A  B  C
a  1  2  3
b  4  5  6

             A        B        C
count  2.00000  2.00000  2.00000
mean   2.50000  3.50000  4.50000
std    2.12132  2.12132  2.12132
min    1.00000  2.00000  3.00000
25%    1.75000  2.75000  3.75000
50%    2.50000  3.50000  4.50000
75%    3.25000  4.25000  5.25000
max    4.00000  5.00000  6.00000

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, a to b
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       2 non-null      int64
 1   B       2 non-null      int64
 2   C       2 non-null      int64
dtypes: int64(3)
memory usage: 64.0+ bytes
None


In [16]:
# Frame with one categorical, one numeric and one object (string) column,
# used below to show how describe()/info() treat different dtypes.
# NOTE(review): 'categracal' looks like a typo for 'categorical'; it is kept
# as-is because the recorded info() output uses this exact column name.
df1 = pd.DataFrame(
    data=dict(
        categracal=pd.Categorical(["a", "g", "e"]),
        numeric=[1, 3, 6],
        object=["r", "D", "T"],
    )
)
df1

In [28]:
# With no arguments, describe() summarizes only the numeric column here
df1.describe()

Unnamed: 0,numeric
count,3.0
mean,3.333333
std,2.516611
min,1.0
25%,2.0
50%,3.0
75%,4.5
max,6.0


In [17]:
# describe() can be restricted to particular dtypes through `include`.
# The builtin `object` selects object-dtype columns; the `np.object` alias
# has been deprecated since NumPy 1.20 and was removed in NumPy 1.24.
print(df1.describe(include=["category"]))
print(df1.describe(include=[object]))
print(df1.describe(include=[np.number]))

In [30]:
# Print a concise summary: index range, per-column non-null counts and dtypes
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   categracal  3 non-null      category
 1   numeric     3 non-null      int64   
 2   object      3 non-null      object  
dtypes: category(1), int64(1), object(1)
memory usage: 311.0+ bytes


In [29]:
# info() prints the summary itself and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line to the output.
df1.info(verbose=True)

In [31]:
# Capture the info() report as a string instead of printing it
import io
buf = io.StringIO() # in-memory text buffer that info() can write into
df1.info(buf=buf) # write the summary into the buffer instead of stdout
s = buf.getvalue() # read the whole buffer back as a single str

In [34]:
# Show the captured info() text as a raw string (newlines appear as \n)
s

"<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 3 entries, 0 to 2\nData columns (total 3 columns):\n #   Column      Non-Null Count  Dtype   \n---  ------      --------------  -----   \n 0   categracal  3 non-null      category\n 1   numeric     3 non-null      int64   \n 2   object      3 non-null      object  \ndtypes: category(1), int64(1), object(1)\nmemory usage: 311.0+ bytes\n"