In [12]:
import numpy as np
import os

# 數據讀取
## 注意: NumPy 無法直接讀取 Excel 檔 (.xls/.xlsx), 需先另存為 txt 或 csv
## txt

In [7]:
# Show the raw file contents first so they can be compared with the parsed array.
txt_path = "Data/Data.txt"
with open(txt_path, "r") as txt_file:
    raw_text = txt_file.read()
print("原始數據:\n", raw_text)

# Parse as comma-separated int64 values; skiprows=1 drops the first (header) line.
data = np.loadtxt(txt_path, dtype=np.int64, skiprows=1, delimiter=",")
print('\nload的數據:\n', data)

原始數據:
  # StudentID, Age, Score
20131,10,67
20132,11,88
20133,12,98
20134,8,100
20135,9,75
20136,12,78

load的數據:
 [[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]


## csv

In [8]:
# Show the raw CSV text first so it can be compared with the parsed array.
csv_path = "Data/Data.csv"
with open(csv_path, "r") as csv_file:
    raw_text = csv_file.read()
print("原始數據:\n", raw_text)

# Parse as comma-separated int64 values; skiprows=1 drops the first (header) line.
data = np.loadtxt(csv_path, dtype=np.int64, skiprows=1, delimiter=",")
print('\nload的數據:\n', data)

原始數據:
  # StudentID, Age, Score
20131,10,67
20132,11,88
20133,12,98
20134,8,100
20135,9,75
20136,12,78

load的數據:
 [[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]


## 字串

In [10]:
# Flat, comma-separated text: 18 integers, i.e. 6 records of 3 fields each.
row_string = "20131, 10, 67, 20132, 11, 88, 20133, 12, 98, 20134, 8, 100, 20135, 9, 75, 20136, 12, 78"

# Parse the text into a 1-D int64 array and fold it into a 6x3 matrix in one expression.
data = np.fromstring(row_string, sep=",", dtype=np.int64).reshape(6, 3)
print(data)

[[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]


# 數據保存
## csv or txt

In [13]:
# Echo the in-memory array, then write it out as comma-separated text.
print("numpy data:\n", data)
saved_csv_path = "Data/Data_Save.csv"
np.savetxt(saved_csv_path, data, fmt='%s', delimiter=",")

# List the folder to confirm the file was created, then read it back as plain text.
dir_listing = os.listdir("Data")
print("data file in directory:", dir_listing)
with open(saved_csv_path, "r") as saved_csv:
    saved_text = saved_csv.read()
print("\n", saved_text)

numpy data:
 [[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]
data file in directory: ['Data.csv', 'Data.txt', 'Data_Save.csv']

 20131,10,67
20132,11,88
20133,12,98
20134,8,100
20135,9,75
20136,12,78



## 二進制 np.save() 存.npy檔 
### 僅能以np.load()讀取

In [15]:
# Show the array that is about to be persisted in NumPy's binary .npy format.
print('data:\n', data)
npy_path = "Data/Data_Save.npy"
np.save(npy_path, data)

# Confirm the .npy file exists, then load it back and display the round-tripped array.
dir_listing = os.listdir("Data")
print("data file in directory:", dir_listing)
npy_data = np.load(npy_path)
print(npy_data)

data:
 [[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]
data file in directory: ['Data.csv', 'Data.txt', 'Data_Save.csv', 'Data_Save.npy']
[[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]


## 同時存多個array: np.savez()
### 僅能以 np.load()讀取

In [16]:
# Two small example arrays to bundle into a single .npz archive.
train_data = np.array([1,2,3])
test_data = np.array([11,22,33])

# "train" and "test" are user-chosen labels; they become the keys used to
# look the arrays up again after loading (dictionary-style access).
np.savez("Data/Data_Save.npz", train=train_data, test=test_data)
print("data file in directory:", os.listdir("Data"))

# np.load on a .npz returns an NpzFile that keeps the archive open and reads
# each array on key access. Use it as a context manager so the underlying
# file handle is closed instead of leaking for the life of the kernel.
# Note: npz_data is closed once this block exits; index it inside the block.
with np.load("Data/Data_Save.npz") as npz_data:
    print("train: ", npz_data["train"])
    print("test: ", npz_data["test"])

data file in directory: ['Data.csv', 'Data.txt', 'Data_Save.csv', 'Data_Save.npy', 'Data_Save.npz']
train:  [1 2 3]
test:  [11 22 33]


## 壓縮存取數據, np.savez_compressed()
### 僅能以np.load()讀取, 檔案大小更小

In [18]:
# Same keyword-labelled archive as np.savez, but written with compression.
# Relies on train_data / test_data defined by the preceding np.savez cell.
np.savez_compressed("Data/Data_Save_compressed.npz", train=train_data, test=test_data)
print("data file in directory:", os.listdir("Data"))

# np.load on a .npz returns an NpzFile that keeps the archive open and reads
# each array on key access. Use it as a context manager so the underlying
# file handle is closed instead of leaking for the life of the kernel.
# Note: npz_data_compressed is closed once this block exits.
with np.load("Data/Data_Save_compressed.npz") as npz_data_compressed:
    print("train:", npz_data_compressed["train"])
    print("test:", npz_data_compressed["test"])

# Compare on-disk sizes of the compressed archive and the uncompressed one
# written earlier to Data/Data_Save.npz.
print("compressed file size:", os.path.getsize("Data/Data_Save_compressed.npz"))
print("original file size:", os.path.getsize("Data/Data_Save.npz"))


data file in directory: ['Data.csv', 'Data.txt', 'Data_Save.csv', 'Data_Save.npy', 'Data_Save.npz', 'Data_Save_compressed.npz']
train: [1 2 3]
test: [11 22 33]
compressed file size: 402
original file size: 528
