In [None]:
import pandas as pd
import numpy as np

# Build one (days x half-hour slots) consumption grid per meter from the six
# space-separated text files File1.txt .. File6.txt, then save all grids to
# meter_data.npz. Each file has three columns: meter id, five-digit time
# code, consumption.
FIRST_DAY = 195                     # first day code kept (inclusive)
LAST_DAY = 730                      # last day code kept (inclusive)
N_DAYS = LAST_DAY - FIRST_DAY + 1   # 536 rows per meter
N_SLOTS = 48                        # half-hour slots per day, coded 1..48

dfs = [
    pd.read_csv(
        f"File{num}.txt",
        sep=" ",
        header=None,
        names=["MeterID", "FiveDigitCode", "Consumption"],
    )
    for num in range(1, 7)
]

# Stack every file into one long table.
merged_df = pd.concat(dfs, ignore_index=True)

# Split the five-digit code DDDTT into day (DDD) and half-hour slot (TT).
# Note: this is plain NumPy integer arithmetic, so pandas <NA> values are not
# carried through; the column is assumed to contain no missing codes.
codes = merged_df["FiveDigitCode"].to_numpy(np.int32, copy=False)
merged_df["day"] = (codes // 100).astype(np.int16)
merged_df["halfhour"] = (codes % 100).astype(np.int8)

# Keep only rows with day in [FIRST_DAY, LAST_DAY] and slot in [1, N_SLOTS],
# and derive zero-based array indices. .assign() returns a new frame, so no
# column is written onto a filtered slice of the original table (which would
# trigger SettingWithCopyWarning).
in_range = (
    merged_df["day"].between(FIRST_DAY, LAST_DAY)
    & merged_df["halfhour"].between(1, N_SLOTS)
)
merged_df = merged_df.loc[in_range].assign(
    day_idx=lambda d: d["day"] - FIRST_DAY,
    slot_idx=lambda d: d["halfhour"] - 1,
)

# One float32 grid per meter; cells with no reading in the files stay NaN.
meter_dict = {}
for meter_id, g in merged_df.groupby("MeterID", sort=False):
    arr = np.full((N_DAYS, N_SLOTS), np.nan, dtype=np.float32)
    # Integer index columns are used directly; no float round-trip needed.
    arr[g["day_idx"].to_numpy(), g["slot_idx"].to_numpy()] = (
        g["Consumption"].to_numpy(dtype=np.float32)
    )
    meter_dict[int(meter_id)] = arr

# Quick sanity check on the result.
print(f"共构建 {len(meter_dict)} 个表计，示例键：", list(meter_dict.keys())[:5])
k0 = next(iter(meter_dict))
print("示例数组形状：", meter_dict[k0].shape)
print("示例前两天前两槽：\n", meter_dict[k0][:2, :2])

# Persist every grid under its meter id (npz keys must be strings).
np.savez_compressed("meter_data.npz", **{str(k): v for k, v in meter_dict.items()})
print("✅ 保存成功：meter_data.npz")

In [1]:
import numpy as np
import pandas as pd
from numpy.lib.stride_tricks import sliding_window_view

# Load the per-meter arrays from meter_data.npz and flatten each archive
# entry into one row of a 2-D float64 matrix.
data = np.load('meter_data.npz')
file_list = data.files
if not file_list:
    raise ValueError("meter_data.npz contains no arrays")

# Size the rows from the first entry actually present in the archive rather
# than from a hard-coded meter key, which raises KeyError when that meter is
# absent. All entries are assumed to share the same shape.
n_values = data[file_list[0]].size
data_load = np.empty((len(file_list), n_values))

for idx, file in enumerate(file_list):
    data_load[idx] = data[file].reshape(-1)

# Keep only the trailing 365*24 = 8760 values of each row.
# NOTE(review): the arrays are built with 48 half-hour slots per day, so 8760
# values span about 182.5 days, not a full year (that would be 365*48).
# Behaviour is left unchanged here - confirm which window is intended.
data_load = data_load[:, -365*24:]


In [2]:
# Drop every row that contains a run of four consecutive missing values.

# Boolean matrix marking the NaN entries.
nan_mask = np.isnan(data_load)

# All 1x4 windows over the mask; indexing the singleton window-row axis away
# leaves shape (n_rows, n_cols - 3, 4).
nan_windows = sliding_window_view(nan_mask, window_shape=(1, 4))[:, :, 0, :]

# A row is flagged when at least one of its windows is entirely NaN.
has_4_consecutive = nan_windows.all(axis=-1).any(axis=-1)

# Keep only the rows that were not flagged.
data_load = data_load[~has_4_consecutive]


In [3]:
# Drop every row that contains two consecutive zero readings.
# The window is 2 wide, matching the "two zeros in a row" check performed by
# the later verification cell.
ZERO_RUN = 2

# Boolean matrix marking the exact-zero entries (NaN compares unequal to 0).
zero_mask = (data_load == 0)

# All length-ZERO_RUN windows along each row:
# shape (n_rows, n_cols - ZERO_RUN + 1, ZERO_RUN).
zero_windows = sliding_window_view(zero_mask, ZERO_RUN, axis=1)

# A row is flagged when at least one of its windows is entirely zero.
has_zero_run = zero_windows.all(axis=-1).any(axis=-1)

# Keep only the rows that were not flagged.
data_load = data_load[~has_zero_run]

print(data_load)


[[0.15099999 0.77700001 0.12       ... 0.51099998 0.56199998 0.63      ]
 [0.014      0.014      0.212      ... 0.25       0.20999999 0.226     ]
 [0.26699999 0.528      2.03299999 ... 0.38100001 0.442      0.46799999]
 ...
 [3.80999994 1.07299995 1.25300002 ... 0.43099999 0.45899999 0.51499999]
 [2.41899991 1.62800002 1.79400003 ... 0.125      0.113      0.189     ]
 [0.206      0.29800001 0.085      ... 0.077      0.076      0.075     ]]


In [4]:
# Fill the remaining gaps row by row. The filled values are written into a
# separate buffer (A_filled) and data_load is rebound to it afterwards, so the
# buffer allocated here is actually used instead of being discarded.
A_filled = np.empty_like(data_load, dtype=float)

for i, row in enumerate(data_load):
    # Treat isolated zeros as missing readings.
    s = pd.Series(row, dtype=float).replace(0, np.nan)

    # Linear interpolation in both directions, then forward- and back-fill as
    # a safety net for any NaN still left at either end of the row.
    s = s.interpolate(method='linear', limit_direction='both')
    s = s.ffill().bfill()

    A_filled[i] = s.to_numpy()

# Downstream cells keep reading the filled matrix under the same name.
data_load = A_filled

print(data_load)


[[0.15099999 0.77700001 0.12       ... 0.51099998 0.56199998 0.63      ]
 [0.014      0.014      0.212      ... 0.25       0.20999999 0.226     ]
 [0.26699999 0.528      2.03299999 ... 0.38100001 0.442      0.46799999]
 ...
 [3.80999994 1.07299995 1.25300002 ... 0.43099999 0.45899999 0.51499999]
 [2.41899991 1.62800002 1.79400003 ... 0.125      0.113      0.189     ]
 [0.206      0.29800001 0.085      ... 0.077      0.076      0.075     ]]


In [5]:
# Final checks on the cleaned matrix, then save it.

is_zero = (data_load == 0)

# Does any NaN remain anywhere?
print(np.isnan(data_load).any())

# Per row: is there any pair of horizontally adjacent zeros?
zero_pairs = sliding_window_view(is_zero, 2, axis=1)
row_has_two = zero_pairs.all(axis=-1).any(axis=1)
print(row_has_two)

# Does at least one row contain such a pair?
exists_any_row = row_has_two.any()
print(exists_any_row)

# Number of zeros in each row; report only the rows that have some.
zero_counts = is_zero.sum(axis=1)
for i in np.flatnonzero(zero_counts).tolist():
    print(f"第 {i} 行有 {zero_counts[i]} 个 0")

# Write the matrix to user_load.npz (stored under the default key).
np.savez_compressed('user_load',data_load)

False
[False False False ... False False False]
False
