In [None]:
from collections import Counter
import numpy as np
import pandas as pd

In [None]:
# 计算连续时间
def continuous_time(group, arr):
    """Collapse a sequence of time stamps into runs of consecutive values.

    Two neighbouring entries belong to the same run when the later one equals
    the earlier one plus 1. The input is scanned in the order given, so it
    must already be sorted ascending for the runs to be meaningful.

    Parameters
    ----------
    group : tuple
        Key fields prepended to every emitted record.
    arr : list, tuple or numpy.ndarray
        Time stamps to scan. ``None`` and empty inputs yield no records.

    Returns
    -------
    list of tuple
        One ``group + (start, length)`` tuple per run, where ``start`` is the
        first time stamp of the run and ``length`` its number of entries.
    """
    # Test emptiness by length rather than truthiness: ``not arr`` raises
    # ValueError for a multi-element ndarray and treats a single-element
    # ndarray holding the time stamp 0 as if it were empty.
    if arr is None or len(arr) == 0:
        return []

    result = []
    start = arr[0]
    length = 1

    for i in range(1, len(arr)):
        if arr[i] == arr[i - 1] + 1:
            length += 1
        else:
            # The run is broken: emit it and open a new one at this entry.
            result.append(group + (start, length))
            start = arr[i]
            length = 1

    # Emit the run that was still open when the input ended.
    result.append(group + (start, length))
    return result

In [None]:
# 故障预测平滑
def smooth_prediction(y_pred, isolate_window=5, min_segment_len=20, max_gap=200):
    """Smooth a 1D sequence of predicted state labels (0 is treated as "no fault").

    Parameters
    ----------
    y_pred : 1D array-like
        Predicted labels; the input itself is never modified.
    isolate_window : int
        Number of positions inspected on each side of a sample when deciding
        whether it is isolated.
    min_segment_len : int
        Segments whose span (first to last non-zero index, inclusive) is
        shorter than this are cleared to 0.
    max_gap : int
        Two consecutive non-zero samples further apart than this start a new
        segment; smaller gaps are bridged into the same segment.

    Returns
    -------
    numpy.ndarray
        The smoothed labels, same length as the input.
    """
    labels = np.array(y_pred, copy=True)
    total = len(labels)

    # Step 1: clear every sample whose neighbours inside the window are all 0.
    for pos in range(total):
        left = labels[max(0, pos - isolate_window):pos]
        right = labels[pos + 1:min(total, pos + isolate_window + 1)]
        if (left == 0).all() and (right == 0).all():
            labels[pos] = 0

    # Step 2: positions that still carry a non-zero label.
    nonzero_idx = np.flatnonzero(labels)
    if len(nonzero_idx) == 0:
        return labels

    # Step 3: chain non-zero positions into segments, breaking at large gaps.
    segments = []
    seg_start = nonzero_idx[0]
    for prev, cur in zip(nonzero_idx[:-1], nonzero_idx[1:]):
        if cur - prev > max_gap:
            segments.append((seg_start, prev))
            seg_start = cur
    segments.append((seg_start, nonzero_idx[-1]))

    # Step 4: drop short segments, flatten the rest to their most common label.
    smoothed = labels.copy()
    for first, last in segments:
        span = slice(first, last + 1)
        if last - first + 1 < min_segment_len:
            smoothed[span] = 0
            continue
        # The count covers the zeros bridged inside the segment as well, so a
        # sparse segment can resolve to 0.
        smoothed[span] = Counter(labels[span]).most_common(1)[0][0]

    return smoothed

In [None]:
# 计算故障记录
def get_fault_records(data_):
    """Build one record per uninterrupted fault run, per state and date.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Must contain the columns '日期', '时间' and '机器状态'. Rows whose
        '机器状态' equals 0 are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns '机器状态', '日期', '开始时间', '持续时间'. Each row is one
        run of consecutive '时间' values, as split by ``continuous_time``,
        inside a single ('机器状态', '日期') group; a run is therefore never
        carried across a date boundary.
    """
    # Keep only the needed columns and the non-zero (fault) rows.
    faults = data_[['日期', '时间', '机器状态']]
    faults = faults[faults['机器状态'] != 0]
    # Sort so that '时间' is ascending inside every (state, date) group.
    faults = faults.sort_values(['机器状态', '日期', '时间'])

    all_records = []

    # The per-group frame gets its own name instead of shadowing the frame
    # that is being grouped.
    for (state, date), day_rows in faults.groupby(['机器状态', '日期']):
        # Hand over a plain list: an emptiness check based on truthiness is
        # ambiguous for an ndarray, whereas a list is safe either way.
        times = list(day_rows['时间'].to_numpy())

        records = continuous_time(
            group=(state, date),
            arr=times
        )
        all_records.extend(records)
    return pd.DataFrame(all_records, columns=['机器状态', '日期', '开始时间', '持续时间'])

In [None]:
# 不同故障分类汇总
def save_fault_records(fault_records):
    """Lay the fault records of every state side by side in one wide table.

    Parameters
    ----------
    fault_records : pandas.DataFrame
        Must contain the columns '机器状态', '日期', '开始时间', '持续时间'.

    Returns
    -------
    pandas.DataFrame
        A leading '序号' column (0-based row number) followed by one
        ('日期', '开始时间', '持续时间') column triple per distinct
        '机器状态', in ascending state order. The triples keep their plain
        column names, so names repeat across states. States with fewer
        records than the longest one are padded with NaN. If there are no
        records at all, an empty frame holding only '序号' is returned.
    """
    blocks = [
        rows[['日期', '开始时间', '持续时间']].reset_index(drop=True)
        for _, rows in fault_records.groupby('机器状态')
    ]

    # pd.concat raises ValueError on an empty list, so a fault-free input
    # needs its own (empty) result.
    if not blocks:
        return pd.DataFrame(columns=['序号'])

    final_fault_records = pd.concat(blocks, axis=1)
    final_fault_records.insert(0, '序号', range(len(final_fault_records)))

    return final_fault_records

In [None]:
# 最终呈现结果
def count_fault_records(fault_records):
    """Summarise fault records per state: count, longest and shortest duration.

    Parameters
    ----------
    fault_records : pandas.DataFrame
        Must contain the columns '机器状态' and '持续时间'.

    Returns
    -------
    pandas.DataFrame
        One row per distinct '机器状态' with the columns '故障编号',
        '故障总次数', '最长持续时间', '最短持续时间'. '故障编号' is the
        state translated through a fixed table; states missing from the
        table get NaN.
    """
    # State value -> fault code.
    code_by_state = {
        1: 1001, 2: 2001, 3: 4001, 4: 4002, 5: 4003,
        6: 5001, 7: 5002, 8: 6001, 9: 6002
    }
    # Output column -> (source column, aggregation).
    aggregations = {
        '故障总次数': ('持续时间', 'count'),
        '最长持续时间': ('持续时间', 'max'),
        '最短持续时间': ('持续时间', 'min'),
    }

    per_state = fault_records.groupby('机器状态').agg(**aggregations).reset_index()
    with_codes = per_state.assign(故障编号=per_state['机器状态'].map(code_by_state))

    # Put the fault code first and drop the raw state column.
    ordered_columns = ['故障编号', '故障总次数', '最长持续时间', '最短持续时间']
    return with_codes[ordered_columns]

In [None]:
def process_machine_data(csv_path):
    """Load one machine's CSV, smooth its state column and derive the fault tables.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file read with ``pd.read_csv``; it must provide the columns that
        ``get_fault_records`` selects ('日期', '时间', '机器状态').

    Returns
    -------
    tuple of pandas.DataFrame
        ``(fault_record, final_fault_record, count_fault_record)``: the
        outputs of ``get_fault_records``, ``save_fault_records`` and
        ``count_fault_records`` respectively.
    """
    raw = pd.read_csv(csv_path)

    # Replace the state column with its smoothed version before extracting runs.
    smoothed = raw.assign(机器状态=smooth_prediction(raw['机器状态']))

    records = get_fault_records(smoothed)
    wide_table = save_fault_records(records)
    summary = count_fault_records(records)

    return records, wide_table, summary

In [None]:
# Run the full pipeline once per machine (M201, then M202).
(
    fault_record_201,
    final_fault_record_201,
    count_fault_record_201,
) = process_machine_data('result/M201.csv')
(
    fault_record_202,
    final_fault_record_202,
    count_fault_record_202,
) = process_machine_data('result/M202.csv')


In [None]:
# Number of days in months 1..12.
# NOTE(review): February is fixed at 28 days and '日期' is compared as a
# running day number starting at 1 - confirm both against the data.
days_in_months = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]


def _monthly_fault_counts(fault_record, month_lengths):
    """Run ``count_fault_records`` month by month and stack the results.

    Month ``m`` covers the '日期' values in ``(first, first + length]``, where
    ``first`` is the total length of all preceding months. Every summary row
    is tagged with its 1-based month number in a '日期' column.
    """
    monthly = []
    month_start = 0
    for month, n_days in enumerate(month_lengths, start=1):
        month_end = month_start + n_days
        in_month = (fault_record['日期'] > month_start) & (fault_record['日期'] <= month_end)
        monthly.append(count_fault_records(fault_record[in_month]).assign(日期=month))
        month_start = month_end

    # Concatenate once instead of growing a frame inside the loop, and leave
    # out months without faults so they cannot affect the result dtypes. If
    # every month is empty, fall back to the empty frames to keep the columns.
    non_empty = [frame for frame in monthly if not frame.empty]
    return pd.concat(non_empty or monthly, ignore_index=True)


count_faults_1 = _monthly_fault_counts(fault_record_201, days_in_months)
count_faults_2 = _monthly_fault_counts(fault_record_202, days_in_months)


In [None]:
# Write the monthly summaries of both machines to Excel, without the index column.
count_faults_1.to_excel(excel_writer='result/result2-1统计.xlsx', index=False)
count_faults_2.to_excel(excel_writer='result/result2-2统计.xlsx', index=False)