In [None]:
import os
import pandas as pd
import re

def load_and_process_files(directory, keyword, file_extension, is_rru=False):
    """
    Load, merge and clean the power-consumption CSV exports found in a directory.

    Every file in ``directory`` whose name contains ``keyword`` and ends with
    ``file_extension`` (extension compared case-insensitively) is read with the
    ``cp936`` encoding, skipping the first two lines and using the next one as
    the header. The frames are concatenated, exact duplicate rows are dropped,
    ``开始时间`` / ``结束时间`` are reduced to ``YYYY-MM-DD`` strings, the ``对象``
    column is split into identifier columns and the metric columns are
    converted to numbers (unparseable values become 0).

    Args:
        directory (str): Directory that holds the CSV files.
        keyword (str): Substring a file name must contain to be loaded.
        file_extension (str): File extension to match, e.g. ``'.csv'``.
        is_rru (bool): ``True`` to process RRU files, ``False`` for BBU files.

    Returns:
        pd.DataFrame: For RRU files the columns ``BBUID``, ``RRUID``,
        ``开始时间`` and ``AAU功耗[千瓦时]``. For BBU files the columns
        ``BBU名称``, ``BBUID``, ``站型``, ``开始时间``, ``BBU功耗[千瓦时]``, the two
        CPU load columns (as int) and ``BBU功耗(R1054_001)[W]``. Rows whose
        ``对象`` value is missing or does not match the expected pattern keep
        ``None`` in the identifier columns (and ``'未知'`` as ``站型``).
        An empty DataFrame is returned when no file could be read.

    Raises:
        OSError: If ``directory`` cannot be listed.
        KeyError: If the merged data lacks one of the required columns.
    """
    # Compiled once instead of on every row.
    bbu_pattern = re.compile(r'(.+)\(gNB=(\d+)\)')
    rru_pattern = re.compile(r'(.+)\(gNB=(\d+),invRRU=(\d+)\)')

    def load_csv_files():
        # Lower-case both sides so '.CSV' and '.csv' behave the same, and sort
        # the names so the concatenation order does not depend on os.listdir.
        suffix = file_extension.lower()
        files = sorted(
            file for file in os.listdir(directory)
            if keyword in file and file.lower().endswith(suffix)
        )
        dfs = []
        for file in files:
            file_path = os.path.join(directory, file)
            try:
                df = pd.read_csv(file_path, skiprows=2, header=0, encoding='cp936', na_values=["n/a", "na", "-"])
                df.columns = df.columns.str.replace(' ', '')
                dfs.append(df)
            except Exception as e:
                # Best effort: one unreadable file must not abort the batch.
                print(f"读取文件 {file} 时发生错误：{e}")
        return dfs

    def process_dates(df):
        # Keep only the date part (text before the first whitespace).
        for column in ['开始时间', '结束时间']:
            df[column] = pd.to_datetime(df[column].astype(str).str.split().str[0], errors='coerce').dt.strftime('%Y-%m-%d')
        return df

    def split_field(field, is_rru):
        # Missing cells arrive as float NaN; re.match would raise on them.
        if not isinstance(field, str):
            return (None, None)
        if is_rru:
            match = rru_pattern.match(field)
            return (match.group(2), match.group(3)) if match else (None, None)
        match = bbu_pattern.match(field)
        return (match.group(1), match.group(2)) if match else (None, None)

    def classify_station(name):
        # An unparsed object leaves the name as None -> unknown station type.
        if not isinstance(name, str):
            return '未知'
        for marker, label in (('D5H', '宏站'), ('D5M', '微站'), ('D5S', '室分')):
            if marker in name:
                return label
        return '未知'

    dfs = load_csv_files()
    if not dfs:
        print("没有成功读取任何文件，请检查文件路径和过滤条件。")
        return pd.DataFrame()

    df_combined = pd.concat(dfs, ignore_index=True).drop_duplicates()
    df_combined = process_dates(df_combined)

    # Parse every object string once; list assignment is positional, so the
    # non-contiguous index left by drop_duplicates is not a problem.
    parsed = [split_field(value, is_rru) for value in df_combined['对象']]
    first_parts = [pair[0] for pair in parsed]
    second_parts = [pair[1] for pair in parsed]

    if is_rru:
        df_combined['BBUID'] = first_parts
        df_combined['RRUID'] = second_parts
        # .copy() so the assignments below act on an independent frame.
        df_combined = df_combined[['BBUID', 'RRUID', '开始时间', 'AAU功耗[千瓦时]']].copy()
        df_combined['AAU功耗[千瓦时]'] = pd.to_numeric(df_combined['AAU功耗[千瓦时]'], errors='coerce').round(4).fillna(0)
    else:
        df_combined['BBU名称'] = first_parts
        df_combined['BBUID'] = second_parts
        df_combined['站型'] = df_combined['BBU名称'].apply(classify_station)
        df_combined = df_combined[['BBU名称', 'BBUID', '站型', '开始时间', 'BBU功耗[千瓦时]', 'gNB基站CPU平均负荷(R1056_001)[%]', 'gNB基站CPU峰值负荷(R1056_002)[%]', 'BBU功耗(R1054_001)[W]']].copy()
        for col in ['BBU功耗[千瓦时]', 'BBU功耗(R1054_001)[W]']:
            df_combined[col] = pd.to_numeric(df_combined[col], errors='coerce').round(4).fillna(0)
        for col in ['gNB基站CPU平均负荷(R1056_001)[%]', 'gNB基站CPU峰值负荷(R1056_002)[%]']:
            df_combined[col] = pd.to_numeric(df_combined[col], errors='coerce').fillna(0).astype(int)

    return df_combined

def calculate_antenna_and_power(bbu_df, rru_df):
    """
    Add antenna count, summed RRU energy, band and total energy to the BBU data.

    The RRU rows are grouped by ``BBUID`` and ``开始时间``; the number of
    distinct ``RRUID`` values becomes ``天线数量`` and the sum of
    ``AAU功耗[千瓦时]`` becomes ``RRU总功耗``. Both are left-joined onto the BBU
    rows, so BBU rows without matching RRU data keep NaN in ``天线数量``,
    ``RRU总功耗`` and ``总功耗``.

    Args:
        bbu_df (pd.DataFrame): BBU data with at least ``BBUID``, ``开始时间``,
            ``BBU名称`` and ``BBU功耗[千瓦时]``. It is not modified.
        rru_df (pd.DataFrame): RRU data with ``BBUID``, ``开始时间``, ``RRUID``
            and ``AAU功耗[千瓦时]``.

    Returns:
        pd.DataFrame: A new frame with the BBU columns followed by
        ``天线数量``, ``RRU总功耗``, ``频段`` and ``总功耗``.
    """
    join_keys = ['BBUID', '开始时间']

    # One pass over the RRU data yields both per-BBU/day statistics, so a
    # single merge is enough.
    rru_stats = (
        rru_df.groupby(join_keys)
        .agg(**{
            '天线数量': ('RRUID', 'nunique'),
            'RRU总功耗': ('AAU功耗[千瓦时]', 'sum'),
        })
        .reset_index()
    )
    bbu_df = pd.merge(bbu_df, rru_stats, on=join_keys, how='left')

    # A missing (None/NaN) name cannot contain '700M'; the isinstance guard
    # keeps it from raising TypeError and lets it fall into the default band.
    bbu_df['频段'] = bbu_df['BBU名称'].apply(
        lambda name: '700M' if isinstance(name, str) and '700M' in name else '2.6G'
    )
    bbu_df['BBU功耗[千瓦时]'] = bbu_df['BBU功耗[千瓦时]'].round(4)
    bbu_df['RRU总功耗'] = bbu_df['RRU总功耗'].round(4)
    bbu_df['总功耗'] = (bbu_df['BBU功耗[千瓦时]'] + bbu_df['RRU总功耗']).round(4)

    return bbu_df

# Example usage: load the BBU and RRU exports, merge them and save the result.
KPIDir = r'C:\Users\Administrator\Documents\MnewData'
RRUDir = r'C:\Users\Administrator\Documents\MnewData'

BBU_PL = load_and_process_files(KPIDir, 'DT_BBU功耗_', '.csv')
RRU_PL = load_and_process_files(RRUDir, 'DT_RRU功耗_', '.csv', is_rru=True)

if BBU_PL.empty or RRU_PL.empty:
    # Nothing to merge unless both inputs produced rows.
    print("没有数据可显示。")
else:
    BS_PL = calculate_antenna_and_power(BBU_PL, RRU_PL)

    # Preview the first ten rows as a left-aligned markdown table.
    preview = BS_PL.head(10)
    print(preview.to_markdown(index=False, numalign="left", stralign="left"))

    # utf-8-sig writes a byte-order mark at the start of the file.
    output_path = r'C:\Users\Administrator\PYMo\Data\BS_PL.csv'
    BS_PL.to_csv(output_path, index=False, encoding='utf-8-sig')
    print(f"BS_PL已保存到 {output_path}")

| BBU名称            | BBUID   | 站型   | 开始时间   | BBU功耗[千瓦时]   | gNB基站CPU平均负荷(R1056_001)[%]   | gNB基站CPU峰值负荷(R1056_002)[%]   | BBU功耗(R1054_001)[W]   | 天线数量   | RRU总功耗   | 频段   | 总功耗   |
|:-------------------|:--------|:-------|:-----------|:------------------|:-----------------------------------|:-----------------------------------|:------------------------|:-----------|:------------|:-------|:---------|
| 丹江蔡湾700M-D5H   | 6299318 | 宏站   | 2024-11-11 | 6.6458            | 3                                  | 7                                  | 26583                   | 3          | 5.4786      | 700M   | 12.1244  |
| 丹江蔡湾700M-D5H   | 6299318 | 宏站   | 2024-11-12 | 6.6123            | 3                                  | 7                                  | 26449                   | 3          | 5.3348      | 700M   | 11.9471  |
| 丹江蔡湾700M-D5H   | 6299318 | 宏站   | 2024-11-13 | 6.5878            | 3                                  | 6                                  | 26351               

In [None]:
# Show 10 randomly sampled rows of BS_PL; the cell output follows below.
BS_PL.sample(10)

Unnamed: 0,BBU名称,BBUID,站型,开始时间,BBU功耗[千瓦时],gNB基站CPU平均负荷(R1056_001)[%],gNB基站CPU峰值负荷(R1056_002)[%],BBU功耗(R1054_001)[W],天线数量,RRU总功耗,频段,总功耗
131808,房县阳坪700M-D5H,6299365,宏站,2024-11-20,5.6463,5,10,22585.0,3.0,4.4044,700M,10.0507
80226,赤壁八蛇畈700M-D5H,6312851,宏站,2024-10-24,8.9838,4,8,35935.0,9.0,27.7707,700M,36.7545
235906,房县朱湾-D5H,6299170,宏站,2024-11-27,7.01,2,3,28040.0,2.0,5.5181,2.6G,12.5281
232302,丹江红花路-D5H,6337207,宏站,2024-11-21,5.8163,7,10,23265.0,3.0,28.4808,2.6G,34.2971
21907,丹江金砂院-D5H,6337219,宏站,2024-11-15,10.3143,7,9,41257.0,3.0,33.3616,2.6G,43.6759
43894,崇阳大岭700M-D5H,6312807,宏站,2024-10-08,4.7103,4,7,18841.0,2.0,2.8231,700M,7.5334
86800,林区堂坊新村700M-D5H,6337780,宏站,2024-10-25,14.0925,9,10,56370.0,9.0,15.1702,700M,29.2627
70460,房县狮子岩700M-D5H,6338250,宏站,2024-10-16,4.7508,6,7,19003.0,3.0,4.7304,700M,9.4812
154593,麻城丁家山700M-D5H,6322744,宏站,2024-11-27,5.7908,5,9,23163.0,3.0,11.2035,700M,16.9943
89755,竹山竹坪解家沟村委会700M-D5H,6341670,宏站,2024-10-26,4.1698,4,8,16679.0,2.0,2.2751,700M,6.4449


: 