In [48]:
import os
import re
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Unified data directories
Datadir = r'C:\Data\MobileData'
output_path = r'C:\Data\data'

def list_csv_files(directory, keyword):
    """List CSV files in the specified directory that contain the keyword, case-insensitively."""
    return [file for file in os.listdir(directory) if keyword.lower() in file.lower() and file.lower().endswith('.csv')]

def read_and_process_files(directory, keyword, columns=None, encoding='gbk'):
    """Read and process CSV files, returning a concatenated DataFrame."""
    files = list_csv_files(directory, keyword)
    dfs = []
    for file in files:
        file_path = os.path.join(directory, file)
        try:
            df = pd.read_csv(file_path, encoding=encoding, usecols=columns,na_values=["n/a", "na", "-"]) if columns else pd.read_csv(file_path, encoding=encoding,skiprows=2, header=0, na_values=["n/a", "na", "-"])
            df.columns = df.columns.str.replace(' ', '')
            dfs.append(df)
        except Exception as e:
            print(f"Error reading file {file}: {e}")
    return pd.concat(dfs, ignore_index=True).drop_duplicates() if dfs else pd.DataFrame()


def save_dataframe_to_csv(df, filename):
    """Save DataFrame to CSV file."""
    df.to_csv(os.path.join(output_path, filename), index=False, encoding='utf-8-sig')
    print(f"DataFrame exported to {filename}")

In [49]:
# 读取数据并重命名列
df_5GB = read_and_process_files(Datadir, '板卡规划(3000310)-5G', ['网元标识', '网元名称', '插槽号','板类型'])
df_5GB['备用'] = df_5GB['网元名称'].str.extract(r'(.*)-D')
# 去掉字段 "板"
df_5GB['板类型'] = df_5GB['板类型'].str.replace('板', '')

# 删除包含 'FCU' 和 'PSU' 的行
df_5GB = df_5GB[~df_5GB['板类型'].isin(['FCU', 'PSU'])]
df_5GB['板卡数量'] = 1

df_5GB_agg = df_5GB.groupby(['网元标识', '板类型'], as_index=False).agg({
    '板卡数量': 'sum',
    '网元名称': 'first'
})


df_5GB_agg.head(10)


Unnamed: 0,网元标识,板类型,板卡数量,网元名称
0,6299163,HBPOFp,2,房县泉水湾-D5H
1,6299163,HSCTDa,1,房县泉水湾-D5H
2,6299164,HBPOFp,1,房县新一中-D5H
3,6299164,HSCTDa,1,房县新一中-D5H
4,6299165,HBPOFp,1,房县红塔高碑工业园-D5H
5,6299165,HSCTDa,1,房县红塔高碑工业园-D5H
6,6299166,HBPOFbg,1,房县十三村委会-D5H
7,6299166,HBPOFp,1,房县十三村委会-D5H
8,6299166,HSCTDa,1,房县十三村委会-D5H
9,6299167,HBPOFp,1,房县诗经小镇1-D5H


In [50]:
# 读取数据并重命名列
df_4GB = read_and_process_files(Datadir, '板卡规划(1000310)-4G',['网元标识', '网元名称', '插槽号','板类型'])


df_4GB['备用2'] = df_4GB['网元名称'].str.extract(r'(.*)-D')
# 去掉字段 "板"
df_4GB['板类型'] = df_4GB['板类型'].str.replace('板', '')

# 包含 ['BPOKa', 'BPOI','SCTF','HSCTF']的行
df_4GB = df_4GB[df_4GB['板类型'].isin(['BPOKa', 'BPOI','SCTF','HSCTF'])]
df_4GB['板卡数量'] = 1

df_4GB_agg = df_4GB.groupby(['网元标识', '板类型'], as_index=False).agg({
    '板卡数量': 'sum',
    '网元名称': 'first'
})

df_4GB_agg.head(10)

Unnamed: 0,网元标识,板类型,板卡数量,网元名称
0,291918,BPOI,1,D_赤壁市中影城商业及住宅楼-DLW
1,309377,BPOI,1,十堰林区盛景怡家超市-DLW
2,351744,BPOKa,1,郧西老百科-DL3D
3,351745,BPOKa,1,郧西民联-DL3D
4,351746,BPOKa,1,郧西社会福利院-DL3D
5,351747,BPOKa,1,郧西天河佳苑-DL3D
6,351748,BPOKa,1,郧西金钻广场-DL3D
7,351749,BPOKa,1,郧西吴家营-DL3D
8,351750,BPOKa,1,郧西土地局-DL3D
9,351751,BPOKa,1,郧西天河余家大院-DL3D


In [51]:
# Save results
save_dataframe_to_csv(df_5GB_agg, 'df_5GB_agg.csv')
save_dataframe_to_csv(df_4GB_agg, 'df_4GB_agg.csv')


DataFrame exported to df_5GB_agg.csv
DataFrame exported to df_4GB_agg.csv


In [52]:
# 合并数据，根据 '备用' 和 '备用2' 进行匹配
merged_df = pd.merge(df_4GB_agg, df_5GB_agg, left_on=df_4GB_agg['网元名称'].str.extract(r'(.*)-D')[0].str.replace(r'^D_', '', regex=True), right_on=df_5GB_agg['网元名称'].str.extract(r'(.*)-D')[0].str.replace(r'^D_', '', regex=True), how='left')

result_df = merged_df[['网元标识_y', '网元名称_y', '板类型_x', '板卡数量_x']].rename(
    columns={
        '网元标识_y': '网元标识',
        '网元名称_y': '网元名称',
        '板类型_x': '板类型',
        '板卡数量_x': '板卡数量'
    }
)

# 删除空行
result_df = result_df.dropna(subset=['网元标识'])

# 追加数据到 df_5GB
df_5GB_con = pd.concat([df_5GB_agg, result_df], ignore_index=True)

# 选择最终结果的特定列
df_5GB_con = df_5GB_con[['网元标识', '网元名称', '板类型', '板卡数量']]

# 删除重复行
df_5GB_con= df_5GB_con.drop_duplicates()




In [53]:
df_5GB_con.sample(10)

Unnamed: 0,网元标识,网元名称,板类型,板卡数量
1667,6337514.0,武当山太极湖会议中心-D5H,HBPOF,1
2197,6342060.0,丹江郧阳码头（通感）4.9G-D5H,HBPOF,1
425,6311668.0,通城北门营业厅-D5S,HBPOFbp,1
2498,6337316.0,房县司法局-D5H,BPOKa,1
1833,6337594.0,竹山国际大酒店-D5H,HBPOF,1
370,6311636.0,通城东阁二桥-D5H,HSCTDa,1
2401,6337617.0,竹山圣玉绿松石-D5H,BPOI,2
2328,6330434.0,通城电视台-D5H,BPOKa,1
1580,6337470.0,林区神农顶-D5H,HSCTDa,1
2214,6355707.0,D_通城金泰豪苑B栋C栋-D5S,HSCTDa,1


In [55]:
# Save results
save_dataframe_to_csv(df_5GB_con, 'df_5GB_con.csv')


DataFrame exported to df_5GB_con.csv


In [60]:

df_5GB_con = df_5GB_con[df_5GB_con['网元名称'].str.contains('D5H', na=False)]
# 使用 pivot_table 进行 unstack 操作
pivot_df = df_5GB_con.pivot_table(
    index=['网元标识', '网元名称'],
    columns='板类型',
    values='板卡数量',
    fill_value=0
).reset_index()

In [61]:
pivot_df .head()

板类型,网元标识,网元名称,BPOI,BPOKa,EMAU,HBPOF,HBPOFbg,HBPOFbp,HBPOFp,HMCPB,HMCPC,HSCTDa,HSCTDb
0,6299163.0,房县泉水湾-D5H,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0
1,6299164.0,房县新一中-D5H,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
2,6299165.0,房县红塔高碑工业园-D5H,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
3,6299166.0,房县十三村委会-D5H,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
4,6299167.0,房县诗经小镇1-D5H,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [62]:

# 确定从 'BPOI' 开始到 'HSCTF' 的所有列
start_col = 'BPOI'
end_col = 'HSCTDb'
start_index = pivot_df.columns.get_loc(start_col)
end_index = pivot_df.columns.get_loc(end_col)

# 将从 'BPOI' 开始到 'HSCTF' 的所有列转换为整数型
for col in pivot_df.columns[start_index:end_index + 1]:
    # 将无法转换为整数的值替换为 NaN，然后填充为 0，再转换为整数
    pivot_df[col] = pd.to_numeric(pivot_df[col], errors='coerce').fillna(0).astype(int)

# 按位连接这些列成一列
pivot_df['Model2'] = pivot_df.iloc[:, start_index:end_index + 1].astype(str).agg(''.join, axis=1)



pivot_df.head(10)

板类型,网元标识,网元名称,BPOI,BPOKa,EMAU,HBPOF,HBPOFbg,HBPOFbp,HBPOFp,HMCPB,HMCPC,HSCTDa,HSCTDb,Model2
0,6299163.0,房县泉水湾-D5H,0,0,0,0,0,0,2,0,0,1,0,20010
1,6299164.0,房县新一中-D5H,0,0,0,0,0,0,1,0,0,1,0,10010
2,6299165.0,房县红塔高碑工业园-D5H,0,0,0,0,0,0,1,0,0,1,0,10010
3,6299166.0,房县十三村委会-D5H,0,0,0,0,1,0,1,0,0,1,0,1010010
4,6299167.0,房县诗经小镇1-D5H,0,0,0,0,0,0,1,0,0,1,0,10010
5,6299169.0,房县三中-D5H,1,0,0,0,1,0,1,0,0,1,0,10001010010
6,6299170.0,房县朱湾-D5H,0,0,0,0,1,0,0,0,0,1,0,1000010
7,6299171.0,房县嘉卉花园小区-D5H,0,0,0,0,0,0,1,0,0,1,0,10010
8,6299172.0,丹江临港家园-D5H,0,0,0,0,0,0,1,0,0,1,0,10010
9,6299173.0,丹江月亮湾-D5H,0,0,0,0,1,0,1,0,0,1,0,1010010


In [63]:
# 对 'Model2' 列进行分组，并统计每个组中 '网元标识' 的数量
grouped_counts = pivot_df.groupby('Model2')['网元标识'].count().reset_index()

# 重命名计数结果列
grouped_counts.rename(columns={'网元标识': '网元标识计数'}, inplace=True)


grouped_counts.head(100)


Unnamed: 0,Model2,网元标识计数
0,10001,1
1,10010,103
2,20010,3
3,100001,3
4,100010,36
5,110010,2
6,200010,3
7,1000010,163
8,1010010,5
9,2000010,34


In [64]:
# Save results
save_dataframe_to_csv(pivot_df, 'pivot_df.csv')

DataFrame exported to pivot_df.csv
