In [19]:
import pandas as pd

# 读取数据
df = pd.read_csv("data.csv")

# 数据预处理
df['工单编号'].fillna(1, inplace=True)
df['录音时长(秒)'].fillna(0, inplace=True)
df['开始时间'] = pd.to_datetime(df['开始时间'])

# 1. 每日一个通话超过180秒的工单编号个数
df['超过180秒'] = df['录音时长(秒)'] > 180
over_180_daily = df.groupby(df['开始时间'].dt.date)['超过180秒'].sum().reset_index()
over_180_daily.rename(columns={'开始时间': '日期', '超过180秒': '超过180秒工单数'}, inplace=True)
over_180_daily['超过180秒工单数'] = over_180_daily['超过180秒工单数'].fillna(0).astype(int) # 转换并填充


# 2. 每日三分钟内出现2个以上通话统计的工单编号个数
df['3分钟内'] = df.groupby(pd.Grouper(key='开始时间', freq='3Min'))['工单编号'].transform('count') >= 2
within_3min_daily = df[df['3分钟内']].groupby(df['开始时间'].dt.date)['工单编号'].nunique().reset_index()
within_3min_daily.rename(columns={'开始时间': '日期', '工单编号': '3分钟内2个以上通话工单数'}, inplace=True)
within_3min_daily['3分钟内2个以上通话工单数'] = within_3min_daily['3分钟内2个以上通话工单数'].fillna(0).astype(int) # 转换并填充


# 3. 每日五分钟内出现2个以上通话统计的工单编号个数
df['5分钟内'] = df.groupby(pd.Grouper(key='开始时间', freq='5Min'))['工单编号'].transform('count') >= 3
within_5min_daily = df[df['5分钟内']].groupby(df['开始时间'].dt.date)['工单编号'].nunique().reset_index()
within_5min_daily.rename(columns={'开始时间': '日期', '工单编号': '5分钟内2个以上通话工单数'}, inplace=True)
within_5min_daily['5分钟内2个以上通话工单数'] = within_5min_daily['5分钟内2个以上通话工单数'].fillna(0).astype(int) # 转换并填充


# 4. 每天每个服务号的总通话次数
daily_service_calls = df.groupby([df['开始时间'].dt.date, '服务号'])['工单编号'].count().reset_index()
daily_service_calls.rename(columns={'开始时间': '日期', '工单编号': '通话次数'}, inplace=True)

# 合并前三个统计结果
merged_results = pd.merge(over_180_daily, within_3min_daily, on='日期', how='outer')
merged_results = pd.merge(merged_results, within_5min_daily, on='日期', how='outer')


final_results = pd.DataFrame()
final_results['日期']=merged_results['日期']
final_results['超过180秒工单数']=merged_results['超过180秒工单数']
final_results['3分钟内2个以上通话工单数']=merged_results['3分钟内2个以上通话工单数']
final_results['5分钟内2个以上通话工单数']=merged_results['5分钟内2个以上通话工单数']


# 将服务号转换为字符串类型
daily_service_calls['服务号'] = daily_service_calls['服务号'].astype(str)

# 数据透视，生成每个服务号的通话次数列
daily_service_calls_wide = daily_service_calls.pivot(index='日期', columns='服务号', values='通话次数').reset_index()
daily_service_calls_wide.columns.name = None  # 移除列索引名称


# 自定义服务号列名
daily_service_calls_wide.columns = ['日期'] + [f'{col}通话次数' for col in daily_service_calls_wide.columns[1:]]

# 合并所有统计结果
final_results = pd.merge(final_results, daily_service_calls_wide, on='日期', how='outer')


# 填充NaN值为0,并转化为整型
for col in final_results.columns:
    if final_results[col].dtype == 'float64':
        final_results[col] = final_results[col].fillna(0).astype(int)

final_results=final_results.sort_values(by='日期')

# 输出到 CSV
final_results.to_csv("result.csv", index=False)
print(final_results)

            日期  超过180秒工单数  3分钟内2个以上通话工单数  5分钟内2个以上通话工单数  51105950.0通话次数  \
0   2025-01-01          0              0              0               6   
1   2025-01-02         10              7              8              15   
2   2025-01-03         20             17              9              13   
3   2025-01-04          3              0              0               8   
4   2025-01-05          0              2              0               2   
..         ...        ...            ...            ...             ...   
70  2025-03-12          7             20             19               4   
71  2025-03-13          9              4              0              10   
72  2025-03-14          7              6              0               7   
73  2025-03-15          7              2              0               4   
74  2025-03-16          2              4              0               6   

    51105951.0通话次数  51105952.0通话次数  51105953.0通话次数  51105954.0通话次数  \
0                3           

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['工单编号'].fillna(1, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['录音时长(秒)'].fillna(0, inplace=True)


In [6]:
import pandas as pd

# 读取数据
df = pd.read_csv("data.csv")

# 数据预处理
df['工单编号'].fillna(1, inplace=True)
df['录音时长(秒)'].fillna(0, inplace=True)
df['开始时间'] = pd.to_datetime(df['开始时间'])

# 1. 每日一个通话超过180秒的工单编号个数
df['超过180秒'] = df['录音时长(秒)'] > 180
over_180_daily = df.groupby(df['开始时间'].dt.date)['超过180秒'].sum().reset_index()
over_180_daily.rename(columns={'开始时间': '日期', '超过180秒': '超过180秒工单数'}, inplace=True)
over_180_daily['超过180秒工单数'] = over_180_daily['超过180秒工单数'].fillna(0).astype(int)

# 2. 每日三分钟内出现2个以上通话统计的工单编号个数
df['3分钟内'] = df.groupby(pd.Grouper(key='开始时间', freq='3Min'))['工单编号'].transform('count') >= 2
within_3min_daily = df[df['3分钟内']].groupby(df['开始时间'].dt.date)['工单编号'].nunique().reset_index()
within_3min_daily.rename(columns={'开始时间': '日期', '工单编号': '3分钟内2个以上通话工单数'}, inplace=True)
within_3min_daily['3分钟内2个以上通话工单数'] = within_3min_daily['3分钟内2个以上通话工单数'].fillna(0).astype(int)

# 3. 每日五分钟内出现2个以上通话统计的工单编号个数
df['5分钟内'] = df.groupby(pd.Grouper(key='开始时间', freq='5Min'))['工单编号'].transform('count') >= 2
within_5min_daily = df[df['5分钟内']].groupby(df['开始时间'].dt.date)['工单编号'].nunique().reset_index()
within_5min_daily.rename(columns={'开始时间': '日期', '工单编号': '5分钟内3个以上通话工单数'}, inplace=True)
within_5min_daily['5分钟内3个以上通话工单数'] = within_5min_daily['5分钟内3个以上通话工单数'].fillna(0).astype(int)

# 4. 每天每个服务号的总通话次数
daily_service_calls = df.groupby([df['开始时间'].dt.date, '服务号'])['工单编号'].count().reset_index()
daily_service_calls.rename(columns={'开始时间': '日期', '工单编号': '通话次数'}, inplace=True)

# 合并前三个统计结果
merged_results = pd.merge(over_180_daily, within_3min_daily, on='日期', how='outer')
merged_results = pd.merge(merged_results, within_5min_daily, on='日期', how='outer')

final_results = pd.DataFrame()
final_results['日期'] = merged_results['日期']
final_results['超过180秒工单数'] = merged_results['超过180秒工单数']
final_results['3分钟内2个以上通话工单数'] = merged_results['3分钟内2个以上通话工单数']
final_results['5分钟内3个以上通话工单数'] = merged_results['5分钟内3个以上通话工单数']

# 数据透视，生成每个服务号的通话次数列
daily_service_calls_wide = daily_service_calls.pivot(index='日期', columns='服务号', values='通话次数').reset_index()
daily_service_calls_wide.columns.name = None

# 强制转换所有列名为字符串，并自定义服务号列名
daily_service_calls_wide.columns = ['日期'] + [f'{str(col).replace(".0", "")}通话次数' for col in daily_service_calls_wide.columns[1:]]

# 合并所有统计结果
final_results = pd.merge(final_results, daily_service_calls_wide, on='日期', how='outer')

# 填充NaN值为0,并转化为整型
for col in final_results.columns:
    if final_results[col].dtype == 'float64':
        final_results[col] = final_results[col].fillna(0).astype(int)

final_results = final_results.sort_values(by='日期')

# 输出到 CSV，使用 UTF-8 编码
final_results.to_csv("result.csv", index=False, encoding='utf-8')

# 输出到 Excel
final_results.to_excel("result.xlsx", index=False, engine='openpyxl')
print(final_results)

            日期  超过180秒工单数  3分钟内2个以上通话工单数  5分钟内3个以上通话工单数  51105950通话次数  \
0   2025-01-01          0              0              3             6   
1   2025-01-02         10              7             16            15   
2   2025-01-03         20             17             28            13   
3   2025-01-04          3              0              2             8   
4   2025-01-05          0              2              4             2   
..         ...        ...            ...            ...           ...   
70  2025-03-12          7             20             21             4   
71  2025-03-13          9              4              8            10   
72  2025-03-14          7              6              6             7   
73  2025-03-15          7              2              2             4   
74  2025-03-16          2              4              6             6   

    51105951通话次数  51105952通话次数  51105953通话次数  51105954通话次数  51105955通话次数  \
0              3             0             0   

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['工单编号'].fillna(1, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['录音时长(秒)'].fillna(0, inplace=True)
