# 导入模块

In [34]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text, MetaData, Table
from sqlalchemy.exc import OperationalError, SQLAlchemyError
import warnings
warnings.filterwarnings('ignore')
from urllib.parse import quote_plus
#   pip install sqlalchemy==2.0.20 需要2.0版本
pd.set_option('display.max_columns', None)


In [35]:
# def create_engines():
#     # 创建数据库连接引擎
#     # 请根据你的实际情况修改以下参数
#     try:
#         engine = create_engine('mysql+pymysql://ylc:digua2025@)@%@rm-wz95u72vb6922ju02bo.mysql.rds.aliyuncs.com/db_digua_business?charset=utf8mb4')
#         with engine.connect() as connection:
#             print("数据库连接成功！")
#     except OperationalError as e:
#         print(f"连接失败：{e}")
#         print("请检查：1) 用户名/密码 2) 主机地址 3) 端口号 4) 数据库是否存在 5) MySQL服务是否运行")
#     return engine


def create_engines():
    """Create a SQLAlchemy engine for the MySQL database and test the connection.

    Each connection setting falls back to the value written below and can be
    overridden through an environment variable (DIGUA_DB_USER,
    DIGUA_DB_PASSWORD, DIGUA_DB_HOST, DIGUA_DB_PORT, DIGUA_DB_NAME).

    Returns:
        The engine when a test connection succeeds, otherwise None (the
        failure is printed instead of being raised).
    """
    import os

    # NOTE(security): the fallback values keep real credentials in source
    # control. Prefer setting the environment variables and removing them.
    db_username = os.environ.get('DIGUA_DB_USER', 'ylc')
    db_password = os.environ.get('DIGUA_DB_PASSWORD', 'digua2025@)@%')
    db_host = os.environ.get(
        'DIGUA_DB_HOST', 'rm-wz95u72vb6922ju02bo.mysql.rds.aliyuncs.com'
    )
    db_port = os.environ.get('DIGUA_DB_PORT', '3306')
    db_name = os.environ.get('DIGUA_DB_NAME', 'db_digua_business')

    engine = None
    try:
        # The password contains '@', ')' and '%', which must be URL-encoded
        # before being embedded in the connection URL.
        encoded_password = quote_plus(db_password)
        connection_string = f'mysql+pymysql://{db_username}:{encoded_password}@{db_host}:{db_port}/{db_name}?charset=utf8mb4'

        engine = create_engine(connection_string)

        # Open and immediately close one connection to validate the settings.
        with engine.connect():
            print("数据库连接成功！")

        return engine

    except OperationalError as e:
        print(f"连接失败：{e}")
        print("请检查：")
        print(f"1) 数据库用户名: {db_username}")
        print("2) 数据库密码: ***** (已进行URL编码处理)")
        print(f"3) 数据库主机地址: {db_host}")
        print(f"4) 数据库端口号: {db_port}")
        print(f"5) 数据库名称: {db_name}")
        print("6) 网络连接是否正常")
        print("7) 数据库服务是否运行")
    except Exception as e:
        print(f"发生其他错误：{e}")

    # Only reached on failure: release the pool of the unusable engine.
    if engine is not None:
        engine.dispose()
    return None

In [36]:


def add_columns(table_name, new_column, column_type, default_value, remark):
    """Add (or re-create) a column on an existing table.

    If ``new_column`` already exists on the table it is dropped first, so any
    data stored in it is lost, and then added again with the new definition.

    Args:
        table_name: Name of the table to alter.
        new_column: Name of the column to add.
        column_type: SQL data type, e.g. ``'varchar(255)'`` or ``'int'``.
        default_value: SQL expression placed after ``DEFAULT`` verbatim
            (e.g. ``'NULL'``); it is not quoted by this function.
        remark: Text stored as the column ``COMMENT``.

    Errors are printed rather than raised. ``table_name``, ``new_column``,
    ``column_type`` and ``default_value`` are interpolated into the SQL
    unescaped, so they must come from trusted code, never from user input.
    """
    engine = create_engines()
    if engine is None:
        # create_engines() already printed the reason.
        print("添加字段失败: 数据库连接不可用")
        return

    string_types = {'varchar', 'char', 'text', 'longtext', 'mediumtext', 'tinytext'}
    type_spec = column_type.strip().lower()
    # Compare only the base type so that 'varchar(255)' counts as a string
    # type; the original compared the whole spec and never matched it.
    base_type = type_spec.split('(', 1)[0].strip()
    # Only a bare type or type(length) may be followed by CHARACTER SET.
    is_plain_type = '(' not in type_spec or type_spec.endswith(')')

    column_ddl = f"{new_column} {column_type}"
    if base_type in string_types and is_plain_type:
        column_ddl += " CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci"
    # Escape backslashes and single quotes so the comment stays one literal.
    safe_remark = remark.replace("\\", "\\\\").replace("'", "''")
    column_ddl += f" DEFAULT {default_value} COMMENT '{safe_remark}'"

    try:
        # Reflect the table to learn which columns already exist.
        metadata = MetaData()
        table = Table(table_name, metadata, autoload_with=engine)
        column_names = {column.name for column in table.columns}

        with engine.begin() as connection:
            if new_column in column_names:
                connection.execute(
                    text(f"ALTER TABLE {table_name} DROP COLUMN {new_column}")
                )
                print(f"已删除原有字段 {new_column}")

            connection.execute(
                text(f"ALTER TABLE {table_name} ADD COLUMN {column_ddl}")
            )
            print(f"成功添加字段 {new_column} 到表 {table_name}")

    except SQLAlchemyError as e:
        print(f"添加字段失败: {e}")
    finally:
        engine.dispose()

In [37]:
# # 表名，字段名，类型，默认值，备注
# # add_columns('test', 'M7', 'varchar(255)', '', 'M7')
# table_list = ['Vintage30_order', 'Vintage60_order', 'Vintage90_order', 'Vintage30_purchase_amount', 'Vintage60_purchase_amount', 'Vintage90_purchase_amount']
# # Add the sort36 column to every table
# for table_name in table_list:
#     add_columns(
#         table_name=table_name,
#         new_column='sort36',
#         column_type='varchar(255)',
#         default_value='NULL',
#         remark='sort36'
#     )

In [38]:
# 写入数据
def insert_data(df, table_name):
    """Replace the contents of ``table_name`` with the rows of ``df``.

    Existing rows are deleted and the new rows appended inside a single
    transaction, so a failed insert rolls the delete back instead of leaving
    the table empty. If the table does not exist, ``to_sql`` creates it.

    Args:
        df: DataFrame to write; its index is not written.
        table_name: Target table name. It is interpolated into the SQL
            unescaped, so it must come from trusted code.

    Errors are printed rather than raised.
    """
    from sqlalchemy import inspect

    engine = create_engines()
    if engine is None:
        # create_engines() already printed the reason.
        print("写入数据时出错：数据库连接不可用")
        return

    try:
        rows_deleted = False
        with engine.begin() as connection:
            if inspect(connection).has_table(table_name):
                has_rows = connection.execute(
                    text(f"SELECT EXISTS(SELECT 1 FROM {table_name} LIMIT 1)")
                ).scalar()
                if has_rows:
                    connection.execute(text(f"""DELETE FROM {table_name}"""))
                    rows_deleted = True
                else:
                    print(f"表 {table_name} 没有数据 请插入数据")
            else:
                print(f"表 {table_name} 不存在，将在写入时自动创建")

            # Passing the open connection keeps the insert in the same
            # transaction as the DELETE above.
            df.to_sql(
                name=table_name,
                con=connection,
                if_exists='append',
                index=False
            )

        # Reported only after the transaction has committed.
        if rows_deleted:
            print(f"成功删除表 {table_name} 的数据")
        print("数据成功写入数据库！")

    except Exception as e:
        print(f"写入数据时出错：{e}")

    finally:
        engine.dispose()


# 读取数据

In [39]:
# Open the workbook in a context manager so the file handle is released once
# all sheets have been parsed.
with pd.ExcelFile('F:/租后表分析数据_ld/写入数据库/迪瓜租机数据 to 客商_存数据库_20251011.xlsx') as pare:
    # Sheets are addressed by position. Per the original author's note the
    # expected order is: ['业务量及通过率', '余额衰减', 'vintage（金额比例）',
    # '每月截面数据', '首逾率', '迁徙率'] -- verify if the workbook layout changes.
    sheet_names = pare.sheet_names
    # '余额衰减'; header=1 uses the second row of the sheet as column names.
    df1 = pare.parse(sheet_names[1], header=1)
    # 'vintage（金额比例）'; the first column is dropped.
    df2 = pare.parse(sheet_names[2], header=1)
    df2 = df2.iloc[:, 1:]
    # '每月截面数据'
    df3 = pare.parse(sheet_names[3], header=1)
    # '首逾率'; header=2 uses the third row; the first column is dropped.
    df4 = pare.parse(sheet_names[4], header=2)
    df4 = df4.iloc[:, 1:]
    # '迁徙率'
    df5 = pare.parse(sheet_names[5], header=1)

# 余额衰减

In [40]:
# Notebook preview: show rows 43-81 and columns 40-79 of df1.
# df1_2 is reassigned further down before anything is written to the database.
df1_2 = df1.iloc[43:82, 40:80]
df1_2
# df1

Unnamed: 0,下单月份.1,采购金额,MOB0.1,MOB1.1,MOB2.1,MOB3.1,MOB4.1,MOB5.1,MOB6.1,MOB7.1,MOB8.1,MOB9.1,MOB10.1,MOB11.1,MOB12.1,MOB13.1,MOB14.1,MOB15.1,MOB16.1,MOB17.1,MOB18.1,MOB19.1,MOB20.1,MOB21.1,MOB22.1,MOB23.1,MOB24.1,MOB25.1,MOB26.1,MOB27.1,MOB28.1,MOB29.1,MOB30.1,MOB31.1,MOB32.1,MOB33.1,MOB34.1,MOB35.1,MOB36.1
43,2022-07,87838.0,72300.64,64152.53,60646.15,57035.3,53344.39,45224.51,41989.16,40171.42,33725.42,32711.29,32090.39,32090.39,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29867.95,29295.99,29295.99,29295.99,29295.99,29295.99
44,2022-08,247673.0,198920.3,179674.44,166456.98,154726.27,141613.68,131680.69,122604.54,116434.38,111694.61,106600.78,104029.34,104029.34,102302.34,102234.34,102234.34,102234.34,102234.34,102234.34,102234.34,102234.34,102234.34,102234.34,99382.41,99382.41,99382.41,99382.41,99382.41,99382.41,99382.41,99382.41,99240.41,99240.41,99240.41,99240.41,96900.4,96900.4,96900.4
45,2022-09,174241.9,137762.71,122497.47,114518.34,106070.78,99184.36,91041.75,84547.91,77078.46,66852.9,59703.05,53467.99,53106.99,52615.99,52337.99,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2,50989.2
46,2022-10,155454.49,133980.12,124983.21,116318.11,102232.44,95087.54,88474.44,81393.64,74439.74,69727.73,64461.92,57370.15,53624.15,43222.29,41121.29,40981.29,40981.29,40888.29,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,40605.79,
47,2022-11,194740.22,165244.62,154570.62,139399.92,128091.72,116776.62,107563.32,98002.62,88293.72,79165.72,72100.12,59914.82,56530.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,49137.12,,
48,2022-12,369623.59,309658.38,283819.87,262012.67,234846.25,213310.65,190274.03,171621.32,151584.53,133199.01,119072.99,108895.8,96650.7,79626.9,75832.4,75832.4,75832.4,75832.4,75832.4,74073.5,74073.5,74073.5,74073.5,74073.5,74073.5,74073.5,74073.5,74073.5,74073.5,72818.3,72818.3,72818.3,72818.3,72818.3,72818.3,,,
49,2023-01,1078944.0,902155.85,811601.6,733103.7,661184.2,580493.2,508660.8,445440.45,385758.85,348749.25,314232.05,293268.75,272555.1,215423.75,206808.5,204260.2,203058.7,203058.7,203042.2,203042.2,203042.2,203042.2,203042.2,203042.2,203042.2,201697.2,201697.2,201697.2,201697.2,201697.2,201697.2,201697.2,201697.2,199704.2,,,,
50,2023-02,1627995.82,1229669.29,1264372.74,1167161.5,1067757.95,973443.75,873111.02,779516.32,696965.52,631855.7,574867.56,523843.47,484726.9,366329.04,337471.53,332209.1,325506.53,325506.53,318554.53,318554.53,317498.53,317498.53,317498.53,309279.93,309279.93,309279.93,309279.93,309279.93,309279.93,309279.93,308131.14,301425.44,297827.14,,,,,
51,2023-03,1518576.0,1120068.89,1222207.27,1104055.68,1004662.71,896874.85,804775.77,723261.0,636819.49,582787.38,535263.47,496116.31,469524.84,354309.43,324560.64,321215.74,321215.74,321083.67,321083.67,321083.67,321083.67,321083.67,321083.67,321083.67,321083.67,320491.67,320491.67,320491.67,317345.27,308563.0,308563.0,306086.21,,,,,,
52,2023-04,3193527.5,2678931.07,2663982.04,2443707.97,2228458.28,2018129.19,1815607.77,1613143.12,1443065.35,1311892.97,1186644.93,1084658.57,1020908.85,711563.71,653639.15,645967.75,642773.25,640535.18,638804.38,624578.83,623711.82,622386.82,616476.22,614253.29,611979.02,611979.02,611979.02,609999.81,601543.15,601543.15,601543.15,,,,,,,


In [41]:
def update_col1(df):
    """Normalise column names and the month column of a balance-decay frame.

    Every column label is cut at its first '.', which removes the '.1'-style
    suffixes seen on repeated headers. The three known Chinese headers are
    then mapped to their database names, and ``search_time`` is rewritten
    from 'YYYY.MM' to 'YYYY-MM'.

    The frame is modified in place and also returned.
    """
    def _normalise_month(value):
        # Missing values are passed through untouched.
        if pd.isna(value):
            return value
        text_value = str(value)
        pieces = text_value.split('.')
        if len(pieces) > 1:
            month = pieces[1]
            # A float such as 2022.1 is October whose trailing zero was lost.
            if month == '1':
                month = '10'
            return pieces[0] + '-' + month
        # No '.' present (e.g. already 'YYYY-MM'): keep the text as is.
        return text_value

    stripped = {label: label.split('.')[0] for label in df.columns.to_list()}
    df.rename(columns=stripped, inplace=True)
    df.rename(
        columns={
            '下单月份': 'search_time',
            '出库订单数': 'out_order_number',
            '采购金额': 'purchase_amount',
        },
        inplace=True,
    )
    df.loc[:, 'search_time'] = df.search_time.apply(_normalise_month)
    return df

# 余额衰减 订单口径
# Take the first 39 rows and the first 39 columns of df1 (iloc[:39, :39]).
# Each slice is copied before being handed to update_col1, which renames and
# rewrites columns in place; this keeps df1 itself untouched.

# Order-count view: four row bands of the left-hand column group.
df1_1 = update_col1(df1.iloc[:39, :39].copy())
df1_2 = update_col1(df1.iloc[43:82, :39].copy())
df1_3 = update_col1(df1.iloc[85:124, :39].copy())
df1_4 = update_col1(df1.iloc[127:166, :39].copy())

# Amount view: the same row bands of the right-hand column group.
df1_5 = update_col1(df1.iloc[:39, 40:80].copy())
df1_6 = update_col1(df1.iloc[43:82, 40:80].copy())
df1_7 = update_col1(df1.iloc[85:124, 40:80].copy())
df1_8 = update_col1(df1.iloc[127:166, 40:80].copy())

# df_list and table_list are paired by position.
df_list = [df1_1, df1_2, df1_3, df1_4, df1_5, df1_6, df1_7, df1_8]
table_list = ['mob_order_all','mob_order_12m','mob_order_18m','mob_order_24m', 'mob_purchase_amount_all', 'mob_purchase_amount_12m', 'mob_purchase_amount_18m','mob_purchase_amount_24m']
for frame, table_name in zip(df_list, table_list):
    insert_data(frame, table_name)


数据库连接成功！
成功删除表 mob_order_all 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 mob_order_12m 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 mob_order_18m 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 mob_order_24m 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 mob_purchase_amount_all 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 mob_purchase_amount_12m 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 mob_purchase_amount_18m 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 mob_purchase_amount_24m 的数据
数据成功写入数据库！


In [42]:
# insert_data(df1_4, 'mob_order_24m')
# Notebook preview: display the whole df2 frame (the slice is commented out).
df2#.iloc[:37, :36]

Unnamed: 0,Month,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,Unnamed: 37,Month.1,2.1,3.1,4.1,5.1,6.1,7.1,8.1,9.1,10.1,11.1,12.1,13.1,14.1,15.1,16.1,17.1,18.1,19.1,20.1,21.1,22.1,23.1,24.1,25.1,26.1,27.1,28.1,29.1,30.1,31.1,32.1,33.1,34.1,35.1,36.1
0,2022.07,0.095238,0.238095,0.238095,0.238095,0.285714,0.333333,0.238095,0.238095,0.333333,0.333333,0.333333,0.476190,0.523810,0.476190,0.476190,0.476190,0.476190,0.523810,0.476190,0.380952,0.380952,0.380952,0.380952,0.428571,0.476190,0.476190,0.476190,0.476190,0.476190,0.476190,0.428571,0.428571,0.428571,0.428571,0.428571,,2022.07,0.091846,0.244091,0.244091,0.244091,0.262824,0.275571,0.201886,0.201886,0.237518,0.237518,0.237518,0.268612,0.280738,0.268612,0.268612,0.268612,0.264382,0.264382,0.264382,0.255227,0.255227,0.255227,0.255227,0.255227,0.255227,0.255227,0.255227,0.255227,0.255227,0.255227,0.250340,0.250340,0.250340,0.250340,0.250340
1,2022.08,0.113208,0.169811,0.207547,0.245283,0.264151,0.320755,0.358491,0.339623,0.396226,0.358491,0.358491,0.566038,0.584906,0.584906,0.584906,0.584906,0.584906,0.584906,0.603774,0.603774,0.584906,0.584906,0.584906,0.584906,0.584906,0.584906,0.584906,0.584906,0.566038,0.566038,0.566038,0.566038,0.547170,0.547170,0.547170,,2022.08,0.105470,0.164582,0.201853,0.225565,0.231627,0.254338,0.262143,0.252259,0.267264,0.258559,0.258559,0.301994,0.309814,0.309814,0.309814,0.309814,0.309814,0.309814,0.309814,0.309814,0.301389,0.301389,0.301389,0.301389,0.301389,0.301389,0.301389,0.301389,0.300970,0.300970,0.300970,0.300970,0.294058,0.294058,0.294058
2,2022.09,0.075000,0.125000,0.175000,0.250000,0.225000,0.225,0.200000,0.225000,0.200000,0.200000,0.200000,0.450000,0.450000,0.450000,0.475000,0.500000,0.475000,0.475000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.475000,0.475000,0.475000,0.475000,0.475000,0.475000,0.475000,0.475000,0.475000,,2022.09,0.085612,0.133278,0.159669,0.198923,0.186023,0.186023,0.169422,0.176344,0.156879,0.156879,0.156879,0.208416,0.210602,0.210602,0.210602,0.210602,0.210602,0.210602,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834,0.208834
3,2022.1,0.026316,0.026316,0.078947,0.105263,0.131579,0.105263,0.157895,0.157895,0.184211,0.184211,0.184211,0.394737,0.368421,0.394737,0.342105,0.394737,0.421053,0.368421,0.368421,0.394737,0.394737,0.394737,0.394737,0.394737,0.394737,0.394737,0.394737,0.394737,0.394737,0.394737,0.394737,0.368421,0.368421,0.368421,,,2022.1,0.037324,0.037324,0.063922,0.087481,0.106936,0.09818,0.138165,0.133433,0.152022,0.142374,0.142374,0.183479,0.182855,0.182855,0.182440,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,0.186586,
4,2022.11,0.073171,0.048780,0.073171,0.073171,0.097561,0.121951,0.146341,0.146341,0.146341,0.146341,0.146341,0.292683,0.317073,0.341463,0.341463,0.365854,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,0.390244,,,,2022.11,0.061848,0.044532,0.059078,0.059078,0.069724,0.091355,0.090962,0.088975,0.087349,0.087349,0.087349,0.150044,0.163378,0.163378,0.163378,0.159005,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,0.160195,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,2025.01,0.007250,0.024540,0.050753,0.077524,0.115449,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2025.01,0.006632,0.020939,0.039872,0.058366,0.079989,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
113,2025.02,0.011634,0.031361,0.059686,0.094082,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2025.02,0.010024,0.025457,0.047273,0.069887,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
114,2025.03,0.017228,0.037519,0.060873,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2025.03,0.013994,0.029641,0.045873,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
115,2025.04,0.013727,0.041180,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2025.04,0.011681,0.033750,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# vintage（金额比例）

In [43]:
# vintage（金额比例）
def update_col2(df2):
    """Rename vintage columns and convert ratios to percentage strings.

    'Month' / 'Month.1' become ``search_time``. Every other column label is
    cut at its first '.' and prefixed with 'sort' (``2`` and ``'2.1'`` both
    become ``'sort2'``), and its values are formatted as percentages with two
    decimals. ``search_time`` is rewritten from 'YYYY.MM' to 'YYYY-MM'.

    Missing values are left as they are, as are values that are already
    strings. The frame is modified in place and also returned.
    """
    def _format_percent(value):
        # format(..., '.2%') would raise on a string, so pass strings through.
        if pd.isna(value) or isinstance(value, str):
            return value
        return format(value, '.2%')

    def _format_month(value):
        if pd.isna(value):
            return value
        pieces = str(value).split('.')
        if len(pieces) < 2:
            # No '.' present (e.g. already 'YYYY-MM'): nothing to convert.
            return str(value)
        # A float such as 2022.1 is October whose trailing zero was lost.
        month = '10' if pieces[1] == '1' else pieces[1]
        return pieces[0] + '-' + month

    for col in df2.columns.to_list():
        if col == 'Month' or col == 'Month.1':
            df2.rename(columns={col: 'search_time'}, inplace=True)
            continue
        sort_name = f"sort{str(col).split('.')[0]}"
        df2.rename(columns={col: sort_name}, inplace=True)
        # Assign the column directly: the values change from numbers to
        # strings, which `.loc[:, col] = ...` only handles through
        # deprecated silent upcasting.
        df2[sort_name] = df2[sort_name].apply(_format_percent)

    df2['search_time'] = df2.search_time.apply(_format_month)
    return df2
# 取到固定范围的数据 订单维度
# Each slice is copied before it is relabelled or passed to update_col2,
# which rewrites columns in place; this keeps df2 itself untouched.

# Order dimension. For the second and third tables the column labels are
# taken from the sheet row directly above the data rows.
df2_1 = update_col2(df2.iloc[:37, :36].copy())

df2_2 = df2.iloc[41:77, :35].copy()
df2_2.columns = df2.iloc[40, :35]
df2_2 = update_col2(df2_2)

df2_3 = df2.iloc[82:117, :34].copy()
df2_3.columns = df2.iloc[81, :34]
df2_3 = update_col2(df2_3)

# Amount dimension: the same row bands, taken from column 37 onwards.
df2_4 = update_col2(df2.iloc[:37, 37:73].copy())

df2_5 = df2.iloc[41:77, 37:72].copy()
df2_5.columns = df2.iloc[40, 37:72]
df2_5 = update_col2(df2_5)

df2_6 = df2.iloc[82:117, 37:71].copy()
df2_6.columns = df2.iloc[81, 37:71]
df2_6 = update_col2(df2_6)

# Write to the database; df2_list and table_list2 are paired by position.
df2_list = [df2_1, df2_2, df2_3, df2_4, df2_5, df2_6]
table_list2 = ['Vintage30_order', 'Vintage60_order', 'Vintage90_order', 'Vintage30_purchase_amount', 'Vintage60_purchase_amount', 'Vintage90_purchase_amount']
for frame, table_name in zip(df2_list, table_list2):
    insert_data(frame, table_name)

数据库连接成功！
成功删除表 Vintage30_order 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 Vintage60_order 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 Vintage90_order 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 Vintage30_purchase_amount 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 Vintage60_purchase_amount 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 Vintage90_purchase_amount 的数据
数据成功写入数据库！


# 每月截面数据

In [44]:
# Notebook preview: rows 11 onwards of df3, transposed for display.
df3.iloc[11:, :].T

Unnamed: 0,11,12,13,14,15,16,17,18
统计时点,1.正常,2.M1,3.M2,4.M3,5.M4,6.M5,7.M6,8.M6+
2022.07,97516.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022.08,349806.87,10748.26,,,,,,
2022.09,460715.33,56435.21,10748.26,,,,,
2022.1,544927.17,61965.46,53521.5,10748.26,,,,
2022.11,723884.92,66741.51,41510.2,53521.5,10748.26,,,
2022.12,1085051.35,54314.47,32952.53,41510.2,53521.5,10748.26,,
2023.01,2153544.0,74663.86,37101.06,32952.53,41510.2,53521.5,10748.26,
2023.02,3446111.85,102319.87,38374.72,27928.07,32804.73,41510.2,53521.5,10748.26
2023.03,4574245.2,244997.74,91684.32,29997.21,27928.07,32804.73,41510.2,57138.54


In [45]:
def update_col3(df3, key=None):
    """Reshape a transposed monthly-snapshot frame for the database.

    The first row is dropped (the caller has already used it as the column
    labels). Each label 'N.name' is reduced to the part after the first '.',
    with '正常' mapped to 'C' and 'M6+' to 'M6_Plus'. The index is moved into
    a ``search_time`` column and rewritten from 'YYYY.MM' to 'YYYY-MM'.

    Args:
        df3: Transposed frame whose index holds the months.
        key: When ``'int'``, every non-missing value is passed through
            ``int()``; missing values are left as they are.

    Returns:
        A new frame; the argument itself is not modified.
    """
    def _format_month(value):
        if pd.isna(value):
            return value
        pieces = str(value).split('.')
        if len(pieces) < 2:
            # No '.' present (e.g. already 'YYYY-MM'): nothing to convert.
            return str(value)
        # A float such as 2022.1 is October whose trailing zero was lost.
        month = '10' if pieces[1] == '1' else pieces[1]
        return pieces[0] + '-' + month

    special_names = {'正常': 'C', 'M6+': 'M6_Plus'}

    # Copy the slice so the edits below never touch the caller's frame.
    df3 = df3.iloc[1:, :].copy()
    for col in df3.columns.to_list():
        suffix = col.split('.')[1]
        col_new = special_names.get(suffix, suffix)
        df3.rename(columns={col: col_new}, inplace=True)
        if key == 'int':
            # Assign the column directly rather than through `.loc[:, col]`,
            # which only changes the column dtype via deprecated upcasting.
            df3[col_new] = df3[col_new].apply(lambda x: x if pd.isna(x) else int(x))

    df3.reset_index(names=['search_time'], inplace=True)
    df3['search_time'] = df3['search_time'].apply(_format_month)
    return df3
# '每月截面数据' 订单维度
# Order dimension: the first 8 rows of df3, transposed; the first transposed
# row supplies the column labels.
df3_new1 = df3.iloc[:8, :].transpose()
df3_new1.columns = df3_new1.iloc[0, :]
df3_new1 = update_col3(df3_new1, 'int')

# Amount dimension: rows 11 onwards, handled the same way but without the
# int conversion.
df3_new2 = df3.iloc[11:, :].transpose()
df3_new2.columns = df3_new2.iloc[0, :]
df3_new2 = update_col3(df3_new2)

# Write both frames; df3_list and table_list3 are paired by position.
df3_list = [df3_new1, df3_new2]
table_list3 = ['rental_status_order', 'rental_status_purchase_amount']
for idx in range(len(table_list3)):
    table_name = table_list3[idx]
    insert_data(df3_list[idx], table_name)
# df3_new2

数据库连接成功！
成功删除表 rental_status_order 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 rental_status_purchase_amount 的数据
数据成功写入数据库！


# 首逾率

In [46]:
# Notebook preview: display the whole df4 frame (the slice is commented out).
df4#.iloc[:39, :]

Unnamed: 0,Unnamed: 1,Unnamed: 2,FPD1+,FPD7+,FPD15+,FPD30+,SPD1+,SPD30+,TPD1+,TPD30+,%FPD1+,%FPD7+,%FPD15+,%FPD30+,%SPD1+,%SPD30+,%TPD1+,%TPD30+
0,2022-07,21,4,3,2,2,3,2,1,,0.190476,0.142857,0.095238,0.095238,0.142857,0.095238,0.047619,0
1,2022-08,53,19,7,6,6,6,2,5,1,0.358491,0.132075,0.113208,0.113208,0.113208,0.037736,0.09434,0.018868
2,2022-09,40,9,6,3,3,6,1,1,,0.225,0.15,0.075,0.075,0.15,0.025,0.025,0
3,2022-10,38,5,2,2,1,4,,3,1,0.131579,0.052632,0.052632,0.026316,0.105263,0,0.078947,0.026316
4,2022-11,41,10,3,3,3,3,,2,,0.243902,0.073171,0.073171,0.073171,0.073171,0,0.04878,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,2025-05,31912406.1,2037641.66,1006821.12,627051.58,494506.95,3073445.7,648067.33,2011110.1,382733.42,0.063851,0.03155,0.019649,0.015496,0.096309,0.020308,0.06302,0.011993
79,2025-06,38861474,3150357.1,1350644.06,893123.1,722396.25,4019052.89,841043.3,2899636.92,,0.081066,0.034755,0.022982,0.018589,0.10342,0.021642,0.074615,
80,2025-07,46566810,4119161.29,1664191.93,1034389.01,692390.27,5062796.43,,,,0.088457,0.035738,0.022213,0.014869,0.108721,,,
81,2025-08,42514451,4138275.02,1967494,,,,,,,0.097338,0.046278,,,,,,


In [47]:
def update_col4(df4):
    """Rename first-overdue columns and format the rate columns.

    'Unnamed: 1' becomes ``search_time`` and 'Unnamed: 2' becomes
    ``order_number``. A label containing '%' loses its surrounding '%' and '+'
    and gains the suffix '_rate_plus'; every other label loses its
    surrounding '+' and gains '_plus'. Values in the '_rate_plus' columns are
    formatted as percentages with two decimals, except missing values and
    strings, which are kept unchanged.

    The frame is modified in place and also returned.
    """
    fixed_names = {'Unnamed: 1': 'search_time', 'Unnamed: 2': 'order_number'}

    def _as_percent(value):
        if pd.isna(value) or type(value) == str:
            return value
        return format(value, '.2%')

    for original in df4.columns.to_list():
        if original in fixed_names:
            renamed = fixed_names[original]
        elif '%' in original:
            renamed = original.strip('%').strip('+') + '_rate_plus'
        else:
            renamed = original.strip('+') + '_plus'
        df4.rename(columns={original: renamed}, inplace=True)

        # Only the rate columns are converted to percentage text.
        if '_rate_plus' not in renamed:
            continue
        df4.loc[:, renamed] = df4[renamed].apply(_as_percent)
    return df4
# '首逾率' 订单维度
# Each slice is copied before being handed to update_col4, which renames and
# rewrites columns in place; this keeps df4 itself untouched.

# Order dimension: the first 39 rows.
df4_1 = update_col4(df4.iloc[:39, :].copy())

# Amount dimension: rows 44 onwards.
df4_2 = update_col4(df4.iloc[44:, :].copy())

# Write both frames; df4_list and table_list4 are paired by position.
df4_list = [df4_1, df4_2]
table_list4 = ['overdue_fst_order', 'overdue_fst_purchase_amount']
for frame, table_name in zip(df4_list, table_list4):
    insert_data(frame, table_name)

数据库连接成功！
成功删除表 overdue_fst_order 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 overdue_fst_purchase_amount 的数据
数据成功写入数据库！


# 迁徙率

In [48]:
# Notebook preview: display the whole df5 frame (the slice is commented out).
df5#.iloc[9:, :].T

Unnamed: 0,时间,2022.07,2022.08,2022.09,2022.1,2022.11,2022.12,2023.01,2023.02,2023.03,2023.04,2023.05,2023.06,2023.07,2023.08,2023.09,2023.1,2023.11,2023.12,2024.01,2024.02,2024.03,2024.04,2024.05,2024.06,2024.07,2024.08,2024.09,2024.1,2024.11,2024.12,2025.01,2025.02,2025.03,2025.04,2025.05,2025.06,2025.07,2025.08
0,C_M1,0.1,0.1408,0.08,0.0775,0.05,0.0651,0.0434,0.0673,0.0545,0.0474,0.0472,0.0611,0.0529,0.0595,0.0526,0.0437,0.0447,0.0506,0.0502,0.0515,0.057,0.048,0.0472,0.0564,0.0481,0.0471,0.0499,0.0467,0.0496,0.0465,0.0355,0.051,0.049,0.0515,0.0503,0.06,0.0581,0.0697
1,M1_M2,0.0,1.0,0.8,0.6,0.3846,0.5714,0.5625,0.7143,0.6939,0.5965,0.6329,0.5437,0.4731,0.5169,0.5539,0.569,0.5597,0.5661,0.55,0.5179,0.5816,0.626,0.5348,0.5405,0.5174,0.5513,0.5966,0.5525,0.6041,0.6048,0.5507,0.5766,0.5621,0.5662,0.579,0.5381,0.5943,0.5707
2,M2_M3,0.0,,1,1.0,1.0,1.0,0.75,0.7,0.8333,0.8286,0.9706,0.8824,0.7627,0.8313,0.8205,0.8537,0.8678,0.9016,0.851,0.8259,0.9237,0.8826,0.9036,0.853,0.8626,0.8987,0.893,0.9095,0.9187,0.9143,0.8736,0.9047,0.9058,0.8824,0.9313,0.8984,0.9208,0.9506
3,M3_M4,0.0,,,1.0,1.0,1.0,1.0,1.0,1.0,0.6667,0.9032,0.9394,0.8936,0.8936,0.8592,0.901,0.8794,0.8812,0.8706,0.8859,0.955,0.9467,0.9412,0.9159,0.9309,0.9582,0.956,0.9542,0.9507,0.9626,0.8908,0.9353,0.9493,0.9462,0.972,0.9484,0.9542,0.9701
4,M4_M5,0.0,,,,1.0,1.0,1.0,1.0,1.0,0.7143,0.9091,0.9643,0.9355,0.9348,0.9556,0.9559,0.9677,0.937,0.9315,0.9494,0.9758,0.9486,0.9664,0.9467,0.9693,0.9698,0.95,0.9676,0.9637,0.9665,0.9263,0.9718,0.9894,0.9654,0.97,0.9559,0.9612,0.9962
5,M5_M6,0.0,,,,,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,0.9259,0.9062,0.8864,0.9348,0.9385,0.9462,0.8837,0.9371,0.9615,0.9207,0.9519,0.9442,0.9547,0.9757,0.9565,0.9745,0.9758,0.9803,0.9346,0.9633,0.9879,0.9501,0.9947,0.9782,0.9808,0.9874
6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,二、迁徙率-金额维度,,,rollrate_purchase_amount,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,时间,2022.07,2022.08,2022.09,2022.1,2022.11,2022.12,2023.01,2023.02,2023.03,2023.04,2023.05,2023.06,2023.07,2023.08,2023.09,2023.1,2023.11,2023.12,2024.01,2024.02,2024.03,2024.04,2024.05,2024.06,2024.07,2024.08,2024.09,2024.1,2024.11,2024.12,2025.01,2025.02,2025.03,2025.04,2025.05,2025.06,2025.07,2025.08
9,C_M1,0.1102,0.1613,0.1166,0.1006,0.045,0.0663,0.0402,0.0671,0.06,0.0464,0.0457,0.0645,0.0494,0.0603,0.0516,0.0448,0.0441,0.0483,0.0413,0.0462,0.0494,0.0429,0.0438,0.0481,0.0411,0.0403,0.0435,0.0405,0.0401,0.0395,0.0328,0.0454,0.0423,0.0453,0.0452,0.053,0.0519,0.0647


In [49]:
def update_col5(df5):
    """Format roll-rate values as percentages and expose the month column.

    Every value in every column is formatted as a percentage with two
    decimals; missing values and values that are already strings are kept
    unchanged. The index is then moved into a ``search_time`` column and
    rewritten from 'YYYY.MM' to 'YYYY-MM'.

    The frame is modified in place and also returned.
    """
    def _format_percent(value):
        # format(..., '.2%') would raise on a string, so pass strings through.
        if pd.isna(value) or isinstance(value, str):
            return value
        return format(value, '.2%')

    def _format_month(value):
        if pd.isna(value):
            return value
        pieces = str(value).split('.')
        if len(pieces) < 2:
            # No '.' present (e.g. already 'YYYY-MM'): nothing to convert.
            return str(value)
        # A float such as 2022.1 is October whose trailing zero was lost.
        month = '10' if pieces[1] == '1' else pieces[1]
        return pieces[0] + '-' + month

    for col in df5.columns.to_list():
        # Assign the column directly: the values change from numbers to
        # strings, which `.loc[:, col] = ...` only handles through
        # deprecated silent upcasting.
        df5[col] = df5[col].apply(_format_percent)

    df5.reset_index(names=['search_time'], inplace=True)
    df5['search_time'] = df5.search_time.apply(_format_month)
    return df5
# '迁徙率' 订单口径
# Order view: the first 6 rows of df5, transposed. The first transposed row
# supplies the column labels and is then dropped. The remainder is copied
# because update_col5 rewrites the frame in place.
df5_1 = df5.iloc[:6, :].T
df5_1.columns = df5_1.iloc[0, :]
df5_1 = update_col5(df5_1.iloc[1:, :].copy())

# Amount view: rows 9 onwards, handled the same way.
df5_2 = df5.iloc[9:, :].T
df5_2.columns = df5_2.iloc[0, :]
df5_2 = update_col5(df5_2.iloc[1:, :].copy())

# Write both frames; df5_list and table_list5 are paired by position.
df5_list = [df5_1, df5_2]
table_list5 = ['rollrate_order', 'rollrate_purchase_amount']
for frame, table_name in zip(df5_list, table_list5):
    insert_data(frame, table_name)


数据库连接成功！
成功删除表 rollrate_order 的数据
数据成功写入数据库！
数据库连接成功！
成功删除表 rollrate_purchase_amount 的数据
数据成功写入数据库！
