In [30]:
import pandas as pd
from datetime import datetime

In [31]:
file = r"C:\Users\BXDM\Documents\股票数据\20250807.csv"
df = pd.read_csv(file,
                 # encoding='gbk',
                 dtype={'代码': str})  # 处理股票代码由 000XXX 变成 XXX的问题

In [32]:
column_mapping = {
    '代码': 'stock_code',  # StockCode: 股票代码
    '开盘': 'open_price',  # Open: 开盘价
    '最新': 'close_price',  # LatestPrice: 最新股价
    '最高': 'high_price',  # High: 最高价
    '最低': 'low_price',  # Low: 最低价
    '总量': 'volume',  # TotalVolume: 总成交量(/股)
    '涨幅%': 'pct_change',  # PriceChangeRate: 涨幅百分比
}

In [33]:
# 将列名从中文转换为英文
df = df[column_mapping.keys()].rename(columns=column_mapping)

In [34]:
# 成交量转换为股数

import re

def convert_volume_to_shares(volume_str):
    """
    将成交量字符串转换为股数（整数）：
    - "万" → 万手 → 股（1万手 = 100万股）
    - "M" → 百万股 → 股（1M = 1,000,000股）
    - 纯数字 → 手 → 股（1手 = 100股）
    """
    if pd.isna(volume_str):  # 处理空值
        return 0
    
    volume_str = str(volume_str).strip()  # 转为字符串并去除空格
    
    # 情况1：带"万"（万手）
    if "万" in volume_str:
        num = float(re.sub(r"[^\d.]", "", volume_str))  # 提取数字（如"241万"→241）
        return int(num * 100 * 10000)  # 万手 → 股
    
    # 情况2：带"M"（百万股）
    elif "M" in volume_str.upper():
        num = float(re.sub(r"[^\d.]", "", volume_str))  # 提取数字（如"1.5M"→1.5）
        return int(num * 1000000)  # 百万股 → 股
    
    # 情况3：纯数字（默认单位是手）
    else:
        try:
            num = float(volume_str)
            return int(num * 100)  # 手 → 股
        except ValueError:
            return 0  # 无效数据返回0

In [35]:
# 设置日期为当天（通过datetime获取当前日期）
today = datetime.today().strftime('%Y-%m-%d')  # 日期格式为 'YYYY-MM-DD'
# today = '2025-08-01'
df['trade_date'] = today
today

'2025-08-07'

In [36]:
df['volume'] = df['volume'].apply(convert_volume_to_shares)
df["update_time"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')    # 统一更新时间

In [37]:
df


Unnamed: 0,stock_code,open_price,close_price,high_price,low_price,volume,pct_change,trade_date,update_time
0,002377,3.25,3.37,3.37,3.2,54700000,10.13,2025-08-07,2025-08-07 18:28:20
1,601718,4.32,4.8,4.8,4.22,424000000,10.09,2025-08-07,2025-08-07 18:28:20
2,000536,4.76,5.24,5.24,4.75,255000000,10.08,2025-08-07,2025-08-07 18:28:20
3,603518,9.84,10.84,10.84,9.83,37299999,10.05,2025-08-07,2025-08-07 18:28:20
4,603648,11.28,12.39,12.39,11.23,30900000,10.04,2025-08-07,2025-08-07 18:28:20
...,...,...,...,...,...,...,...,...,...
3049,002097,16.81,15.75,17.15,15.75,288000000,-10,2025-08-07,2025-08-07 18:28:20
3050,003036,21.79,19.62,21.99,19.62,7680000,-10,2025-08-07,2025-08-07 18:28:20
3051,002550,11.5,10.7,11.5,10.7,127000000,-10.01,2025-08-07,2025-08-07 18:28:20
3052,603059,49.47,40.47,49.47,40.47,19600000,-10.01,2025-08-07,2025-08-07 18:28:20


In [38]:
import sqlite3

conn = sqlite3.connect('stock_data.db')

update_fields = [col for col in df.columns if col not in ['stock_code', 'trade_date']]
set_clause = ', '.join([f"{col}=excluded.{col}" for col in update_fields])
placeholders = ', '.join(['?'] * len(df.columns))

sql = f"""
INSERT INTO stock_daily_data ({', '.join(df.columns)})
VALUES ({placeholders})
ON CONFLICT(stock_code, trade_date) DO UPDATE SET
{set_clause}
"""

conn.executemany(sql, df.values.tolist())
conn.commit()
conn.close()