In [9]:
import time
import logging
import requests
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
import tushare as ts  # 用于获取股票详细数据，需自行注册获取token

# 配置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class THSFetcher:
    """Fetch the Tonghuashun (THS) hot-search stock ranking over HTTP.

    Args:
        url: Endpoint that returns the ranking as JSON. The default is an
            empty placeholder; while it is empty every fetch returns ``None``.
        timeout: Seconds to wait for the HTTP response before giving up.
    """

    def __init__(self, url="", timeout=10):
        self.url = url  # placeholder endpoint; the real one may differ
        self.timeout = timeout
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

    @staticmethod
    def _parse_hot_stocks(data, top_n):
        """Convert the decoded JSON payload into a list of row dicts.

        Anything that is not ``{"data": [ {...}, ... ]}`` yields an empty
        list, and entries that are not dicts are skipped.
        NOTE(review): the field names follow the original parsing code and
        must be adjusted to the real API response format.
        """
        entries = data.get("data") if isinstance(data, dict) else None
        if not isinstance(entries, list):
            return []

        fetched_at = datetime.now()  # one timestamp for the whole batch
        return [
            {
                "code": entry.get("code"),
                "name": entry.get("name"),
                "rank": entry.get("rank"),
                "hot_score": entry.get("hotScore"),
                "fetch_time": fetched_at,
            }
            for entry in entries[:top_n]
            if isinstance(entry, dict)
        ]

    def fetch_hot_stocks(self, top_n=50):
        """Return the top ``top_n`` hot-search stocks.

        Args:
            top_n: Maximum number of stocks to request and keep.

        Returns:
            A ``pandas.DataFrame`` with one row per stock (empty when the
            payload holds no usable entries), or ``None`` when the endpoint
            is not configured or the request/decoding fails.
        """
        if not self.url:
            logger.error("爬取同花顺热搜股票失败: 未配置热搜接口URL")
            return None

        try:
            params = {"limit": top_n, "type": "hot"}
            # A timeout keeps a stalled connection from blocking the caller forever.
            response = requests.get(
                self.url,
                headers=self.headers,
                params=params,
                timeout=self.timeout,
            )
            response.raise_for_status()
            stocks = self._parse_hot_stocks(response.json(), top_n)
        except Exception as e:
            # Best-effort boundary: callers treat None as "no data this round".
            logger.error(f"爬取同花顺热搜股票失败: {str(e)}")
            return None

        logger.info(f"成功获取同花顺热搜前{len(stocks)}只股票")
        return pd.DataFrame(stocks)

class StockDataProcessor:
    """Download daily bars from Tushare and turn them into model features.

    Args:
        ts_token: Tushare API token, registered via ``ts.set_token``.
    """

    # Raw columns kept from the daily-bar frame, in the order the model sees them.
    FEATURES = ['open', 'high', 'low', 'close', 'vol', 'amount']

    def __init__(self, ts_token):
        ts.set_token(ts_token)
        self.pro = ts.pro_api()
        # Always holds the scaler fitted by the most recent preprocess_data call.
        self.scaler = StandardScaler()

    def get_stock_data(self, code, start_date=None, end_date=None):
        """Fetch forward-adjusted daily bars for one stock.

        Args:
            code: Exchange code such as ``"600000"``; a value that already
                carries a suffix (``"600000.SH"``) is used as given.
            start_date: First day as ``YYYYMMDD``; defaults to 30 days ago.
            end_date: Last day as ``YYYYMMDD``; defaults to today.

        Returns:
            DataFrame sorted by ``trade_date`` ascending, or ``None`` when
            nothing was returned or the request failed.
        """
        try:
            # Build the Tushare code. NOTE(review): only the "6 -> SH, else SZ"
            # rule is applied; other exchanges need an explicit suffix.
            if '.' in code:
                ts_code = code.upper()
            elif code.startswith('6'):
                ts_code = f"{code}.SH"
            else:
                ts_code = f"{code}.SZ"

            if not start_date:
                # Default window: the most recent 30 calendar days.
                start_date = (datetime.now() - pd.Timedelta(days=30)).strftime('%Y%m%d')
            if not end_date:
                end_date = datetime.now().strftime('%Y%m%d')

            # Reuse the client created in __init__ instead of building a new one per call.
            df = ts.pro_bar(ts_code=ts_code, api=self.pro, adj='qfq',
                            start_date=start_date, end_date=end_date)
            if df is None or df.empty:
                logger.warning(f"未获取到股票{code}的数据")
                return None

            # Oldest bar first.
            df = df.sort_values('trade_date')
            return df

        except Exception as e:
            logger.error(f"获取股票{code}数据失败: {str(e)}")
            return None

    def preprocess_data(self, df):
        """Build the standardised feature matrix used by the model.

        Columns are ``FEATURES`` followed by ``ma5``, ``ma10`` and ``rsi``.
        Warm-up rows where an indicator is still undefined are dropped.

        Args:
            df: Daily bars sorted oldest first, containing ``FEATURES``.

        Returns:
            2-D array of standardised features, or ``None`` when ``df`` is
            missing/empty or no row survives the indicator warm-up.
        """
        if df is None or df.empty:
            return None

        df = df[self.FEATURES].copy()

        # Technical indicators.
        df['ma5'] = df['close'].rolling(window=5).mean()
        df['ma10'] = df['close'].rolling(window=10).mean()
        # Explicit float dtype so an all-missing RSI becomes NaN, not object.
        df['rsi'] = pd.Series(
            self.calculate_rsi(df['close'], 14), index=df.index, dtype='float64'
        )

        # fillna(method='ffill') is deprecated; ffill() is the direct equivalent.
        df = df.ffill().dropna()
        if df.empty:
            # Too little history: the scaler cannot be fitted on zero rows.
            return None

        # Fresh scaler per call so a reference saved for an earlier stock is
        # not silently refitted by the next one.
        self.scaler = StandardScaler()
        return self.scaler.fit_transform(df)

    @staticmethod
    def _rsi_value(avg_gain, avg_loss):
        """RSI from average gain/loss.

        ``100 * gain / (gain + loss)`` equals ``100 - 100 / (1 + gain/loss)``
        but stays defined when the average loss is zero (all gains -> 100).
        A completely flat window yields 0.
        """
        total = avg_gain + avg_loss
        if total == 0:
            return 0.0
        return float(100.0 * avg_gain / total)

    def calculate_rsi(self, prices, period=14):
        """Compute the RSI with Wilder smoothing.

        Args:
            prices: Sequence of prices, oldest first.
            period: Look-back length in bars.

        Returns:
            List as long as ``prices``. The first ``period`` entries are
            ``None`` because ``period`` price changes are needed before the
            first value exists; the whole list is ``None`` when the series
            has ``period`` prices or fewer.
        """
        values = np.asarray(prices, dtype=float)
        count = len(values)
        if period < 1 or count <= period:
            return [None] * count

        deltas = np.diff(values)
        gains = np.where(deltas > 0, deltas, 0.0)
        losses = np.where(deltas < 0, -deltas, 0.0)

        # Seed with the simple mean of the first `period` changes; that value
        # belongs to bar index `period`.
        avg_gain = gains[:period].mean()
        avg_loss = losses[:period].mean()
        rsi = [self._rsi_value(avg_gain, avg_loss)]

        # deltas[i] is the change into bar i + 1, so each change is used once.
        for i in range(period, len(deltas)):
            avg_gain = (avg_gain * (period - 1) + gains[i]) / period
            avg_loss = (avg_loss * (period - 1) + losses[i]) / period
            rsi.append(self._rsi_value(avg_gain, avg_loss))

        # Pad the warm-up bars so the result lines up with `prices`.
        return [None] * period + rsi

class StockPredictionModel:
    """Thin wrapper around a two-layer LSTM regression network.

    Args:
        input_shape: Shape passed to the first LSTM layer as ``input_shape``.
    """

    def __init__(self, input_shape):
        self.model = self.build_model(input_shape)

    def build_model(self, input_shape):
        """Assemble and compile the network, then return it.

        Two 50-unit LSTM layers feed a 25-unit dense layer and a single
        output unit (the predicted close). Compiled with Adam and MSE loss.
        """
        layers = [
            LSTM(50, return_sequences=True, input_shape=input_shape),
            LSTM(50, return_sequences=False),
            Dense(25),
            Dense(1),  # predicted closing price
        ]
        network = Sequential(layers)
        network.compile(loss='mean_squared_error', optimizer='adam')
        return network

    def train(self, X_train, y_train, epochs=25, batch_size=32):
        """Fit the network on ``X_train`` / ``y_train``."""
        self.model.fit(
            X_train,
            y_train,
            epochs=epochs,
            batch_size=batch_size,
        )

    def predict(self, X):
        """Return the network's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

class TradingExecutor:
    """Simulated trade executor that tracks cash and share positions.

    Args:
        account_config: Optional dict; ``initial_balance`` sets the starting
            cash (100000 when absent).
    """

    def __init__(self, account_config=None):
        self.account_config = account_config or {}
        self.positions = {}    # code -> number of shares held
        self.last_prices = {}  # code -> most recent quote or trade price seen
        self.balance = self.account_config.get('initial_balance', 100000)  # cash

    def connect(self):
        """Connect to the brokerage account (placeholder: always succeeds)."""
        logger.info("连接到证券账户...")
        # A real implementation would call the broker API here.
        return True

    def get_real_time_data(self, code):
        """Return a quote for ``code``.

        The quote is simulated (random price and volume); a real
        implementation would query a market-data API. The price is also
        remembered so that open positions can be valued later.
        """
        logger.info(f"获取股票{code}实时数据")
        quote = {
            'code': code,
            'price': np.random.uniform(9, 11),
            'volume': np.random.randint(100000, 1000000),
            'timestamp': datetime.now()
        }
        self.last_prices[code] = quote['price']
        return quote

    def buy(self, code, price, amount):
        """Buy ``amount`` shares of ``code`` at ``price``.

        Returns:
            ``True`` when the order was filled, ``False`` when the arguments
            are not positive or the cash balance is too small.
        """
        if price <= 0 or amount <= 0:
            # A negative quantity would otherwise credit cash and create a short.
            logger.warning(f"无效的买入参数: {code} 价格={price} 数量={amount}")
            return False

        total_cost = price * amount
        if total_cost > self.balance:
            logger.warning(f"资金不足，无法买入{code} {amount}股")
            return False

        self.balance -= total_cost
        self.positions[code] = self.positions.get(code, 0) + amount
        self.last_prices[code] = price

        logger.info(f"买入 {code} {amount}股，价格: {price}，总成本: {total_cost}，剩余资金: {self.balance}")
        return True

    def sell(self, code, price, amount):
        """Sell ``amount`` shares of ``code`` at ``price``.

        Returns:
            ``True`` when the order was filled, ``False`` when the arguments
            are not positive or fewer than ``amount`` shares are held.
        """
        if price <= 0 or amount <= 0:
            logger.warning(f"无效的卖出参数: {code} 价格={price} 数量={amount}")
            return False

        if self.positions.get(code, 0) < amount:
            logger.warning(f"持仓不足，无法卖出{code} {amount}股")
            return False

        total_income = price * amount
        self.balance += total_income
        self.positions[code] -= amount
        self.last_prices[code] = price

        # Drop fully closed positions so the book only lists live holdings.
        if self.positions[code] == 0:
            del self.positions[code]

        # The value logged is the cash balance, so it is labelled as such.
        logger.info(f"卖出 {code} {amount}股，价格: {price}，总收入: {total_income}，剩余资金: {self.balance}")
        return True

    def get_account_status(self):
        """Return a snapshot of the account.

        Returns:
            Dict with ``balance`` (cash), ``positions`` (copy of the holdings)
            and ``total_assets`` (cash plus holdings valued at the last price
            seen for each code).
        """
        market_value = sum(
            self.last_prices.get(code, 0) * qty
            for code, qty in self.positions.items()
        )
        return {
            'balance': self.balance,
            'positions': dict(self.positions),
            'total_assets': self.balance + market_value
        }

class QuantTradingSystem:
    """Main loop tying together hot-stock discovery, modelling and trading.

    Args:
        ts_token: Tushare API token handed to ``StockDataProcessor``.
        account_config: Optional settings handed to ``TradingExecutor``.
    """

    # Number of consecutive bars in one LSTM input window.
    TIME_STEPS = 60
    # Column index of 'close' in the feature matrix built by preprocess_data.
    CLOSE_COLUMN = 3
    # Calendar days of history to request. The processor's 30-day default can
    # never fill a TIME_STEPS-long window, so the range is always explicit.
    HISTORY_DAYS = 365

    def __init__(self, ts_token, account_config=None):
        self.ths_fetcher = THSFetcher()
        self.data_processor = StockDataProcessor(ts_token)
        self.trading_executor = TradingExecutor(account_config)
        self.models = {}  # code -> {'model': ..., 'scaler': ...}
        self.is_running = False
        self._last_refresh_slot = None  # refresh window already handled

    def _history_start_date(self):
        """Return the ``YYYYMMDD`` start date covering ``HISTORY_DAYS``."""
        start = datetime.now() - pd.Timedelta(days=self.HISTORY_DAYS)
        return start.strftime('%Y%m%d')

    @staticmethod
    def _refresh_slot(now):
        """Identify the model-refresh window that ``now`` falls into.

        Returns ``(date, hour)`` during 09:00-09:29 and during the 14:00
        hour, otherwise ``None``.
        """
        if (now.hour == 9 and now.minute < 30) or now.hour == 14:
            return (now.date(), now.hour)
        return None

    def prepare_models(self, top_n=50):
        """Train one prediction model per hot-search stock.

        Stocks whose data cannot be fetched, preprocessed or windowed are
        skipped. Trained models are stored in ``self.models`` together with
        a private copy of the scaler fitted on their training data.

        Args:
            top_n: Number of hot-search stocks to request.
        """
        import copy  # local import: only needed to snapshot the fitted scaler

        hot_stocks = self.ths_fetcher.fetch_hot_stocks(top_n)
        if hot_stocks is None or hot_stocks.empty:
            logger.error("无法获取热搜股票，无法准备模型")
            return

        start_date = self._history_start_date()
        for _, stock in hot_stocks.iterrows():
            code = stock['code']
            logger.info(f"为股票{code}准备模型...")

            stock_data = self.data_processor.get_stock_data(code, start_date=start_date)
            if stock_data is None:
                continue

            processed_data = self.data_processor.preprocess_data(stock_data)
            if processed_data is None:
                continue
            # Snapshot now: the processor's scaler is refitted for the next stock.
            scaler = copy.deepcopy(self.data_processor.scaler)

            X, y = self.create_sequences(processed_data, self.TIME_STEPS)
            if X is None or y is None:
                logger.warning(f"股票{code}历史数据不足，跳过模型训练")
                continue

            model = StockPredictionModel((X.shape[1], X.shape[2]))
            model.train(X, y)

            self.models[code] = {
                'model': model,
                'scaler': scaler
            }

        logger.info(f"完成模型准备，共为{len(self.models)}只股票创建了模型")

    def create_sequences(self, data, time_steps=TIME_STEPS):
        """Slice a feature matrix into LSTM training samples.

        Args:
            data: 2-D array, one row per bar, with the close price in column
                ``CLOSE_COLUMN``.
            time_steps: Number of bars per input window.

        Returns:
            ``(X, y)`` where ``X[k]`` holds ``time_steps`` consecutive rows
            and ``y[k]`` is the close of the row right after them, or
            ``(None, None)`` when ``data`` has too few rows.
        """
        if time_steps < 1:
            return None, None

        ends = range(time_steps, len(data))
        windows = [data[end - time_steps:end, :] for end in ends]
        if not windows:
            return None, None

        targets = [data[end, self.CLOSE_COLUMN] for end in ends]
        return np.array(windows), np.array(targets)

    def make_trading_decision(self, code):
        """Decide whether to buy or sell ``code`` from the model forecast.

        Returns:
            Dict with ``decision`` (``'buy'``, ``'sell'`` or ``None``),
            ``current_price`` and ``predicted_price``; ``None`` when no model
            or not enough data is available.
        """
        entry = self.models.get(code)
        if entry is None:
            logger.warning(f"没有股票{code}的模型，无法做出决策")
            return None

        real_time_data = self.trading_executor.get_real_time_data(code)
        if not real_time_data:
            return None

        # Recent history, long enough to fill one input window.
        recent_data = self.data_processor.get_stock_data(
            code,
            start_date=self._history_start_date(),
            end_date=datetime.now().strftime('%Y%m%d'),
        )
        if recent_data is None:
            return None

        processed_data = self.data_processor.preprocess_data(recent_data)
        if processed_data is None:
            return None

        time_steps = self.TIME_STEPS
        if len(processed_data) < time_steps:
            logger.warning(f"股票{code}数据不足，无法预测")
            return None

        # The model learned on features scaled by its own training scaler, so
        # undo the scaling just fitted on the recent data and re-apply the
        # training one.
        train_scaler = entry['scaler']
        raw_features = self.data_processor.scaler.inverse_transform(processed_data)
        model_input = train_scaler.transform(raw_features)
        X_pred = np.array([model_input[-time_steps:, :]])

        scaled_prediction = entry['model'].predict(X_pred)[0][0]
        # Map the standardised output back to a price before comparing it
        # with the live quote.
        col = self.CLOSE_COLUMN
        predicted_price = float(
            scaled_prediction * train_scaler.scale_[col] + train_scaler.mean_[col]
        )

        current_price = real_time_data['price']

        # Buy when the forecast is more than 5% above the quote, sell when it
        # is more than 5% below, otherwise do nothing.
        decision = None
        if predicted_price > current_price * 1.05:
            decision = 'buy'
        elif predicted_price < current_price * 0.95:
            decision = 'sell'

        logger.info(f"股票{code} - 当前价格: {current_price}, 预测价格: {predicted_price}, 决策: {decision}")
        return {
            'decision': decision,
            'current_price': current_price,
            'predicted_price': predicted_price
        }

    def run(self, interval=60):
        """Run the trading loop until interrupted.

        Args:
            interval: Seconds to sleep between decision rounds.
        """
        self.is_running = True
        logger.info("启动量化交易系统...")

        if not self.trading_executor.connect():
            logger.error("无法连接到证券账户，系统启动失败")
            self.is_running = False
            return

        # Initial training; if it happens inside a refresh window, that
        # window counts as already handled.
        self.prepare_models()
        self._last_refresh_slot = self._refresh_slot(datetime.now())

        try:
            while self.is_running:
                # Retrain once per refresh window instead of on every tick.
                slot = self._refresh_slot(datetime.now())
                if slot is not None and slot != self._last_refresh_slot:
                    self.prepare_models()
                    self._last_refresh_slot = slot

                for code in list(self.models):
                    decision = self.make_trading_decision(code)
                    if not decision:
                        continue
                    if decision['decision'] == 'buy':
                        # Fixed lot of 100 shares.
                        self.trading_executor.buy(code, decision['current_price'], 100)
                    elif decision['decision'] == 'sell' and code in self.trading_executor.positions:
                        # Close the whole position.
                        self.trading_executor.sell(
                            code,
                            decision['current_price'],
                            self.trading_executor.positions[code],
                        )

                account_status = self.trading_executor.get_account_status()
                logger.info(f"账户状态 - 资金: {account_status['balance']}, 持仓: {account_status['positions']}")

                time.sleep(interval)

        except KeyboardInterrupt:
            logger.info("用户中断，停止交易系统")
        finally:
            self.is_running = False
            logger.info("量化交易系统已停止")

    

In [10]:
import os

# SECURITY: an API token should not live in source control. Set the
# TUSHARE_TOKEN environment variable to override it. The literal below is
# kept only so existing runs keep working; it should be rotated and removed.
TUSHARE_TOKEN = os.environ.get(
    "TUSHARE_TOKEN",
    "6aace479620bdd97e9dd2ecd00f60da135de642f9a88b40a04a00568",
)

# Account settings
ACCOUNT_CONFIG = {
    'initial_balance': 100000  # starting cash
}

# Build the trading system and run it, checking once every 60 seconds.
trading_system = QuantTradingSystem(TUSHARE_TOKEN, ACCOUNT_CONFIG)
trading_system.run(interval=60)


2025-09-27 22:52:46,059 - INFO - 启动量化交易系统...
2025-09-27 22:52:46,059 - INFO - 连接到证券账户...
2025-09-27 22:52:47,808 - ERROR - 爬取同花顺热搜股票失败: 404 Client Error: Not Found for url: https://www.ths.com.cn/api/index/hotStockList?limit=50&type=hot
2025-09-27 22:52:47,810 - ERROR - 无法获取热搜股票，无法准备模型
2025-09-27 22:52:47,811 - INFO - 账户状态 - 资金: 100000, 持仓: {}
2025-09-27 22:53:47,820 - INFO - 账户状态 - 资金: 100000, 持仓: {}
2025-09-27 22:54:47,825 - INFO - 账户状态 - 资金: 100000, 持仓: {}
2025-09-27 22:55:47,830 - INFO - 用户中断，停止交易系统
2025-09-27 22:55:47,833 - INFO - 量化交易系统已停止


In [15]:
import requests
from urllib.parse import quote
from bs4 import BeautifulSoup
import sys
import time
import random
import csv

# ---- Crawler settings ----
MAX_PAGE = 165     # maximum number of pages
PAGE_TRACK = 1     # page reached so far
# Attempt limits for fetching, parsing, saving to file and getting a proxy,
# plus the initial value used for the MAX_* counters.
MAX_GET = MAX_PARSE = MAX_CSV = MAX_PROXY = MAX_START = 1
MAX_TRY = 4        # maximum number of tries
FLAG = 0           # marker for whether url_omi() should be used

# Page URLs are built as URL_START + <page number> + PARAMS.
URL_START = "http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/"
PARAMS = "/ajax/1/"

# Pages whose HTML was missing on the first crawl, handled first-in first-out.
PAGE_LIST = []

# Proxy pool endpoint
PROXY_POOL_API = "http://127.0.0.1:5555/random"

headers = {
    "Accept": "text/html, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate, sdch",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "keep-alive",
    "Cookie": "spversion=20130314; __utma=156575163.1163133091.1530233537.1530289428.1530369413.3; __utmz=156575163.1530369413.3.3.utmcsr=stockpage.10jqka.com.cn|utmccn=(referral)|utmcmd=referral|utmcct=/; Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1=1530444468,1530505958,1530506333,1530516152; Hm_lpvt_78c58f01938e4d85eaf619eae71b4ed1=1530516152; historystock=300033%7C*%7C1A0001; v=AiDRI3i0b1qEZNNemO_FOZlE8SXqKQQBpg9Y4Jox7pbOH8oZQjnUg_YdKIHp",
    "hexin-v": "AiDRI3i0b1qEZNNemO_FOZlE8SXqKQQBpg9Y4Jox7pbOH8oZQjnUg_YdKIHp",
    "Host": "q.10jqka.com.cn",
    "Referer": "http://q.10jqka.com.cn/",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

# Database section
MONGO_URL = ""
class crawl(object):
    """Crawl the 10jqka stock listing pages and append the rows to ``ths.csv``.

    Pages are requested through a proxy taken from a local proxy pool, using
    a randomly chosen header/cookie profile. Pages that cannot be downloaded
    or parsed are queued in ``PAGE_LIST`` and retried for a bounded number of
    rounds.
    """

    # Header/cookie profiles rotated at random between requests.
    # SECURITY: these are captured session cookies; they should be loaded
    # from configuration rather than committed with the code.
    HEADERS_LIST = [
        {
            'Accept': 'text/html, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'Cookie': 'log=; Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1=1533992361,1533998469,1533998895,1533998953; Hm_lpvt_78c58f01938e4d85eaf619eae71b4ed1=1533998953; user=MDrAz9H9akQ6Ok5vbmU6NTAwOjQ2OTU0MjIzNDo3LDExMTExMTExMTExLDQwOzQ0LDExLDQwOzYsMSw0MDs1LDEsNDA7MSwxLDQwOzIsMSw0MDszLDEsNDA7NSwxLDQwOzgsMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDEsNDA6Ojo6NDU5NTQyMjM0OjE1MzM5OTkwNzU6OjoxNTMzOTk5MDYwOjg2NDAwOjA6MTZmOGFjOTgwMGNhMjFjZjRkMWZlMjk0NDQ4M2FhNDFkOmRlZmF1bHRfMjox; userid=459542234; u_name=%C0%CF%D1%FDjD; escapename=%25u8001%25u5996jD; ticket=7c92fb758f81dfa4399d0983f7ee5e53; v=Ajz6VIblS6HlDX_9PqmhBV0QDdH4NeBfYtn0Ixa9SCcK4daNPkWw77LpxLZl',
            'hexin-v': 'AiDRI3i0b1qEZNNemO_FOZlE8SXqKQQBpg9Y4Jox7pbOH8oZQjnUg_YdKIHp',
            'Host': 'q.10jqka.com.cn',
            'Referer': 'http://q.10jqka.com.cn/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        },
        {
            'Accept': 'text/html, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'Cookie': 'user=MDq62tH9NUU6Ok5vbmU6NTAwOjQ2OTU0MjA4MDo3LDExMTExMTExMTExLDQwOzQ0LDExLDQwOzYsMSw0MDs1LDEsNDA7MSwxLDQwOzIsMSw0MDszLDEsNDA7NSwxLDQwOzgsMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDEsNDA6Ojo6NDU5NTQyMDgwOjE1MzM5OTg4OTc6OjoxNTMzOTk4ODgwOjg2NDAwOjA6MTEwOTNhMzBkNTAxMWFlOTg0OWM1MzVjODA2NjQyMThmOmRlZmF1bHRfMjox; userid=459542080; u_name=%BA%DA%D1%FD5E; escapename=%25u9ed1%25u59965E; ticket=658289e5730da881ef99b521b65da6af; log=; Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1=1533992361,1533998469,1533998895,1533998953; Hm_lpvt_78c58f01938e4d85eaf619eae71b4ed1=1533998953; v=AibgksC3Qd-feBV7t0kbK7PCd5e-B2rBPEueJRDPEskkk8xLeJe60Qzb7jDj',
            'hexin-v': 'AiDRI3i0b1qEZNNemO_FOZlE8SXqKQQBpg9Y4Jox7pbOH8oZQjnUg_YdKIHp',
            'Host': 'q.10jqka.com.cn',
            'Referer': 'http://q.10jqka.com.cn/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        },
        {
            'Accept': 'text/html, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'Cookie': 'user=MDq62sm9wM%2FR%2FVk6Ok5vbmU6NTAwOjQ2OTU0MTY4MTo3LDExMTExMTExMTExLDQwOzQ0LDExLDQwOzYsMSw0MDs1LDEsNDA7MSwxLDQwOzIsMSw0MDszLDEsNDA7NSwxLDQwOzgsMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDEsNDA6Ojo6NDU5NTQxNjgxOjE1MzM5OTg0NjI6OjoxNTMzOTk4NDYwOjg2NDAwOjA6MTAwNjE5YWExNjc2NDQ2MGE3ZGYxYjgxNDZlNzY3ODIwOmRlZmF1bHRfMjox; userid=459541681; u_name=%BA%DA%C9%BD%C0%CF%D1%FDY; escapename=%25u9ed1%25u5c71%25u8001%25u5996Y; ticket=4def626a5a60cc1d998231d7730d2947; log=; Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1=1533992361,1533998469; Hm_lpvt_78c58f01938e4d85eaf619eae71b4ed1=1533998496; v=AvYwAjBHsS9PCEXLZexL20PSRyfuFzpQjFtutWDf4ll0o5zbyKeKYVzrvsAz',
            'hexin-v': 'AiDRI3i0b1qEZNNemO_FOZlE8SXqKQQBpg9Y4Jox7pbOH8oZQjnUg_YdKIHp',
            'Host': 'q.10jqka.com.cn',
            'Referer': 'http://q.10jqka.com.cn/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
        },
        {
            'Accept': 'text/html, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
            'Cookie': 'Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1=1533992361; Hm_lpvt_78c58f01938e4d85eaf619eae71b4ed1=1533992361; user=MDq62sm9SnpsOjpOb25lOjUwMDo0Njk1NDE0MTM6NywxMTExMTExMTExMSw0MDs0NCwxMSw0MDs2LDEsNDA7NSwxLDQwOzEsMSw0MDsyLDEsNDA7MywxLDQwOzUsMSw0MDs4LDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAxLDQwOjo6OjQ1OTU0MTQxMzoxNTMzOTk4MjA5Ojo6MTUzMzk5ODE2MDo4NjQwMDowOjFlYTE2YTBjYTU4MGNmYmJlZWJmZWExODQ3ODRjOTAxNDpkZWZhdWx0XzI6MQ%3D%3D; userid=459541413; u_name=%BA%DA%C9%BDJzl; escapename=%25u9ed1%25u5c71Jzl; ticket=b909a4542156f3781a86b8aaefce3007; v=ApheKMKxdxX9FluRdtjNUdGcac08gfwLXuXQj9KJ5FOGbTKxepHMm671oBoh',
            'hexin-v': 'AiDRI3i0b1qEZNNemO_FOZlE8SXqKQQBpg9Y4Jox7pbOH8oZQjnUg_YdKIHp',
            'Host': 'q.10jqka.com.cn',
            'Referer': 'http://q.10jqka.com.cn/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
        },
    ]

    def __init__(self):
        self.MAX_PAGE = MAX_PAGE
        self.MAX_TRY = MAX_TRY          # cap on retry rounds over failed pages
        self.PAGE_TRACK = PAGE_TRACK    # page currently being processed
        self.FLAG = FLAG                # number of first-pass URLs generated
        self.PAGE_LIST = PAGE_LIST      # pages that failed and await a retry
        self.URL_START = URL_START      # URL prefix
        self.PARAMS = PARAMS            # URL suffix
        self.PROXY_POOL_API = PROXY_POOL_API
        self.proxy_save = None   # proxy mapping currently in use
        self.proxy_con = 0       # 0 -> fetch a new proxy, 1 -> keep the current one
        self.fieldnames = ['代码','名称','现价','涨跌幅']
        self.file = open("ths.csv", 'a', newline='')
        self.writer = csv.DictWriter(self.file, fieldnames=self.fieldnames)
        # Append mode starts at the end of the file, so position 0 means the
        # file is empty and still needs its header row.
        if self.file.tell() == 0:
            self.writer.writeheader()

    def close(self):
        """Close the output file; calling it more than once is harmless."""
        self.file.close()

    def proxy_get(self, num_retries=2):
        """Ask the proxy pool for a proxy address.

        Args:
            num_retries: Extra attempts after the first one fails.

        Returns:
            A ``proxies`` mapping for ``requests``, or ``None`` when every
            attempt failed (the download then goes out without a proxy).
        """
        for attempt in range(num_retries + 1):
            try:
                r_proxy = requests.get(self.PROXY_POOL_API, timeout=5)
                r_proxy.raise_for_status()
                proxy = r_proxy.text.strip()
            except requests.RequestException:
                proxy = ""

            if proxy:
                print("代理是", proxy)
                # NOTE(review): the 'https' entry uses an https:// proxy URL,
                # as the original did; confirm the pool's proxies accept TLS.
                return {
                    "http": 'http://' + proxy,
                    "https": 'https://' + proxy,
                }
            if attempt < num_retries:
                print("代理获取失败，重新获取")
        return None

    def url_yield(self):
        """Yield the URL of every listing page from 1 to ``MAX_PAGE``.

        ``PAGE_TRACK`` is set to the page being yielded and ``FLAG`` is
        incremented once per URL.
        """
        for i in range(1, self.MAX_PAGE + 1):
            self.PAGE_TRACK = i
            self.FLAG += 1
            print('FLAG 是：', self.FLAG)
            yield "{}{}{}".format(self.URL_START, i, self.PARAMS)

    def url_omi(self):
        """Yield the URLs of pages queued in ``PAGE_LIST``.

        Only the pages queued when iteration starts are yielded; pages that
        fail again are re-queued for a later round.
        """
        print("开始补漏")
        for _ in range(len(self.PAGE_LIST)):
            self.PAGE_TRACK = self.PAGE_LIST.pop(0)   # first in, first out
            yield "{}{}{}".format(self.URL_START, self.PAGE_TRACK, self.PARAMS)

    def downloader(self, url, num_retries=3):
        """Download ``url`` and return the response body.

        Args:
            url: Page to fetch.
            num_retries: Extra attempts after the first one fails; each retry
                switches to a fresh proxy.

        Returns:
            The response text, or ``None`` when every attempt failed. In that
            case the current page is queued in ``PAGE_LIST`` for a later round.
        """
        for attempt in range(num_retries + 1):
            if self.proxy_con == 0:
                proxies = self.proxy_get()
            else:
                proxies = self.proxy_save
            self.proxy_save = proxies

            try:
                time.sleep(random.random() * 5)   # random delay between requests
                headers = random.choice(self.HEADERS_LIST)
                r = requests.get(url, headers=headers, proxies=proxies, timeout=4)
            except requests.RequestException:
                if attempt < num_retries:
                    print("重新下载")
                    self.proxy_con = 0   # force a new proxy for the retry
                continue
            # A successful retry now reaches the caller instead of being dropped.
            return r.text

        if self.PAGE_TRACK not in self.PAGE_LIST:
            self.PAGE_LIST.append(self.PAGE_TRACK)
        return None

    def _parse_rows(self, html):
        """Extract the table rows of one listing page as dicts.

        Raises:
            ValueError: when the page is empty or lacks the expected table.
        """
        if not html:
            raise ValueError("empty page")
        tbody = BeautifulSoup(html, 'lxml').find('tbody')
        if tbody is None:
            raise ValueError("no <tbody> in page")

        rows = []
        for tr in tbody.find_all('tr'):
            td_list = tr.find_all('td')
            if len(td_list) < 5:
                raise ValueError("table row has fewer than 5 cells")
            rows.append({
                '代码': td_list[1].string,
                '名称': td_list[2].string,
                '现价': td_list[3].string,
                '涨跌幅': td_list[4].string,
            })
        return rows

    def items_return(self):
        """Crawl all pages, then retry the failed ones, saving rows to CSV.

        The first pass walks pages 1..``MAX_PAGE``. Pages that failed are
        then retried for at most ``MAX_TRY`` rounds. A page is parsed
        completely before any of its rows are written, so a page that fails
        half-way does not leave duplicate rows when it is retried.
        """
        retry_rounds = 0
        try:
            while True:
                if self.FLAG < self.MAX_PAGE:
                    url_list = self.url_yield()
                else:
                    if not self.PAGE_LIST:
                        break
                    if retry_rounds >= self.MAX_TRY:
                        print("以下页面多次重试后仍失败:", self.PAGE_LIST)
                        break
                    retry_rounds += 1
                    url_list = self.url_omi()
                print("执行到了获取模块")

                for url in url_list:
                    html = self.downloader(url)
                    print('URL is:', url)
                    try:
                        rows = self._parse_rows(html)
                        for items in rows:
                            self.writer.writerow(items)
                            print(items)
                            print("保存成功")
                    except Exception as exc:
                        # Best effort: any parse/save problem just re-queues
                        # the page. Ctrl-C is no longer swallowed here.
                        print("解析失败", exc)
                        self.proxy_con = 0   # this proxy looks bad, replace it
                        if self.PAGE_TRACK not in self.PAGE_LIST:
                            self.PAGE_LIST.append(self.PAGE_TRACK)
                    else:
                        if rows:
                            self.proxy_con = 1   # proxy works, keep using it
        finally:
            # Push buffered rows to disk even when the crawl is interrupted.
            self.file.flush()



# Run the crawl, and always close the CSV file so buffered rows reach disk
# even when the crawl is interrupted.
app = crawl()
try:
    app.items_return()
finally:
    app.file.close()

执行到了获取模块
FLAG 是： 1
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/1/ajax/1/
解析失败
FLAG 是： 2
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/2/ajax/1/
解析失败
FLAG 是： 3
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/3/ajax/1/
解析失败
FLAG 是： 4
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/4/ajax/1/
解析失败
FLAG 是： 5
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/5/ajax/1/
解析失败
FLAG 是： 6
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/6/ajax/1/
解析失败
FLAG 是： 7
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/field/zdf/order/desc/page/7/ajax/1/
解析失败
FLAG 是： 8
代理获取失败，重新获取
代理获取失败，重新获取
URL is: http://q.10jqka.com.cn//index/index/board/all/