In [30]:
import os
from getpass import getpass

import jqdatasdk as jq
from jqdatasdk import finance

# Authenticate against the jqdatasdk data service.
# Credentials are taken from the JQ_USERNAME / JQ_PASSWORD environment
# variables, falling back to an interactive prompt, so that no account secret
# is ever stored in (or shared with) the notebook file.
# SECURITY: the password that used to be hardcoded in this cell must be
# considered leaked and should be rotated.
jq_username = os.environ.get('JQ_USERNAME') or input('JQData username: ')
jq_password = os.environ.get('JQ_PASSWORD') or getpass('JQData password: ')
jq.auth(jq_username, jq_password)

In [42]:
#首先导入线性回归模型
from sklearn.linear_model import LinearRegression
#导入数据集拆分工具
from sklearn.model_selection import train_test_split
#为了演示，我们使用scikit-learn内置的数据集生成工具
from sklearn.datasets import make_regression
#导入numpy和pandas
import numpy as np
import pandas as pd
#导入画图工具
import matplotlib.pyplot as plt
import seaborn as sns

In [31]:
# Code of the index whose constituents form the stock universe
# (presumably the SSE 50 index -- TODO confirm against the data provider).
index_code = '000016.XSHG'
# Constituent stock codes of that index, used below to filter the query.
stocks = jq.get_index_stocks(index_code)

In [35]:
# Derived balance-sheet expressions, named up front so the query reads cleanly.
# Net assets: total assets minus total liabilities.
net_assets = jq.balance.total_assets - jq.balance.total_liability
# Total assets over total liabilities, i.e. the inverse of the
# debt-to-asset ratio.
assets_over_liabilities = jq.balance.total_assets / jq.balance.total_liability

# Fundamentals query restricted to the index constituents in `stocks`.
# The column order here must match the names assigned to df.columns later.
q = jq.query(
    jq.valuation.code,                       # stock code
    jq.valuation.market_cap,                 # market capitalisation (target)
    net_assets,
    assets_over_liabilities,
    jq.income.net_profit,                    # net profit
    jq.indicator.inc_revenue_year_on_year,   # year-on-year revenue growth
    jq.balance.development_expenditure,      # development expenditure (R&D proxy)
).filter(jq.valuation.code.in_(stocks))

In [37]:
# Run the query and store the result in a table (df).
# NOTE(review): no date/statDate argument is passed, so the snapshot date is
# whatever the SDK defaults to -- pin it explicitly if the results need to be
# reproducible.
df = jq.get_fundamentals(q)

In [38]:
# Short column labels, listed in the same order as the fields of query `q`:
# stock code, market cap, net assets, assets/liabilities, net profit,
# year-on-year revenue growth, development expenditure.
column_labels = ['code', 'mcap', 'na', '1/DA ratio', 'net income', 'growth', 'RD']
df.columns = column_labels

In [39]:
# Preview the first rows of the renamed table as a sanity check.
df.head()

Unnamed: 0,code,mcap,na,1/DA ratio,net income,growth,RD
0,600010.XSHG,911.7007,61981620000.0,1.721479,-1407279000.0,-30.21,
1,600028.XSHG,5554.6709,925536900000.0,1.822713,13394000000.0,13.41,
2,600030.XSHG,3051.5505,253812100000.0,1.234698,5587781000.0,-25.66,
3,600031.XSHG,1362.3231,65966220000.0,1.746315,990159000.0,-8.27,531280000.0
4,600036.XSHG,8907.6494,927766400000.0,1.105676,37977000000.0,3.73,


In [40]:
# Use the stock code as the row index, then remove the 'code' column so it
# cannot take part in the regression.
# NOTE: this consumes the 'code' column, so re-running this cell without
# re-running the cells above raises a KeyError.
df.index = df['code'].values
df = df.drop(columns='code')

# Features: every column except market cap. Missing values are replaced by 0.
# NOTE(review): zero-filling treats "not reported" as "zero" (e.g. for 'RD');
# this is a modelling choice, not a neutral default.
X = df.drop(columns='mcap').fillna(0)

# Target: market cap, with missing values likewise replaced by 0.
y = df['mcap'].fillna(0)

In [43]:
# Fit a linear regression of market cap (y) on the fundamental features (X).
# The model is fitted on the full data set: its in-sample residuals are what
# the rest of the notebook analyses.
linear_model = LinearRegression()
reg = linear_model.fit(X, y)

In [44]:
# Model-implied market cap for every stock the model was fitted on.
fitted_mcap = reg.predict(X)

# Wrap the predictions in a one-column table that shares y's index
# (the stock codes), so it can be aligned with the actual market caps.
predict = pd.DataFrame({'predict_mcap': fitted_mcap}, index=y.index)

In [45]:
# Preview the first rows of the predicted market caps.
predict.head()

Unnamed: 0,predict_mcap
600010.XSHG,1583.077326
600028.XSHG,4849.453777
600030.XSHG,2434.954287
600031.XSHG,1927.684972
600036.XSHG,6675.836354


In [46]:
# Residual = actual market cap minus the model's predicted market cap.
# A negative value means the stock's actual market cap is below the
# model's prediction.
residual = df['mcap'] - predict['predict_mcap']

# Store the residuals in a one-column table (still indexed by stock code)
# and sort it in ascending order, most negative first.
diff = residual.to_frame('diff').sort_values(by='diff', ascending=True)

# The 10 stocks the model considers most undervalued.
diff.head(10)

Unnamed: 0,diff
603986.XSHG,-4091.684097
601919.XSHG,-3658.837312
600276.XSHG,-2952.746799
601288.XSHG,-2448.095991
601668.XSHG,-2022.097935
600438.XSHG,-1848.361159
600585.XSHG,-1751.292565
601166.XSHG,-1682.398918
600436.XSHG,-1642.368608
603799.XSHG,-1615.539028
