In [None]:
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots

In [None]:
import statsmodels.api as sm  #统计库

In [None]:
from statsmodels.stats.outliers_influence \
    import variance_inflation_factor as VIF
from statsmodels.stats.anova import anova_lm

In [None]:
from ISLP import load_data
from ISLP.models import (ModelSpec as MS,
                         summarize,
                         poly)

# Packages used for the ISLP labs.

In [None]:
# Show the names currently defined in the notebook's top-level namespace.
dir()

In [None]:
# Create a small NumPy array, then list the attributes and methods it exposes.
A = np.array([3, 11, 5])
dir(A)

In [None]:
# Call one of the array's methods: the sum of its elements.
A.sum()

In [None]:
# Load the Boston data set through ISLP's loader and show its column names.
Boston = load_data("Boston")
Boston.columns

In [None]:
# Build the design matrix by hand: a column of ones for the intercept
# next to the single predictor `lstat`, then preview the first four rows.
X = pd.DataFrame(
    {
        'intercept': np.ones(len(Boston)),
        'lstat': Boston['lstat'],
    }
)
X.head(4)

In [None]:
# Ordinary least squares fit of the response `medv` on the design matrix X.
y = Boston['medv']
model = sm.OLS(y, X)
results = model.fit()

In [None]:
# Display the fit through ISLP's summarize helper.
summarize(results)

In [None]:
# Build the design matrix with ISLP's ModelSpec in two explicit steps:
# fit() the specification on Boston, then transform() Boston into a matrix.
design = MS(['lstat']).fit(Boston)
X = design.transform(Boston)
X[:4]

In [None]:
# Same specification, with fit and transform combined into one fit_transform() call.
design = MS(['lstat'])
X = design.fit_transform(Boston)
X[:4]

In [None]:
# Full statsmodels summary of the fitted model.
results.summary()

In [None]:
results.params  # look up the fitted parameter estimates

In [None]:
# New `lstat` values to predict at, passed through the already-fitted `design`
# to obtain the matrix handed to the model for prediction.
new_df = pd.DataFrame({'lstat': [5, 10, 15]})
newX = design.transform(new_df)
newX

In [None]:
# Predictions at the new points; `predicted_mean` holds the point predictions.
new_predictions = results.get_prediction(newX)
new_predictions.predicted_mean

In [None]:
# Prediction intervals for new observations (obs=True) at the new points.
# `alpha` is the significance level, not the coverage: 0.05 gives 95% intervals,
# whereas alpha=0.5 would give only 50% intervals.
new_predictions.conf_int(obs=True, alpha=0.05)

In [None]:
def abline(ax, b, m, *args, **kwargs):
    """Draw the straight line y = m * x + b across the current x-range of `ax`.

    Parameters
    ----------
    ax : object exposing ``get_xlim()`` and ``plot()`` (e.g. a matplotlib Axes)
        Axes to draw on.
    b : intercept of the line.
    m : slope of the line.
    *args, **kwargs :
        Forwarded unchanged to ``ax.plot`` (format string, linewidth, ...).
    """
    x_span = ax.get_xlim()
    left, right = x_span[0], x_span[1]
    # Evaluate the line at the two ends of the visible x-range.
    y_span = [m * left + b, m * right + b]
    ax.plot(x_span, y_span, *args, **kwargs)

In [None]:
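# `**kwargs` lets a function accept any number of keyword arguments; they arrive inside the function as a dict.
# `*args` lets a function accept any number of positional arguments; they arrive inside the function as a tuple.
# Both `*args` and `**kwargs` are Python's variable-length argument mechanisms.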

In [None]:
# Scatter `medv` against `lstat` and overlay the fitted line via abline(ax, b, m, ...).
ax = Boston.plot.scatter('lstat', 'medv')
abline(ax,
       results.params.iloc[0],  # passed as b (intercept); newer pandas wants .iloc for positional access
       results.params.iloc[1],  # passed as m (slope)
       'r--',  # red dashed line
       linewidth=3)

In [None]:
# Residuals against fitted values for the current `results`.
# subplots() returns a pair; element [1] is the object the plotting calls below are made on.
ax = subplots(figsize=(8, 8))[1]
ax.scatter(results.fittedvalues, results.resid)
ax.set_xlabel('Fitted value')
ax.set_ylabel('Residual')
ax.axhline(0, c='k', ls='--')  # add a black dashed horizontal line at zero

In [None]:
# Leverage of each observation, read from the diagonal of the hat matrix.
infl=results.get_influence()
ax=subplots(figsize=(8,8))[1]
ax.scatter(np.arange(X.shape[0]),infl.hat_matrix_diag)  # leverage statistic: hat-matrix diagonal plotted against row position
ax.set_xlabel('Index')
ax.set_ylabel('Leverage')
# Position of the observation with the largest leverage.
np.argmax(infl.hat_matrix_diag)

In [None]:
# Multiple regression of `medv` on two predictors, `lstat` and `age`.
# Note: this rebinds X to the two-predictor design matrix.
X=MS(['lstat','age']).fit_transform(Boston)
model1=sm.OLS(y,X)
results1=model1.fit()
summarize(results1)

In [None]:
# Every column except the response `medv` becomes a predictor.
terms=Boston.columns.drop('medv')
terms

In [None]:
# Regress `medv` on all predictors in `terms`.
# Note: this rebinds X, model and results, so later cells see this fit
# rather than the earlier single-predictor one.
X=MS(terms).fit_transform(Boston)
model=sm.OLS(y,X)
results=model.fit()
summarize(results)

In [None]:
# Refit the model on every predictor except `age`.
# Index.drop takes all labels as ONE list-like argument; a second positional
# argument is the `errors` flag, so drop('medv','age') removes only `medv`
# and silently leaves `age` in the model.
minus_age=Boston.columns.drop(['medv','age'])
Xma=MS(minus_age).fit_transform(Boston)
summarize(sm.OLS(y,Xma).fit())

In [124]:
# Variance inflation factor for each column of X except column 0
# (presumably the intercept column added by MS -- confirm), tabulated by column name.
vals=[VIF(X,i)
      for i in range(1,X.shape[1])]
vif=pd.DataFrame({'vif':vals},
                 index=X.columns[1:])
vif

Unnamed: 0,vif
crim,1.767486
zn,2.298459
indus,3.987181
chas,1.071168
nox,4.369093
rm,1.912532
age,3.088232
dis,3.954037
rad,7.445301
tax,9.002158
