In [None]:
# Computing OLS - scikit-learn
import numpy as np
# LinearRegression lives in the linear_model subpackage; it is not exported
# from the top-level sklearn namespace.
from sklearn.linear_model import LinearRegression

def ols_sklearn(row):
    """Return the OLS slope of a row, fitted with scikit-learn's LinearRegression.

    The values of ``row`` are regressed on their positions 0..n-1, where n is
    ``row.shape[0]``.

    Parameters
    ----------
    row : object exposing ``.shape`` and ``.values`` (e.g. a pandas Series)
        The observations to fit.

    Returns
    -------
    The first fitted coefficient, i.e. the slope of the line.
    """
    est = LinearRegression()
    # Single feature column holding the positions 0..n-1; reshape(-1, 1) turns
    # the 1-D range into the 2-D (n, 1) layout passed to fit().
    X = np.arange(row.shape[0]).reshape(-1, 1)
    # The intercept is handled inside LinearRegression, so no column of ones
    # is added to X.
    est.fit(X, row.values)
    # coef_ holds the slope; the intercept is available as est.intercept_.
    m = est.coef_[0]

    return m

In [None]:
# Computing OLS - NumPy
import numpy as np

def ols_lstsq(row):
    """Fit a straight line to ``row`` with numpy.linalg.lstsq and return its slope.

    The values of ``row`` are regressed on their positions 0..n-1, where n is
    ``row.shape[0]``. ``row`` must expose ``.values`` (e.g. a pandas Series).
    """
    n_points = row.shape[0]
    # Design matrix with one line per observation: column 0 is the position
    # 0..n-1, column 1 is a constant 1 that models the intercept.
    design = np.vstack((np.arange(n_points), np.ones(n_points))).T
    # The first item lstsq returns is the solution [slope, intercept]; the
    # residuals and the other trailing items are not needed here.
    # rcond=-1 selects NumPy's legacy machine-precision cutoff.
    solution = np.linalg.lstsq(design, row.values, rcond=-1)[0]
    slope, _intercept = solution

    return slope

In [None]:
def ols_lstsq_raw(row):
    """Array flavour of 'ols_lstsq': ``row`` is a numpy array, not a Series.

    Regresses the entries of ``row`` on their positions 0..n-1 and returns the
    slope of the fitted line.
    """
    count = row.shape[0]
    positions = np.arange(count)
    intercept_column = np.ones(count)
    # Stack the two vectors as rows, then transpose so each observation is a line.
    design = np.vstack((positions, intercept_column)).T
    # Keep only the solution vector; the remaining items lstsq returns are dropped.
    coefficients, *_ = np.linalg.lstsq(design, row, rcond=-1)
    slope, _intercept = coefficients

    return slope

In [None]:
# Bad implementation: use iloc to fetch one row at a time and compute its slope.
# `df` and `pd` are defined outside this cell.
ms = []
for row_idx in range(df.shape[0]):
    # Positional lookup of a single row on every iteration
    row = df.iloc[row_idx]
    m = ols_lstsq(row)
    ms.append(m)

# Build the result Series once, from the accumulated list of slopes
results = pd.Series(ms)

In [None]:
# Slightly better code: let iterrows() hand over each row instead of
# looking it up with iloc. `df` and `pd` are defined outside this cell.
ms = []
# row_idx is produced by iterrows() but not used in the loop body
for row_idx, row in df.iterrows():
    m = ols_lstsq(row)
    ms.append(m)

# Build the result Series once, from the accumulated list of slopes
results = pd.Series(ms)

In [None]:
# Better code: let DataFrame.apply drive the per-row loop.
# raw=True hands each row to the function as a NumPy array rather than a
# Series, so the array variant ols_lstsq_raw must be used here; ols_lstsq
# reads row.values, which a NumPy array does not provide.
ms = df.apply(ols_lstsq_raw, axis=1, raw=True)
results = pd.Series(ms)

In [None]:
# Bad code: grow the result Series with one pd.concat call per row.
# `df` and `pd` are defined outside this cell.
# results stays None when df has no rows, because the loop body never runs.
results = None

for row_idx in range(df.shape[0]):
    row = df.iloc[row_idx]
    m = ols_lstsq(row)
    if results is None:
        results = pd.Series([m])
    else:
        # ignore_index=True renumbers the combined Series 0..n-1. Without it
        # every one-element Series contributes the label 0 and the final
        # index consists only of zeros.
        results = pd.concat((results, pd.Series([m])), ignore_index=True)