In [57]:
import numpy as np
import pandas as pd
import scipy.optimize as op
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
import warnings
warnings.filterwarnings('ignore') # 実行に影響のない　warninig　を非表示にします. 非推奨.

In [58]:

class MyLinearRegression(BaseEstimator, RegressorMixin):
    def __init__(self, lam=0):
        """
        Initialize a coefficient and an intercept.
        """
        self.lam = lam

    def fit(self, X, y):
        """
        X: array-like, shape (n_samples, n_features)
        y: array, shape (n_samples,)
        Estimate a coefficient and an intercept　from data.
        """
        X, y = check_X_y(X, y, y_numeric=True)

        X_ = np.append(X, [[1] for i in range(len(X))], axis=1)
        coef = np.array([1 for i in range(len(X_[0]))])

        def f(params):
            return ((y-np.matmul(X_, params)) ** 2).sum()+self.lam*(params**2).sum()

        res = op.minimize(f, coef)

        self.coef_ = res.x[:-1]
        self.intercept_ = res.x[-1]
        return self

    def predict(self, X):
        """
        Calc y from X
        """
        check_is_fitted(self, "coef_", "intercept_")  # 学習済みかチェックする(推奨)
        X = check_array(X)
        X_ = np.append(X, [[1] for i in range(len(X))], axis=1)
        y = np.matmul(X_, np.append(self.coef_,self.intercept_))
        return y


In [59]:
X_train=pd.read_csv("../text/data/X_train.csv")
y_train=pd.read_csv("../text/data/y_train.csv")

X_test=pd.read_csv("../text/data/X_test.csv")
X=X_train.drop("最多風向",axis=1).values
y=np.ravel(y_train.values)
# print(X)
# print(y)
# y_train.head()
# print(y)

In [60]:
X=X-X.mean()
y=y-y.mean()

In [61]:
clf=MyLinearRegression()
clf.fit(X,y)
print(clf.coef_)
print(clf.intercept_)

[-0.36710079  1.46229318 -0.891735    0.03603802  2.34559292 -0.83520083
 -0.3473535  -0.1104787  -0.09079284 -0.04601216  0.71083359 -0.92886547
  0.05869408]
417.6345934245475


In [67]:
# Grid search of Ridge()
parameters = {'alpha':np.arange(-5000,5000)}
print(parameters)
reg = GridSearchCV(Ridge(),parameters,cv=5)
reg.fit(X,y)
best = reg.best_estimator_
# 決定係数
print("決定係数: ", best.score(X, y)) # BaseEstimatorを継承しているため使える
# lambda
print("lam: ", best.alpha)

{'alpha': array([-5000, -4999, -4998, ...,  4997,  4998,  4999])}


In [65]:
# Grid search of MyLinearRegression
parameters = {'lam':np.arange(-5000,5000)}
print(parameters)
reg = GridSearchCV(MyLinearRegression(),parameters,cv=5)
reg.fit(X,y)
best = reg.best_estimator_
# 決定係数
print("決定係数: ", best.score(X, y)) # BaseEstimatorを継承しているため使える
# lambda
print("lam: ", best.lam)

{'lam': array([9.35762297e-14, 2.54366565e-13, 6.91440011e-13, 1.87952882e-12,
       5.10908903e-12, 1.38879439e-11, 3.77513454e-11, 1.02618796e-10,
       2.78946809e-10, 7.58256043e-10, 2.06115362e-09, 5.60279644e-09,
       1.52299797e-08, 4.13993772e-08, 1.12535175e-07, 3.05902321e-07,
       8.31528719e-07, 2.26032941e-06, 6.14421235e-06, 1.67017008e-05,
       4.53999298e-05, 1.23409804e-04, 3.35462628e-04, 9.11881966e-04,
       2.47875218e-03, 6.73794700e-03, 1.83156389e-02, 4.97870684e-02,
       1.35335283e-01, 3.67879441e-01, 1.00000000e+00, 2.71828183e+00,
       7.38905610e+00, 2.00855369e+01, 5.45981500e+01, 1.48413159e+02,
       4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03,
       2.20264658e+04, 5.98741417e+04, 1.62754791e+05, 4.42413392e+05,
       1.20260428e+06, 3.26901737e+06, 8.88611052e+06, 2.41549528e+07,
       6.56599691e+07, 1.78482301e+08, 4.85165195e+08, 1.31881573e+09,
       3.58491285e+09, 9.74480345e+09, 2.64891221e+10, 7.20048993e+10

In [66]:
from sklearn.decomposition import PCA
pca=PCA(n_components=2)
X_pca=pca.fit_transform(X)
print(X_pca.shape)
print(pca.explained_variance_ratio_)
print(pca.components_)

(28, 2)
[0.58387849 0.27085845]
[[ 0.01676688  0.03082961  0.0143759   0.93738159  0.01972845  0.06350653
   0.14646301  0.0549713   0.10304277  0.02650805 -0.11122719 -0.11168056
  -0.23413121]
 [ 0.07622884  0.11850467  0.04265574  0.03057731  0.01751759  0.05344412
   0.10303293 -0.05285351 -0.55808295 -0.79890643 -0.0221221  -0.02302409
  -0.10045567]]
