# 회귀 모델 생성

In [10]:
import pandas as pd
import joblib
from sklearn.datasets import load_iris, load_diabetes, load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
# Load the iris dataset into a DataFrame with short column names
# (sl/sw = sepal length/width, pl/pw = petal length/width) and append
# the class label as a trailing 'target' column.
iris = load_iris()
df = pd.DataFrame(
    data=iris.data,
    columns=['sl', 'sw', 'pl', 'pw'],
).assign(target=iris.target)
df.head()

Unnamed: 0,sl,sw,pl,pw,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
# Train one linear regression per iris measurement: the chosen column is
# the response and every other column of df is a predictor. Each fitted
# model is saved to its own pickle under ../static/model/.
features = ['sl', 'sw', 'pl', 'pw']
for feature in features:
    predictors = df.drop(columns=feature)
    response = df[feature]
    # 75/25 split with a fixed seed; the held-out part is not used in this cell.
    X_train, X_test, y_train, y_test = train_test_split(
        predictors, response, test_size=0.25, random_state=2021
    )
    lr = LinearRegression().fit(X_train, y_train)
    joblib.dump(lr, f'../static/model/iris_{feature}_lr.pkl')

In [2]:
# Load the diabetes dataset and expose its feature matrix as a DataFrame
# whose columns carry the dataset's own feature names.
diabetes = load_diabetes()
df = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)
df.head(n=5)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641


In [8]:
# Train one single-predictor linear regression per diabetes feature
# against the dataset target, saving each model to its own pickle.
features = diabetes.feature_names
y = diabetes.target  # same response for every per-feature model
for feature in features:
    # Two-dimensional (n_samples, 1) array, as the estimator expects.
    X = df[[feature]].to_numpy()
    # 75/25 split with a fixed seed; the held-out part is not used in this cell.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=2021
    )
    lr = LinearRegression().fit(X_train, y_train)
    joblib.dump(lr, f'../static/model/diabetes_{feature}_lr.pkl')

In [9]:
# Show the column names of the current frame as a plain list.
list(df.columns)

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [11]:
# Load the Boston housing dataset and expose its feature matrix as a
# DataFrame whose columns carry the dataset's own feature names.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this cell only runs on older scikit-learn versions.
boston = load_boston()
df = pd.DataFrame(data=boston.data, columns=boston.feature_names)
df.head(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [12]:
# Fit a linear regression on all Boston features and save it as a pickle.
# 75/25 split with a fixed seed; the held-out part is not used in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data, boston.target, test_size=0.25, random_state=2021
)
# Create a fresh estimator here instead of reusing whatever `lr` an earlier
# cell left behind: the cell then works after a kernel restart or when run
# out of order, and it no longer refits an unrelated model object in place.
lr = LinearRegression()
lr.fit(X_train, y_train)
joblib.dump(lr, '../static/model/boston_lr.pkl')

['../static/model/boston_lr.pkl']

In [13]:
# Map every column name of the current frame to an initial value of 0.
features = dict.fromkeys(df.columns.tolist(), 0)
features

{'CRIM': 0,
 'ZN': 0,
 'INDUS': 0,
 'CHAS': 0,
 'NOX': 0,
 'RM': 0,
 'AGE': 0,
 'DIS': 0,
 'RAD': 0,
 'TAX': 0,
 'PTRATIO': 0,
 'B': 0,
 'LSTAT': 0}

In [14]:
# Number of columns in the current frame.
len(df.columns)

13