In [8]:
import numpy as np
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
# Directory the regional CSV files are read from and the pickled models are written to.
path = './model_data'

In [2]:
# Columns kept from every regional fishery CSV (names as they appear in the files).
columns = ['일자','월', '어종', '개체수', '표층수온', '표층염분', '조업_위도', '조업_경도', '생태구']


def load_region(region):
    """Read one regional fishery CSV and return it with normalised column names.

    Parameters
    ----------
    region : str
        Region name used as the file-name prefix; the file read is
        ``{path}/{region}_어업.csv``.

    Returns
    -------
    pandas.DataFrame
        The columns listed in ``columns`` (in that order), with
        '조업_위도' renamed to '위도' and '조업_경도' renamed to '경도'.
    """
    # usecols avoids parsing columns that would be discarded right away;
    # the [columns] selection afterwards restores the requested column order.
    frame = pd.read_csv(f'{path}/{region}_어업.csv', usecols=columns)[columns]
    return frame.rename({'조업_위도':'위도', '조업_경도':'경도'}, axis=1)


# One frame per region; the names are kept because later cells refer to them.
대한해협 = load_region('대한해협')
동해 = load_region('동해')
서남해역 = load_region('서남해역')
서해중부 = load_region('서해중부')
제주 = load_region('제주')

In [3]:
# Stack the five regional frames, order the rows by the '일자' column,
# and renumber the index from 0.
regional_frames = [대한해협, 동해, 서남해역, 서해중부, 제주]
data = (
    pd.concat(regional_frames, axis=0)
    .sort_values('일자')
    .reset_index(drop=True)
)

In [4]:
# Preview the first five rows of the combined frame.
data.head(n=5)

Unnamed: 0,일자,월,어종,개체수,표층수온,표층염분,위도,경도,생태구
0,2019-11-10-09:21:00,11,문어,2,18.0586,33.2332,34.281879,126.68805,동해
1,2019-11-10-09:21:00,11,장어,2,18.0586,33.2332,34.281879,126.68805,동해
2,2019-11-10-09:21:00,11,돌게,2,18.0586,33.2332,34.281879,126.68805,동해
3,2019-11-10-09:21:00,11,문어,2,18.0586,33.2332,34.281879,126.68805,동해
4,2019-11-10-09:21:00,11,장어,2,18.0586,33.2332,34.281879,126.68805,동해


In [18]:
# Best fitted regressor per species, keyed by the value found in the '어종' column.
models = dict()
# Grid searched for every species: 61 forest sizes x 13 depths = 793 candidates,
# each fitted once per fold, so this cell is expensive to run.
params = {'n_estimators':range(40, 101), 'max_depth':range(3, 16)}
# Fixed seed so that a fresh run selects and fits the same forests.
seed = 42
# Number of cross-validation folds used by the grid search.
n_folds = 3

for fish in data['어종'].unique():
    temp = data[data['어종']==fish]
    X = temp[['위도', '경도', '표층수온','표층염분', '월']]
    y = temp['개체수']
    # GridSearchCV cannot split fewer rows than folds and would raise,
    # aborting the loop for every remaining species; skip those instead.
    if len(temp) < n_folds:
        print(f'skipping {fish}: only {len(temp)} row(s), need at least {n_folds}')
        continue
    cv = GridSearchCV(
        estimator=RandomForestRegressor(random_state=seed),
        param_grid=params,
        scoring='r2',
        n_jobs=-1,
        cv=n_folds,
    ).fit(X, y)
    # best_estimator_ is the candidate with the best mean R^2, refitted on all rows of this species.
    models[fish]=cv.best_estimator_

In [19]:
# Persist the species -> fitted model dictionary as a single file under `path`.
joblib.dump(value=models, filename=f'{path}/models.pkl')

['./model_data/models.pkl']

In [20]:
# Reload the dictionary of fitted models written by the joblib.dump cell.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
tmp = joblib.load(f'{path}/models.pkl')

In [38]:
# Display the feature frame left over from the last iteration of the training loop;
# X is not defined anywhere else, so this cell depends on that loop having run.
X

Unnamed: 0,위도,경도,표층수온,표층염분,월
183381,34.588466,127.652774,6.1113,32.6829,3
183400,34.588466,127.652774,6.5069,33.3011,3
183447,34.588466,127.652774,3.8469,31.5661,3
183507,34.588466,127.652774,3.8469,31.5661,3
183641,34.588466,127.652774,3.3848,31.5926,3
...,...,...,...,...,...
237614,34.588466,127.652774,16.0271,31.1441,5
237790,34.588466,127.652774,16.1939,31.0808,5
238078,34.588466,127.652774,17.9460,32.6235,5
238238,34.588466,127.652774,17.1962,28.7772,5


In [40]:
# First row of the leftover feature frame as a plain NumPy array
# (X comes from the last iteration of the training loop).
X.iloc[0].values

array([ 34.58846616, 127.65277427,   6.1113    ,  32.6829    ,
         3.        ])

In [45]:
# Predict the catch count for one sample with the reloaded '갈치' model.
# X.iloc[[0]] keeps the row as a one-row DataFrame, so the column names the model
# was fitted with are passed along instead of a bare array.
# NOTE(review): X is whatever the last training-loop iteration left behind —
# confirm that it actually holds '갈치' rows before reading anything into this value.
tmp['갈치'].predict(X.iloc[[0]])



array([99.70631155])