#### Настройки с учетом версий библиотеки scikit-learn

In [1]:
# Import train_test_split from wherever the installed scikit-learn provides it:
# `sklearn.model_selection` from 0.18 on, `sklearn.cross_validation` before that.
# The import is probed directly instead of comparing version strings with
# `distutils.version.LooseVersion`: distutils is deprecated (PEP 632) and was
# removed from the standard library in Python 3.12, so the old check fails there.
from sklearn import __version__ as sklearn_version  # still exposed for any cell that reads it

try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases that predate the `model_selection` module.
    from sklearn.cross_validation import train_test_split

#### Импорт набора данных

In [2]:
import numpy as np
import pandas as pd

# Location of the data file, resolved relative to the notebook's working directory.
url = 'yacht_hydrodynamics.data'

# The file is read without a header row, with fields separated by runs of
# whitespace. The separator is a regular expression, so it is written as a raw
# string: in a plain string "\s" is an invalid escape sequence, which triggers
# a DeprecationWarning on Python 3.6+ and a SyntaxWarning from Python 3.12.
df = pd.read_csv(url, header=None, sep=r"\s+")

# Assigning the names afterwards fails loudly if the column count is not 7.
df.columns = ['LPCB', 'PC', 'LDR', 'BDR', 'LBR', 'FN', 'RR']

df

Unnamed: 0,LPCB,PC,LDR,BDR,LBR,FN,RR
0,-2.3,0.568,4.78,3.99,3.17,0.125,0.11
1,-2.3,0.568,4.78,3.99,3.17,0.150,0.27
2,-2.3,0.568,4.78,3.99,3.17,0.175,0.47
3,-2.3,0.568,4.78,3.99,3.17,0.200,0.78
4,-2.3,0.568,4.78,3.99,3.17,0.225,1.18
...,...,...,...,...,...,...,...
303,-2.3,0.600,4.34,4.23,2.73,0.350,8.47
304,-2.3,0.600,4.34,4.23,2.73,0.375,12.27
305,-2.3,0.600,4.34,4.23,2.73,0.400,19.59
306,-2.3,0.600,4.34,4.23,2.73,0.425,30.48


#### Проверка наличия пропущенных значений (пропусков нет — удалять строки не требуется)

In [3]:
# Count the null entries in every column; the resulting Series is shown as the cell output.
missing_per_column = df.isnull().sum()
missing_per_column

LPCB    0
PC      0
LDR     0
BDR     0
LBR     0
FN      0
RR      0
dtype: int64

#### Нормализация

In [4]:
from sklearn.preprocessing import MinMaxScaler

# Features are every column except the last one; the target is the 'RR' column,
# reshaped to a single-column 2-D array because the scaler works on 2-D input.
X_raw = df.iloc[:, :-1].values
y_raw = df['RR'].values.reshape(-1, 1)

# Split BEFORE fitting the scalers, so the min/max statistics are learned from
# the training rows only. Fitting on the full data set would leak information
# about the test rows into the preprocessing step.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X_raw, y_raw, test_size=0.3, random_state=0)

# One scaler per array: reusing a single instance for both X and y would
# overwrite the feature statistics when it is refitted on the target.
x_scaler = MinMaxScaler().fit(X_train_raw)
y_scaler = MinMaxScaler().fit(y_train_raw)

# Test values may fall slightly outside [0, 1] because the range comes from
# the training rows; that is expected.
X_train = x_scaler.transform(X_train_raw)
X_test = x_scaler.transform(X_test_raw)
y_train = y_scaler.transform(y_train_raw)
y_test = y_scaler.transform(y_test_raw)

# Scaled versions of the complete arrays, kept under their original names.
X = x_scaler.transform(X_raw)
y = y_scaler.transform(y_raw)

# Backward-compatible alias: previously `MMS` ended up fitted on the target,
# so code calling e.g. MMS.inverse_transform(predictions) keeps working.
MMS = y_scaler

#### Модель 1

In [5]:
from keras import models
from keras import layers

def build_model(units=128, hidden_layers=2, input_dim=None):
    """Build and compile a fully connected Keras model with one output unit.

    Called without arguments it produces the "Model 1" architecture:
    two ReLU hidden layers of 128 units followed by ``Dense(1)``.

    Args:
        units: Number of units in every hidden layer.
        hidden_layers: Number of ReLU hidden layers; must be at least 1.
        input_dim: Number of input features. When omitted it is read from the
            notebook-level ``X_train`` at call time, as the original code did.

    Returns:
        A compiled ``Sequential`` model (rmsprop optimizer, MSE loss, MAE metric).

    Raises:
        ValueError: If ``hidden_layers`` is smaller than 1.
    """
    if hidden_layers < 1:
        raise ValueError('hidden_layers must be >= 1, got %r' % (hidden_layers,))
    if input_dim is None:
        # Fall back to the training matrix defined earlier in the notebook.
        input_dim = X_train.shape[1]

    model = models.Sequential()
    # Only the first layer declares the input shape.
    model.add(layers.Dense(units, activation='relu',
                           input_shape=(input_dim,)))
    for _ in range(hidden_layers - 1):
        model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

Using TensorFlow backend.


In [6]:
# Hide all CUDA devices from this process so that the work runs on the CPU.
# NOTE(review): this cell executes after keras was imported in an earlier cell;
# presumably the backend only reads the variable when it first initialises the
# GPU, so it still takes effect - confirm, or move this above the keras import.
import os

os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})

In [7]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# Train the first architecture on the training split (80 epochs, no progress
# output) and predict on the held-out split.
model = build_model()
model.fit(X_train, y_train, epochs=80, verbose=0)
y_test_pred = model.predict(X_test)

# Both metrics compare predictions with y_test as prepared by the
# normalisation cell, i.e. in min-max-scaled target units.
mse_model_1 = mean_squared_error(y_test, y_test_pred)
print('MSE: %.5f' % mse_model_1)
r2_model_1 = r2_score(y_test, y_test_pred)
print('R^2: %.5f' % r2_model_1)

MSE: 0.00251
R^2: 0.94308


#### Модель 2

In [8]:
def build_model(units=256, hidden_layers=3, input_dim=None):
    """Build and compile a fully connected Keras model with one output unit.

    Called without arguments it produces the "Model 2" architecture:
    three ReLU hidden layers of 256 units followed by ``Dense(1)``.

    NOTE: this definition replaces any ``build_model`` defined earlier in the
    notebook, so cells executed after this one use these defaults.

    Args:
        units: Number of units in every hidden layer.
        hidden_layers: Number of ReLU hidden layers; must be at least 1.
        input_dim: Number of input features. When omitted it is read from the
            notebook-level ``X_train`` at call time, as the original code did.

    Returns:
        A compiled ``Sequential`` model (rmsprop optimizer, MSE loss, MAE metric).

    Raises:
        ValueError: If ``hidden_layers`` is smaller than 1.
    """
    if hidden_layers < 1:
        raise ValueError('hidden_layers must be >= 1, got %r' % (hidden_layers,))
    if input_dim is None:
        # Fall back to the training matrix defined earlier in the notebook.
        input_dim = X_train.shape[1]

    model = models.Sequential()
    # Only the first layer declares the input shape.
    model.add(layers.Dense(units, activation='relu',
                           input_shape=(input_dim,)))
    for _ in range(hidden_layers - 1):
        model.add(layers.Dense(units, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model

In [9]:
# Train the architecture from the build_model defined in the preceding cell
# (80 epochs, no progress output) and predict on the held-out split.
# `model` and `y_test_pred` from the first experiment are overwritten here.
model = build_model()
model.fit(X_train, y_train, epochs=80, verbose=0)
y_test_pred = model.predict(X_test)

# Both metrics compare predictions with y_test as prepared by the
# normalisation cell, i.e. in min-max-scaled target units.
mse_model_2 = mean_squared_error(y_test, y_test_pred)
print('MSE: %.5f' % mse_model_2)
r2_model_2 = r2_score(y_test, y_test_pred)
print('R^2: %.5f' % r2_model_2)

MSE: 0.00282
R^2: 0.93609
