In [4]:
# Python > 3.8
import pandas as pd
import numpy  as np

import sklearn
from sklearn import linear_model
from sklearn import ensemble
from sklearn import metrics
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import cm
import seaborn as sns
import tensorflow as tf

import json
import pickle

In [5]:
def GET_METRICS_SINGLE(y_test, y_pred) -> None:
    """Print RMSE and R2 for one set of predictions.

    Args:
        y_test: ground-truth target values.
        y_pred: predicted values, aligned row-for-row with ``y_test``.

    Returns:
        None. The two metrics are written to stdout, one per line.
    """
    mse = metrics.mean_squared_error(y_test, y_pred)
    # Both metrics are computed before anything is printed.
    report = (
        ("RMSE: {:>9,.6f} (кв. корень из среднеквадратичной ошибки)", np.sqrt(mse)),
        ("R2  : {:>9,.3f} (коэфф. детерминации)", metrics.r2_score(y_test, y_pred)),
    )
    for template, value in report:
        print(template.format(value))
#--------------------------------------------------------------------------

In [6]:
# Load the prepared dataset; row 0 of the file holds the column names.
df = pd.read_csv(
    './concrete_nan_PREPARED2.csv',
    sep=',',
    header=0,  # pass header=None (the object, not the string 'None') if the file has no header row
)
print("Размер таблицы", df.shape)
df.head(2)

Размер таблицы (896, 9)


Unnamed: 0,Cement,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age,Concrete_compressive_strength
0,266.0,114.0,0.0,228.0,0.0,932.0,670.0,90.0,47.03
1,380.0,95.0,0.0,228.0,0.0,932.0,594.0,28.0,36.45


In [7]:
# Predictor column names (order matters: it is reused for model inputs) and the single target column.
features = [
    'Cement',
    'Blast_Furnace_Slag',
    'Fly_Ash',
    'Water',
    'Superplasticizer',
    'Coarse_Aggregate',
    'Fine_Aggregate',
    'Age',
]
target = ['Concrete_compressive_strength']

In [8]:
# Column-wise split into predictors and target; list indexing keeps both as DataFrames.
dfX, dfY = df[features], df[target]

In [9]:
valid_size = 0.3  # fraction of rows held out for testing
rand_seed = 8     # seed for the shuffling RNG

# Shuffled hold-out split of the raw (unscaled) predictors and target.
xTrain, xTest, yTrain, yTest = train_test_split(
    dfX[features], dfY[target],
    test_size=valid_size, random_state=rand_seed, shuffle=True,
)

In [10]:
# Baseline: ordinary least squares on three of the eight predictors.
linreg_x1x2x3 = linear_model.LinearRegression()

# fit() returns the estimator, so both names refer to the same fitted object.
lin_model_x1x2x3 = linreg_x1x2x3.fit(
    xTrain.loc[:, ['Cement', 'Superplasticizer', 'Age']],
    yTrain.loc[:, ['Concrete_compressive_strength']],
)

In [11]:
# Predict on the held-out rows using the same three columns the model was fitted on.
y_pred_x1x2x3 = lin_model_x1x2x3.predict(xTest[['Cement', 'Superplasticizer', 'Age']])

In [12]:
# Report RMSE and R2 of the linear baseline on the test split (original target units).
GET_METRICS_SINGLE(yTest, y_pred_x1x2x3)

RMSE: 10.625701 (кв. корень из среднеквадратичной ошибки)
R2  :     0.589 (коэфф. детерминации)


In [14]:
# Metadata describing the linear model; it is serialized to m1.json further down.
m1_dist = {
    "type": str(type(lin_model_x1x2x3)),
    "normNeed": False,       # model consumes raw, unscaled inputs
    "standartNeed": False,
    "input": ['Cement', 'Superplasticizer', 'Age'],
    "output": ['Concrete_compressive_strength'],
    "R2": metrics.r2_score(yTest[['Concrete_compressive_strength']], y_pred_x1x2x3),
    "RMSE": np.sqrt(
        metrics.mean_squared_error(yTest[['Concrete_compressive_strength']], y_pred_x1x2x3)
    ),
    "trainData": "concrete_nan_PREPARED2.csv",
    "modelName": "m1.dump",  # file name the estimator is pickled to
}

In [15]:
# Write the model metadata as JSON; the context manager closes the file even if json.dump raises.
with open("./m1.json", "w") as m1_file:
    json.dump(m1_dist, m1_file, indent=4)

In [16]:
# Pickle the fitted linear model; the context manager guarantees the file is flushed and closed.
with open("./m1.dump", "wb") as m1_out:
    pickle.dump(lin_model_x1x2x3, m1_out)

In [17]:
# Read the metadata back from disk; the file is closed automatically on exit from the block.
with open("m1.json", "r") as m1_file:
    m1_dist = json.load(m1_file)

In [18]:
# Restore the pickled linear model.
# WARNING: pickle.load can execute arbitrary code; only load dumps produced by a trusted source.
with open("m1.dump", "rb") as m1_out:
    m1 = pickle.load(m1_out)

In [19]:
# One hand-made sample for the three-feature linear model (column order matches the fit).
dfX_test = pd.DataFrame({
    'Cement': [249.1],
    'Superplasticizer': [12.8],
    'Age': [100.0],
})

In [20]:
# Run the reloaded linear model on the sample; no scaling is applied on this path.
y_pred = m1.predict(dfX_test)

In [21]:
# Display the prediction of the reloaded linear model.
y_pred

array([[60.78586275]])

In [22]:
# Rescale every predictor column to the [0, 1] range.
# NOTE(review): the scaler is fitted on the full table, before the train/test split below,
# so the min/max of test rows influence the scaling. Consider fitting on training rows only.
scalerNormX = MinMaxScaler()
scalerNormX.fit(dfX)

# transform() returns a bare array, so the column names and row index are re-attached.
dfXNorm = pd.DataFrame(scalerNormX.transform(dfX), columns=dfX.columns, index=dfX.index)
print("Размер таблицы", dfXNorm.shape)
dfXNorm.head(2)

Размер таблицы (896, 8)


Unnamed: 0,Cement,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age
0,0.384977,0.360645,0.0,1.0,0.0,0.380814,0.216524,0.747899
1,0.652582,0.300538,0.0,1.0,0.0,0.380814,0.0,0.226891


In [23]:
# Rescale the target column to the [0, 1] range with its own scaler, so predictions
# can later be mapped back with inverse_transform.
# NOTE(review): fitted on the full table (before the split), same caveat as for the predictors.
scalerNormY = MinMaxScaler()
scalerNormY.fit(dfY)

# transform() returns a bare array, so the column name and row index are re-attached.
dfYNorm = pd.DataFrame(scalerNormY.transform(dfY), columns=dfY.columns, index=dfY.index)
print("Размер таблицы", dfYNorm.shape)
dfYNorm.head(2)

Размер таблицы (896, 1)


Unnamed: 0,Concrete_compressive_strength
0,0.584543
1,0.446188


In [24]:
# Sanity check: after min-max scaling every predictor column should span exactly 0..1.
dfXNorm.describe().loc[["min", "max" ]]

Unnamed: 0,Cement,Blast_Furnace_Slag,Fly_Ash,Water,Superplasticizer,Coarse_Aggregate,Fine_Aggregate,Age
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [25]:
# Sanity check: the scaled target should span exactly 0..1 as well.
dfYNorm.describe().loc[["min", "max" ]]

Unnamed: 0,Concrete_compressive_strength
min,0.0
max,1.0


In [26]:
# Test-split fraction and RNG seed (same values as for the split of the unscaled data).
valid_size, rand_seed = 0.3, 8

In [27]:
# Shuffled hold-out split of the scaled predictors and scaled target.
xNorm_train, xNorm_test, yNorm_train, yNorm_test = train_test_split(
    dfXNorm[features], dfYNorm[target],
    test_size=valid_size,     # fraction of rows held out for testing
    random_state=rand_seed,   # RNG seed for the shuffle
    shuffle=True,
)

In [28]:
# Loss curves accumulated across successive fit() calls on this model.
totalHistoryLossTrain = []
totalHistoryLossTest = []

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation (and therefore
# the reported metrics) is repeatable on a fresh kernel.
tf.keras.utils.set_random_seed(rand_seed)

# Fully connected regressor: inputs -> 30 sigmoid -> 30 sigmoid -> 1 linear output.
model = tf.keras.models.Sequential()

# Input width follows the feature list instead of a hard-coded 8.
model.add(tf.keras.layers.Input(shape=(len(features),)))

# Each hidden layer is a linear Dense followed by a separate sigmoid Activation layer.
model.add(tf.keras.layers.Dense(units=30, activation=None))
model.add(tf.keras.layers.Activation(activation=tf.keras.activations.sigmoid))

model.add(tf.keras.layers.Dense(units=30, activation=None))
model.add(tf.keras.layers.Activation(activation=tf.keras.activations.sigmoid))

# Single linear output unit for the (scaled) regression target.
model.add(tf.keras.layers.Dense(units=1, activation=None))

fLoss = tf.keras.losses.MeanSquaredError()
fOptimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
# Use the Metric class rather than a Loss object: a Metric accumulates per-sample state
# across batches, while a Loss passed as a metric is only averaged per batch.
# The reported name stays "mean_squared_error".
fMetric = tf.keras.metrics.MeanSquaredError()

model.compile(
    loss=fLoss,
    optimizer=fOptimizer,
    metrics=[fMetric],
)

In [29]:
epochForTrain = 500

# batch_size is larger than the training split, so every epoch is one full-batch
# gradient step (the progress log shows 1/1).
history = model.fit(
    xNorm_train, yNorm_train,
    validation_data=(xNorm_test, yNorm_test),
    epochs=epochForTrain,
    batch_size=1000,
    verbose=1,
)

# Append this run's curves to the running totals; val_loss exists only when validation data was given.
totalHistoryLossTrain.extend(history.history['loss'])
totalHistoryLossTest.extend(history.history.get('val_loss', []))

Epoch 1/500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - loss: 0.2053 - mean_squared_error: 0.2053 - val_loss: 0.0528 - val_mean_squared_error: 0.0528
Epoch 2/500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step - loss: 0.0495 - mean_squared_error: 0.0495 - val_loss: 0.1500 - val_mean_squared_error: 0.1500
Epoch 3/500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step - loss: 0.1444 - mean_squared_error: 0.1444 - val_loss: 0.1317 - val_mean_squared_error: 0.1317
Epoch 4/500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step - loss: 0.1263 - mean_squared_error: 0.1263 - val_loss: 0.0672 - val_mean_squared_error: 0.0672
Epoch 5/500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step - loss: 0.0633 - mean_squared_error: 0.0633 - val_loss: 0.0478 - val_mean_squared_error: 0.0478
Epoch 6/500
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - loss: 0.045

In [30]:
# Predict on the scaled test split. Both arguments to the metric helper are in scaled
# (0..1) units, so the printed RMSE is not directly comparable with the linear model's.
yNorm_pred = model.predict(xNorm_test)
GET_METRICS_SINGLE(y_test=yNorm_test, y_pred=yNorm_pred)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
RMSE:  0.100067 (кв. корень из среднеквадратичной ошибки)
R2  :     0.787 (коэфф. детерминации)


In [31]:
# Metadata describing the neural model; it is serialized to m2.json further down.
m2_dist = dict()
m2_dist["type"] = str(type(model))
m2_dist["normNeed"] = True  # inputs must be min-max scaled and outputs inverse-scaled
m2_dist["standartNeed"] = False
# File names of the pickled scalers needed to use the model.
m2_dist["scalerNormForX"] = "scalerNormForX.dump"
m2_dist["scalerNormForY"] = "scalerNormForY.dump"
m2_dist["input"] = ['Cement', 'Blast_Furnace_Slag', 'Fly_Ash',  'Water', 'Superplasticizer', 'Coarse_Aggregate', 'Fine_Aggregate', 'Age']
m2_dist["output"] = ['Concrete_compressive_strength']
# NOTE(review): both metrics are computed on the scaled (0..1) target, so RMSE here is in
# scaled units, not in the original target units used for the linear model's metadata.
m2_dist["R2"] = metrics.r2_score(yNorm_test, yNorm_pred)
m2_dist["RMSE"] = np.sqrt( metrics.mean_squared_error(yNorm_test, yNorm_pred))
m2_dist["trainData"] = "concrete_nan_PREPARED2.csv"
# Must match the file the model is actually saved to and loaded from ("m2.keras");
# the previous value "m2.dump" pointed at a file that is never written.
m2_dist["modelName"] = "m2.keras"

In [32]:
# Write the neural model's metadata as JSON; the context manager closes the file even if json.dump raises.
with open("./m2.json", "w") as m2_file:
    json.dump(m2_dist, m2_file, indent=4)

In [33]:
# Persist both fitted scalers; they are needed to scale inputs and un-scale predictions.
# Context managers guarantee each file is closed even if pickling fails.
with open("./scalerNormForX.dump", "wb") as snx_file:
    pickle.dump(scalerNormX, snx_file)
with open("./scalerNormForY.dump", "wb") as sny_file:
    pickle.dump(scalerNormY, sny_file)

In [34]:
# Persist the trained network to a single .keras file in the working directory.
model.save("m2.keras")

In [35]:
# Read the neural model's metadata back from disk; the file is closed automatically.
with open("m2.json", "r") as m2_file:
    m2_dist = json.load(m2_file)

In [36]:
# Reload the network from the saved .keras file to check the save/load round trip.
m2=tf.keras.models.load_model("m2.keras")

In [37]:
# Restore the pickled scalers for inputs (snx) and target (sny).
# WARNING: pickle.load can execute arbitrary code; only load dumps produced by a trusted source.
with open("scalerNormForX.dump", "rb") as snx_file:
    snx = pickle.load(snx_file)
with open("scalerNormForY.dump", "rb") as sny_file:
    sny = pickle.load(sny_file)

In [38]:
# One hand-made sample with all eight predictors, in the column order the X scaler was fitted with.
dfX_test = pd.DataFrame({
    'Cement': [249.1],
    'Blast_Furnace_Slag': [0.0],
    'Fly_Ash': [98.8],
    'Water': [158.1],
    'Superplasticizer': [12.8],
    'Coarse_Aggregate': [987.8],
    'Fine_Aggregate': [889.0],
    'Age': [100.0],
})

In [39]:
# Scale the sample with the reloaded X scaler, then predict; the result is still in scaled target units.
y_pred=m2.predict(snx.transform(dfX_test))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step


In [40]:
# Map the scaled prediction back to the original target units with the reloaded Y scaler.
sny.inverse_transform(y_pred)

array([[56.51998]], dtype=float32)