In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier, KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, mean_absolute_percentage_error, mean_absolute_error

In [3]:
# Load datasets
data_nup = pd.read_excel('../datasets/X_nup.xlsx')
data_bp = pd.read_excel('../datasets/X_bp.xlsx')

In [5]:
# Merge datasets on index with INNER join
data_combined = data_nup.merge(data_bp, left_index=True, right_index=True, how='inner')

# Drop unnecessary columns
data_cleaned = data_combined.drop(columns=["Unnamed: 0_x", "Unnamed: 0_y"])

In [6]:
# Separate features and target
X = data_cleaned.drop(columns=["Соотношение матрица-наполнитель"])
y = data_cleaned["Соотношение матрица-наполнитель"]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
def print_metrics(y_test_values, y_pred_values):
    MSE = metrics.mean_squared_error(y_test_values, y_pred_values)
    MAE = metrics.mean_absolute_error(y_test_values, y_pred_values)
    RMSE = np.sqrt(MSE)  
    CoefDet = r2_score(y_test_values, y_pred_values)
    accuracy = 100 - (MAE/ np.mean(y_test_values)) * 100
    print("Среднеквадратическая ошибка MSE:", np.round(MSE, 3))
    print("Средняя абсолютная ошибка MAE:", np.round(MAE, 3))
    print("Корень из среднеквадратической ошибки RMSE:", np.round(RMSE, 3))
    print("Коэффициент детерминации R2:", np.round(CoefDet, 3))
    print('Точность модели (%)', np.round(accuracy, 3))

In [21]:
def create_model(neurons=32, activation='relu', optimizer='adam', dropout_rate=0.2):
    model = Sequential([
        Dense(neurons, activation=activation, input_shape=(X_train_scaled.shape[1],)),
        Dropout(dropout_rate),
        Dense(neurons // 2, activation=activation),
        Dropout(dropout_rate),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return model

In [23]:
model = KerasRegressor(build_fn=create_model, verbose=0, activation='relu', optimizer='adam',
                       neurons=32, dropout_rate=0.2)
param_grid = {
    'neurons': [32, 64, 128],
    'activation': ['relu', 'tanh'],
    'optimizer': ['adam', 'rmsprop'],
    'dropout_rate': [0.2, 0.3],
    'batch_size': [16, 32],
    'epochs': [50, 100]
}
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='neg_mean_absolute_error', verbose=1)
grid_result = grid.fit(X_train_scaled, y_train)
print(f"Best parameters: {grid_result.best_params_}")
print(f"Best score: {-grid_result.best_score_}")

Fitting 3 folds for each of 96 candidates, totalling 288 fits


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  X, y = self._initialize(X, y)
  super().__init__(activity_regu

Best parameters: {'activation': 'tanh', 'batch_size': 32, 'dropout_rate': 0.3, 'epochs': 100, 'neurons': 32, 'optimizer': 'rmsprop'}
Best score: 0.7288157657154516


In [25]:
best_model = grid_result.best_estimator_
test_mae = best_model.score(X_test_scaled, y_test)
print(f"Test MAE: {-test_mae}")

Test MAE: 0.025045767315341028


In [28]:
model = KerasRegressor(build_fn=create_model, verbose=0, activation='tanh', optimizer='rmsprop',
                       neurons=32, dropout_rate=0.3)
history = model.fit(X_train_scaled, y_train, 
                    epochs=100,
                    verbose=0, validation_data=(X_test_scaled, y_test)) 
y_pred_ns = model.predict(X_test_scaled)
print_metrics(y_test, y_pred_ns)


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)






Среднеквадратическая ошибка MSE: 0.922
Средняя абсолютная ошибка MAE: 0.791
Корень из среднеквадратической ошибки RMSE: 0.96
Коэффициент детерминации R2: -0.03
Точность модели (%) 73.584


In [18]:
# Define the neural network model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='linear')  # Linear activation for regression
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
history = model.fit(X_train_scaled, y_train, validation_data=(X_test_scaled, y_test),
                    epochs=50, batch_size=16, verbose=1)

# Evaluate the model
eval_results = model.evaluate(X_test_scaled, y_test, verbose=1)
y_pred_ns = model.predict(X_test_scaled)
print(f"Test Loss: {eval_results[0]}, Test MAE: {eval_results[1]}")
print_metrics(y_test, y_pred_ns)
# Save the model
model.save('matrix_filler_model.h5')

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 6.8860 - mae: 2.3897 - val_loss: 1.3410 - val_mae: 0.9392
Epoch 2/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.4933 - mae: 0.9770 - val_loss: 1.1396 - val_mae: 0.8910
Epoch 3/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.4845 - mae: 0.9767 - val_loss: 1.1812 - val_mae: 0.9049
Epoch 4/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.5100 - mae: 0.9802 - val_loss: 1.1602 - val_mae: 0.9059
Epoch 5/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.3401 - mae: 0.9226 - val_loss: 1.1295 - val_mae: 0.8887
Epoch 6/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.3594 - mae: 0.9327 - val_loss: 1.1445 - val_mae: 0.8922
Epoch 7/50
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 1.2686 - mae: 0.905



Test Loss: 0.9917469024658203, Test MAE: 0.8318669199943542
Среднеквадратическая ошибка MSE: 0.992
Средняя абсолютная ошибка MAE: 0.832
Корень из среднеквадратической ошибки RMSE: 0.996
Коэффициент детерминации R2: -0.108
Точность модели (%) 72.211
