# SINDy Technique using Deep Learning

In [1]:
import numpy as np   
from sklearn.linear_model import LinearRegression
import pandas as pd    
import matplotlib.pyplot as plt 
%matplotlib inline 
import seaborn as sns
import keras
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor 
from keras.models import Sequential
from keras.layers import Dense, Activation, Input, Dropout
from sklearn.model_selection import train_test_split # Sklearn package's randomized data splitting function
from numpy import asarray
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error


In [2]:
%tensorflow_version 2.x
import tensorflow
tensorflow.__version__


UsageError: Line magic function `%tensorflow_version` not found.


In [3]:
# Initialize the random number generator
import random
random.seed(0)

# Ignore the warnings
import warnings
warnings.filterwarnings("ignore")

In [4]:
cData = pd.read_csv(r"C:\Users\Navnit Ashok Nair\OneDrive\Desktop\SINDy\TBC_data_study_1.csv")
cData=cData.drop(['Unnamed: 0','std_rho','std_R','RelativeDessityError','RelativeTBCError','dT'],axis=1)

x=cData.drop(['NSheets'],axis=1)
Y=cData['NSheets']
from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state = 2)
x1, Y1 = sm.fit_resample(x, Y.ravel())
x1['NSheets']=Y1

x=x1.drop(['Temperature'],axis=1)
Y=x1['Temperature']
x1, Y1 = sm.fit_resample(x, Y.ravel())
x1['Temperature']=Y1



X = x1.drop(['RelativeTBR'], axis=1)
y = x1[['RelativeTBR']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1)
 # created scaler
scaler = StandardScaler()
    # fit scaler on training dataset
scaler.fit(X_train)
    # transform training dataset
X_train = scaler.transform(X_train)
    # transform test dataset
X_test = scaler.transform(X_test)
poly = PolynomialFeatures(degree=3, interaction_only=True)
X_train = poly.fit_transform(X_train)
X_test = poly.fit_transform(X_test)


In [5]:
y_train


Unnamed: 0,RelativeTBR
2360,1.029948
1021,1.161645
636,0.551196
180,1.809379
2703,1.179539
...,...
2763,1.146955
905,1.092630
1096,1.088421
235,1.700032


### Print shape of the data

In [6]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(2552, 93)
(2552, 1)
(638, 93)
(638, 1)


In [23]:
import itertools
# Define the pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Preprocessing step: StandardScaler
    ('mlp', None)  # Model building step: Keras MLP model
])

# Define the range of values for the number of layers
layer_range = range(1,4)  # Change the range as per your preference

best_score = float('inf')
best_num_layers = None
best_neurons = None

for num_layers in layer_range:
    neuron_configs = list(itertools.product([8,16,32,64,128], repeat=num_layers))

    for neurons in neuron_configs:
        if all(neuron <= 128 for neuron in neurons):
            model = Sequential()
            model.add(Dense(neurons[0], activation='relu', input_shape=(X_train.shape[1],)))
            model.add(Dropout(0.01))  # Add dropout layer
            for neuron in neurons[1:]:
                model.add(Dense(neuron, activation='relu'))
                model.add(Dropout(0.01))  # Add dropout layer
            model.add(Dense(1,activation='linear'))
            model.compile(optimizer='adam', loss='mean_squared_error')  # Compile the model



            pipeline.set_params(mlp=model)

            # Perform cross-validation
            grid_search = GridSearchCV(pipeline, param_grid={}, cv=5, scoring='neg_mean_squared_error')
            grid_search.fit(X_train, y_train)

            # Get the best score for the current configuration
            if grid_search.best_score_ < best_score:
                best_score = grid_search.best_score_
                best_num_layers = num_layers
                best_neurons = neurons

# Fit the pipeline with the best configuration on the entire training set
model = Sequential()
model.add(Dense(best_neurons[0], activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.2))
for neuron in best_neurons[1:]:
    model.add(Dense(neuron, activation='relu'))
    model.add(Dropout(0.2))
model.add(Dense(1,activation='linear'))
model.compile(optimizer='adam', loss='mean_squared_error')  # Compile the model


pipeline.set_params(mlp=model)
pipeline.fit(X_train, y_train)

# Evaluate the best model on test data
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print("Best number of layers:", best_num_layers+1)
print("Best neuron configuration:", best_neurons)
print("Mean Squared Error:", mse)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dropout
......vars
...optimizer
......vars
.........0
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-06-15 20:22:34         1558
metadata.json                                  2023-06-15 20:22:34           64
variables.h5                                   2023-06-15 20:22:34        16280
Keras model archive loading:
File Name                                             Modified             Size
config.json                                    2023-06-15 20:22:34         1558
metadata.json                                  2023-06-15 20:22:34           64
variables.h5                                   2023-06-15 20:22:34        16280
Keras weights file (<HDF5 file "variables.h5" (mode r)>) loading:

# Best Configeration from Max 5 layers of Hyperparameter Tuning

In [18]:
model1 = Sequential()
model1.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model1.add(Dropout(0.01))
model1.add(Dense(16, activation='relu'))
model1.add(Dropout(0.01))
model1.add(Dense(8, activation='relu'))
model1.add(Dropout(0.01))
model1.add(Dense(1,activation='linear'))
model1.compile(optimizer='adam', loss='mean_squared_error')
model1.fit( x=X_train, y=y_train, batch_size=32, epochs=100, validation_split = 0.3)
model1.evaluate(X_test, y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

0.0031768472399562597