In [1]:
import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [2]:
# Read the workbook and discard the 'DA concentration' column, which the
# original notebook marks as not useful for this analysis.
filepath = r"C:\Users\Admin\Machine Learning Fundamentals\CSV_files\MachineLearning-Data.xlsx"
df = pd.read_excel(filepath).drop(columns=['DA concentration'])

# Separate the regression target from the predictor columns.
target_column = 'DA Concentration (uM)'
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features (same preprocessing as the SVR workflow, per the
# original note). The scaler learns its statistics from the training rows
# only and is then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [3]:
# Define a function to create the model for KerasTuner
def build_model(hp):
    """Build and compile a tunable dense regression network for KerasTuner.

    Search space:
      * ``units1``: width of the first Dense layer, 32-128 in steps of 16.
      * ``num_layers``: number of additional hidden Dense layers, 1-3
        (so the network has 2-4 hidden layers in total).
      * ``units_{i}``: width of additional layer ``i``, 16-128 in steps of 16.
      * ``learning_rate``: Adam learning rate, log-sampled in [1e-4, 1e-2].

    The input width is read from the notebook-level ``X_train_scaled`` array,
    so that array must exist before the tuner calls this function.

    Args:
        hp: KerasTuner hyperparameter container used to sample the values.

    Returns:
        A compiled ``Sequential`` model with a single linear output unit and
        mean-squared-error loss.
    """
    model = Sequential()

    # Declare the input shape with an explicit Input layer. Passing
    # `input_dim` to the first Dense layer is a legacy pattern that Keras 3
    # warns about; the resulting network is the same.
    model.add(Input(shape=(X_train_scaled.shape[1],)))

    # Tune number of units in the first Dense layer
    hp_units1 = hp.Int('units1', min_value=32, max_value=128, step=16)
    model.add(Dense(units=hp_units1, activation='relu'))

    # Tune how many additional hidden layers follow (1 to 3) and the width
    # of each one.
    for i in range(hp.Int('num_layers', 1, 3)):
        hp_units = hp.Int(f'units_{i}', min_value=16, max_value=128, step=16)
        model.add(Dense(units=hp_units, activation='relu'))

    # Output layer: one linear unit for the regression target
    model.add(Dense(1, activation='linear'))

    # Tune learning rate for Adam optimizer
    hp_learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss='mean_squared_error')

    return model

In [4]:
# Random search over the hyperparameter space defined in build_model.
#
# NOTE: trial results are persisted under tuner_results/ann_tuning. With the
# default overwrite=False, KerasTuner reloads an existing project of the same
# name, so delete that folder (or pass overwrite=True) to force a fresh search
# after changing the data or the search space.
tuner = kt.RandomSearch(
    build_model,                 # function to create the model
    objective='val_loss',         # metric to optimize
    max_trials=10,                # number of different hyperparameter combinations to try
    executions_per_trial=2,       # repeat each trial multiple times for robustness
    seed=42,                      # fixed seed so the sampled combinations are repeatable
    directory='tuner_results',    # where to save the results
    project_name='ann_tuning'     # name for this tuning project
)

# Run the search. 20% of the training rows are held out by Keras to compute
# the val_loss that the tuner optimizes.
tuner.search(X_train_scaled, y_train, epochs=100, validation_split=0.2, batch_size=8)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Rebuild a model from the best hyperparameters. This is a freshly
# initialised (untrained) network; it is fitted in a later cell.
model = tuner.hypermodel.build(best_hps)
model.summary()


Trial 10 Complete [00h 00m 48s]
val_loss: 2792.6502685546875

Best val_loss So Far: 2537.05419921875
Total elapsed time: 00h 06m 24s


In [6]:
# Show the winning hyperparameter values. The dict can list `units_{i}`
# entries beyond `num_layers`; build_model only uses the first `num_layers`
# of them (presumably the extras were registered by other trials that
# sampled more layers — confirm against the KerasTuner docs).
print(best_hps.values)

{'units1': 48, 'num_layers': 1, 'units_0': 16, 'learning_rate': 0.0034216562598739053, 'units_1': 96, 'units_2': 16}


In [5]:
# Fit the rebuilt network on the scaled training data; Keras holds out 20%
# of those rows as a validation set and reports val_loss every epoch.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=8,
    verbose=1,
)

# Score the fitted model on the untouched test split.
y_pred_ann = model.predict(X_test_scaled)
mse_ann = mean_squared_error(y_test, y_pred_ann)
r2_ann = r2_score(y_test, y_pred_ann)

# Report both metrics, one per line.
for metric_label, metric_value in (
    ("Mean Squared Error", mse_ann),
    ("R^2 Score", r2_ann),
):
    print(f"{metric_label} (Optimized ANN): {metric_value}")

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - loss: 6389.1885 - val_loss: 5631.4082
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 7922.4556 - val_loss: 5610.3057
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 6209.9453 - val_loss: 5588.7417
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 5980.7915 - val_loss: 5563.0425
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 4888.9365 - val_loss: 5531.3091
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 7621.5503 - val_loss: 5490.9639
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 7453.5957 - val_loss: 5443.0000
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 5150.7207 - val_loss: 5388.6187
Epoch 9/

In [7]:
# Run inference on the held-out rows and tabulate each prediction next to
# its true value (rows keep the index labels of y_test).
y_pred = model.predict(X_test_scaled)
data = y_test.to_frame(name='Actual').assign(Predicted=y_pred.flatten())
print(data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
    Actual   Predicted
13       0   -1.618408
39     200  155.563293
30      75   65.379234
45      30   36.224384
17      10   28.034567
48      30   95.971596
26      50   44.035027
25      50   39.590302
32     100   99.706055
19      20   27.569849


In [8]:
# Plot the test-set predictions against the actual values. Points lying on
# the dashed red diagonal are perfect predictions; the diagonal spans the
# range of the actual values.
diagonal_range = [y_test.min(), y_test.max()]

fig = go.Figure()

# One marker per test sample: x = actual value, y = model prediction.
fig.add_trace(go.Scatter(
    x=y_test.values,
    y=y_pred.flatten(),
    mode='markers',
    name='Predictions',
    marker=dict(color='blue', opacity=0.5)
))

# Reference line y = x.
fig.add_trace(go.Scatter(
    x=diagonal_range,
    y=diagonal_range,
    mode='lines',
    name='Perfect Prediction',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    width=800,
    height=600,
    hovermode='closest'
)

fig.show()