In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping
# Layers used by the model, including BatchNormalization and Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
# L2 regularization for the Dense kernels
from tensorflow.keras.regularizers import l2

In [None]:
# Step 1: read the dataset CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='dataset.csv')

In [None]:
# Show the loaded DataFrame as this cell's output
data

In [None]:
# Show every column name present in the loaded dataset
print("Columns in the dataset:", list(data.columns))

In [None]:
# Column names used by the model, both as inputs and as targets.
# The spellings are kept exactly as written because they are used as DataFrame column keys.
all_features = [
    'cell_migration',
    'cell_invasion',
    'cell_growth',
    'wound_clousure',
    'protein_expression',
    'colonization',
    'average_tumor_volume',
    'cell_poliferation_G0-G1phase',
    'cell_proliferation_Sphase',
    'cell_proliferation_G2-Mphase',
    'apoptosis',
    'mrna_expression_levels',
]

In [None]:
# Discard every row that has at least one missing value (optional step)
data = data.dropna(axis=0, how='any')

In [None]:
# Inputs and targets are the same set of columns (the model is trained on all features)
X = data.loc[:, all_features]
y = data.loc[:, all_features]

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the standardizer on the training inputs only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
def build_model(input_dim, output_dim, l2_strength=0.001, dropout_rate=0.4, learning_rate=0.0001):
    """Build and compile a fully connected regression network.

    The network is 32 -> 64 -> 32 ReLU units, each hidden layer followed by
    batch normalization (and dropout after the first two), ending in a linear
    output layer. It is compiled with Adam and mean-squared-error loss.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    output_dim : int
        Number of regression targets (units in the output layer).
    l2_strength : float, optional
        L2 penalty applied to the kernel of every hidden Dense layer.
    dropout_rate : float, optional
        Dropout rate used after the first and second hidden layers.
    learning_rate : float, optional
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    A compiled ``Sequential`` model.
    """
    model = Sequential()
    # Declare the input shape with an explicit Input layer instead of the
    # legacy `input_dim=` argument on the first Dense layer.
    model.add(Input(shape=(input_dim,)))

    model.add(Dense(32, activation='relu', kernel_regularizer=l2(l2_strength)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))

    model.add(Dense(64, activation='relu', kernel_regularizer=l2(l2_strength)))
    model.add(BatchNormalization())
    model.add(Dropout(dropout_rate))

    model.add(Dense(32, activation='relu', kernel_regularizer=l2(l2_strength)))
    model.add(BatchNormalization())
    # Linear output layer for regression (one unit per target)
    model.add(Dense(output_dim, activation='linear'))

    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

# Stop training once validation loss has not improved for 10 epochs,
# and roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Size the network from the data: one input per scaled feature column,
# one output per target column.
input_dim = X_train_scaled.shape[1]
output_dim = y_train.shape[1]
model = build_model(input_dim=input_dim, output_dim=output_dim)

In [None]:
# Train for up to 500 epochs; 20% of the training data is used for validation
# and the early-stopping callback can end training sooner.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=32,
    epochs=500,
    verbose=1,
    callbacks=[early_stopping],
    validation_split=0.2,
)

In [None]:
# Score the trained model on the held-out test split
test_loss = model.evaluate(x=X_test_scaled, y=y_test)
print(f'Test Loss: {test_loss}')

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch training and validation loss recorded by fit()
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Evaluate model performance on the test set
test_loss = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f'Test Loss: {test_loss}')

# Compare with the training loss measured the same way as the test loss.
# history.history['loss'][-1] is not comparable: it is recorded with dropout
# active, averaged over the batches of the final epoch, and belongs to the last
# epoch rather than to the weights restored by early stopping. Re-evaluating
# the final model on the training inputs gives a like-for-like number.
# Note: X_train_scaled also contains the rows fit() held out via validation_split.
train_loss = model.evaluate(X_train_scaled, y_train, verbose=0)
print(f'Training Loss: {train_loss}')

# Heuristic tolerance: a test loss marginally above the training loss is
# expected for any model, so only flag gaps larger than 10%.
OVERFIT_RATIO_THRESHOLD = 1.1

if test_loss > train_loss * OVERFIT_RATIO_THRESHOLD:
    print("The model is likely overfitting.")
else:
    print("The model does not appear to be overfitting.")

In [None]:
# Step 4: Flexible Prediction Interface
def predict_output(selected_input_features, target_feature):
    """Train a single-target model on a subset of features and return a predictor.

    Parameters
    ----------
    selected_input_features : sequence of str
        Column names used as model inputs; each must appear in ``all_features``.
    target_feature : str
        Column name to predict; must appear in ``all_features``.

    Returns
    -------
    callable
        ``predict(selected_values)``, which takes one value per selected input
        feature (in the same order) and returns the model's prediction for
        ``target_feature``.

    Raises
    ------
    AssertionError
        If an input feature or the target feature is not in ``all_features``.
    """
    # Copy the names so later changes to the caller's list cannot desynchronize
    # the returned predictor from the fitted scaler and model.
    feature_names = list(selected_input_features)

    # Ensure the input features are a subset of the all_features list
    assert all(feature in all_features for feature in feature_names), "Invalid input features"
    assert target_feature in all_features, "Invalid target feature"

    # Extract only the selected input features and the target feature
    X_selected = data[feature_names]
    y_selected = data[[target_feature]]

    # Split BEFORE scaling so the scaler statistics come from training rows only;
    # fitting the scaler on all rows would leak held-out data into training.
    # The same test_size/random_state keeps the training rows unchanged.
    X_train_sel, _, y_train_sel, _ = train_test_split(
        X_selected, y_selected, test_size=0.2, random_state=42
    )

    # Standardize the selected input features using the training rows
    scaler_selected = StandardScaler()
    X_train_sel_scaled = scaler_selected.fit_transform(X_train_sel)

    # Build a model for the selected features, predicting a single target
    selected_model = build_model(len(feature_names), 1)

    # Train the model on the selected input features
    selected_model.fit(X_train_sel_scaled, y_train_sel, validation_split=0.2, epochs=100, batch_size=32, verbose=1)

    # Predict on new data
    def predict(selected_values):
        """Return the predicted target for one row of input values."""
        if len(selected_values) != len(feature_names):
            raise ValueError(
                f"Expected {len(feature_names)} values (one per input feature), got {len(selected_values)}"
            )
        # Wrap the values in a DataFrame with the fitted column names so the
        # scaler receives the same feature names it was fitted with.
        row = pd.DataFrame([selected_values], columns=feature_names)
        selected_values_scaled = scaler_selected.transform(row)
        prediction = selected_model.predict(selected_values_scaled)
        return prediction[0][0]

    return predict

In [None]:
# Columns fed to the subset model, and the single column it should predict
selected_input_features = [
    'cell_migration',
    'cell_invasion',
]
target_feature = 'wound_clousure'

In [None]:
# Fail early if any chosen column is absent from the loaded dataset
assert not [feature for feature in selected_input_features if feature not in data.columns], "One or more selected input features do not exist in the dataset."
assert target_feature in data.columns, "Target feature does not exist in the dataset."

In [None]:
# Train the subset model and keep the returned prediction function
predict_fn = predict_output(
    selected_input_features=selected_input_features,
    target_feature=target_feature,
)

In [None]:
# One raw (unscaled) value per selected input feature, in the same order
input_values = [
    0.15,
    0.25,
]
predicted_value = predict_fn(input_values)
print(f'Predicted {target_feature} value: {predicted_value}')

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score

# Run the full model on the scaled test inputs
y_pred = model.predict(X_test_scaled)

# Mean absolute error between the test targets and the predictions
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Absolute Error (MAE): {mae}')

# Coefficient of determination for the same predictions
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R-squared (R²): {r2}')