In [None]:
# neural net with train data set only

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Load the data
main_data = pd.read_csv("./data/train.csv")

# Assume 'critical_temp' is the target variable
X = main_data.drop("critical_temp", axis=1)
y = main_data["critical_temp"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the neural network model architecture
input_dim = X_train.shape[1]
model = Sequential([
    Dense(128, input_dim=input_dim, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    
    Dense(32, activation='relu'),
    BatchNormalization(),
    
    Dense(1, activation='linear')  # Linear activation for regression output
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error',
              metrics=['mean_squared_error'])

# Print model summary
model.summary()

# Train the model
history = model.fit(X_train, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=1)

# Evaluate the model on the test set
loss, mse = model.evaluate(X_test, y_test, verbose=1)
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("Test Loss (MSE):", mse)
print("Test RMSE:", rmse)
print("Test R²:", r2)


2025-03-03 19:03:18.407284: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-03 19:03:18.413652: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-03 19:03:18.428422: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741024998.450522    2970 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741024998.457024    2970 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-03 19:03:18.484125: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

Epoch 1/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 1865.1516 - mean_squared_error: 1865.1516 - val_loss: 1173.8219 - val_mean_squared_error: 1173.8219
Epoch 2/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 776.2873 - mean_squared_error: 776.2873 - val_loss: 380.9692 - val_mean_squared_error: 380.9692
Epoch 3/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 414.3273 - mean_squared_error: 414.3273 - val_loss: 386.6674 - val_mean_squared_error: 386.6674
Epoch 4/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 337.8121 - mean_squared_error: 337.8121 - val_loss: 363.5819 - val_mean_squared_error: 363.5819
Epoch 5/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 340.8986 - mean_squared_error: 340.8986 - val_loss: 319.2773 - val_mean_squared_error: 319.2773
Epoch 6/50
[1m426/426[0m [32m━━━━━━━━━━━━━━

Result:

Test Loss (MSE): 252.77272033691406
Test RMSE: 15.898826688992013
Test R²: 0.7804039767821154



In [2]:
# Neural Net model with combined data set, all features

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Load datasets
main_data = pd.read_csv("./data/train.csv")  # Superconductivity dataset
unique_m = pd.read_csv("./data/unique_m.csv")

# Remove 'critical_temp' from unique_m to avoid duplication
unique_m = unique_m.drop(columns=["critical_temp"], errors='ignore')

# Merge datasets assuming rows align (index-based merge)
merged_data = pd.concat([main_data, unique_m], axis=1)

# Define target and features
target = "critical_temp"
X = merged_data.drop(columns=['critical_temp', 'material'], axis=1)
y = merged_data[target]


# Split the data into training and testing sets (80/20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the neural network model architecture
input_dim = X_train.shape[1]
model = Sequential([
    Dense(128, input_dim=input_dim, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    
    Dense(32, activation='relu'),
    BatchNormalization(),
    
    Dense(1, activation='linear')  # Linear activation for regression output
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='mean_squared_error',
              metrics=['mean_squared_error'])

# Print model summary
model.summary()

# Train the model
history = model.fit(X_train, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=1)

# Evaluate the model on the test set
loss, mse = model.evaluate(X_test, y_test, verbose=1)
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("Test Loss (MSE):", mse)
print("Test RMSE:", rmse)
print("Test R²:", r2)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 1933.5967 - mean_squared_error: 1933.5967 - val_loss: 1475.4911 - val_mean_squared_error: 1475.4911
Epoch 2/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 792.8510 - mean_squared_error: 792.8510 - val_loss: 687.9149 - val_mean_squared_error: 687.9149
Epoch 3/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - loss: 409.5356 - mean_squared_error: 409.5356 - val_loss: 372.8370 - val_mean_squared_error: 372.8370
Epoch 4/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 334.9202 - mean_squared_error: 334.9202 - val_loss: 455.8929 - val_mean_squared_error: 455.8929
Epoch 5/50
[1m426/426[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 336.3668 - mean_squared_error: 336.3668 - val_loss: 411.7991 - val_mean_squared_error: 411.7991
Epoch 6/50
[1m426/426[0m [32m━━━━━━━━━━━━━━

Results:

Test Loss (MSE): 350.13287353515625
Test RMSE: 18.71183801857613
Test R²: 0.6958224068192851

