In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import roc_auc_score, log_loss, matthews_corrcoef, cohen_kappa_score, precision_recall_curve, auc
from sklearn.preprocessing import label_binarize

# Load dataset (Replace with actual dataset)
df = pd.read_csv("C://Users//Admin//Downloads//DDNL//Big-5.csv")  # Assuming CSV format

# Preprocess dataset
X = df.drop(columns=["long_appx_lots_of_err"])  # Features
y = df["long_appx_lots_of_err"]  # Labels

# Encode labels
le = LabelEncoder()
y = le.fit_transform(y)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the neural network model
def create_model(optimizer):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(len(np.unique(y)), activation='softmax')
    ])
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Experiment with different optimizers
optimizers = {
    "SGD": tf.keras.optimizers.SGD(),
    "Adam": tf.keras.optimizers.Adam(),
    "RMSprop": tf.keras.optimizers.RMSprop(),
    "Adagrad": tf.keras.optimizers.Adagrad(),
    "Nadam": tf.keras.optimizers.Nadam()
}

results = []

for opt_name, opt in optimizers.items():
    print(f"Training with {opt_name} optimizer...")
    model = create_model(opt)
    model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0, validation_data=(X_test, y_test))
    
    # Get predictions
    y_pred_probs = model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)
    
    # Compute alternative metrics
    auc_roc = roc_auc_score(y_test, y_pred_probs, multi_class='ovr')
    logloss = log_loss(y_test, y_pred_probs)
    mcc = matthews_corrcoef(y_test, y_pred)
    cohen_kappa = cohen_kappa_score(y_test, y_pred)

    # Binarize the output labels for PR-AUC computation
    y_test_bin = label_binarize(y_test, classes=np.unique(y))

    pr_auc_scores = []
    for i in range(y_test_bin.shape[1]):  # Iterate over each class
        precision, recall, _ = precision_recall_curve(y_test_bin[:, i], y_pred_probs[:, i])
        pr_auc_scores.append(auc(recall, precision))

    # Average PR-AUC across all classes
    pr_auc = np.mean(pr_auc_scores)
    
    
    # Store results
    results.append([opt_name, auc_roc, logloss, mcc, cohen_kappa, pr_auc])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=["Optimizer", "AUC-ROC", "Log Loss", "MCC", "Cohen Kappa", "PR-AUC"])
print(results_df)

# Save results to CSV
results_df.to_csv("C://Users//Admin//Downloads//DDNL//optimizer_results.csv", index=False)


ValueError: could not convert string to float: '2016-03-03 11:59:53'

In [7]:
print(df.columns)  # Check available columns


Index(['EXT1', 'EXT2', 'EXT3', 'EXT4', 'EXT5', 'EXT6', 'EXT7', 'EXT8', 'EXT9',
       'EXT10',
       ...
       'dateload', 'screenw', 'screenh', 'introelapse', 'testelapse',
       'endelapse', 'IPC', 'country', 'lat_appx_lots_of_err',
       'long_appx_lots_of_err'],
      dtype='object', length=110)


In [23]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.metrics import roc_auc_score, log_loss, matthews_corrcoef, classification_report
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

df = pd.read_csv("C://Users//Admin//Downloads//DDNL//Big-5.csv")

X = np.random.rand(1000, 10)  # 1000 samples, 10 features
y = np.random.randint(0, 3, 1000)  # 3 classes (0, 1, 2)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode labels for AUC-ROC
lb = LabelBinarizer()
y_train_onehot = lb.fit_transform(y_train)
y_test_onehot = lb.transform(y_test)

# Ensure one-hot encoding matches the number of classes
num_classes = len(np.unique(y))
if y_train_onehot.shape[1] != num_classes:
    y_train_onehot = np.hstack((y_train_onehot, np.zeros((y_train_onehot.shape[0], num_classes - y_train_onehot.shape[1]))))
if y_test_onehot.shape[1] != num_classes:
    y_test_onehot = np.hstack((y_test_onehot, np.zeros((y_test_onehot.shape[0], num_classes - y_test_onehot.shape[1]))))

# Optimizers to test
optimizers = ['adam', 'sgd', 'rmsprop']
results = []

for optimizer in optimizers:
    print(f"Training with {optimizer} optimizer...")
    
    # Define neural network model
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax')  # Use dynamic class count
    ])
    
    # Compile and train model
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(X_train, y_train_onehot, epochs=10, batch_size=32, verbose=1, validation_data=(X_test, y_test_onehot))
    
    # Get predictions
    y_pred_probs = model.predict(X_test)  # Probability outputs
    y_pred = np.argmax(y_pred_probs, axis=1)
    
    # Compute metrics
    try:
        auc_roc = roc_auc_score(y_test_onehot, y_pred_probs, multi_class='ovr')
    except ValueError as e:
        print(f"Error in AUC-ROC calculation for {optimizer}:", e)
        auc_roc = None
    
    logloss = log_loss(y_test_onehot, y_pred_probs)
    accuracy = np.mean(y_test == y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)
    
    # Classification report (Precision, Recall, F1-score)
    class_report = classification_report(y_test, y_pred, output_dict=True)
    macro_precision = class_report['macro avg']['precision']
    macro_recall = class_report['macro avg']['recall']
    macro_f1 = class_report['macro avg']['f1-score']
    
    # Store results
    results.append({
        "Optimizer": optimizer,
        "Loss": logloss,
        "Accuracy": accuracy,
        "AUC-ROC": auc_roc,
        "MCC": mcc,
        "Macro Precision": macro_precision,
        "Macro Recall": macro_recall,
        "Macro F1-score": macro_f1
    })

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Save results to CSV
save_path = "C://Users//Admin//Downloads//DDNL//model_performance.csv"
results_df.to_csv(save_path, index=False)
print(f"Results saved to {save_path}")



Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.3412 - loss: 1.1385
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3760 - loss: 1.0958
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4370 - loss: 1.0639
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4697 - loss: 1.0559
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4505 - loss: 1.0558
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4757 - loss: 1.0401
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4801 - loss: 1.0292
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4801 - loss: 1.0334
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1