## Effect of different training/testing proportions on PANN performance

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from keras.layers import Dense
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split


# Initial hold-out split: test_size=0.4 reserves 40% of the samples for
# testing; the fixed seed makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.4,
)

# Define a function to create and compile the model with a specified learning rate
def create_model(lr, input_dim=None):
    """Build and compile the dense binary classifier, optimised with SGD.

    Layer stack: Dense(128, relu) -> Dropout(0.3) -> Dense(64, relu)
    -> Dropout(0.2) -> Dense(32, relu) -> Dense(1, sigmoid).
    The model is compiled with binary cross-entropy loss and tracks accuracy.

    Args:
        lr: Learning rate handed to the SGD optimizer.
        input_dim: Number of input features. When omitted, falls back to
            ``X_train.shape[1]`` read from module scope at call time, which
            is what this function always did before the parameter existed.

    Returns:
        The compiled ``Sequential`` model (not yet fitted).
    """
    if input_dim is None:
        # Backward-compatible default: use whatever X_train is currently bound.
        input_dim = X_train.shape[1]

    model = Sequential()
    model.add(Dense(128, input_dim=input_dim, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = SGD(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train/test proportions to compare; each pair is (train fraction, test fraction).
proportions = [(0.6, 0.4), (0.7, 0.3), (0.8, 0.2)]

# Fraction of each training split that Keras holds out for per-epoch validation.
# This must stay constant across runs: tying it to train_prop (as was done
# before) withholds 60-80% of the training data from fitting, so the model
# would see LESS data as the nominal training proportion grows.
VALIDATION_SPLIT = 0.1

# Test-set accuracy and loss, one entry per proportion, in `proportions` order.
accuracy_list = []
loss_list = []

# Train a fresh model for every proportion and score it on the held-out test set.
for train_prop, test_prop in proportions:
    # Same seed for every run so only the split size differs between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_prop, random_state=42)

    # Fresh network per run so no weights carry over between proportions.
    model = Sequential()
    model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # rejected by current Keras releases.
    model.compile(optimizer=Adam(learning_rate=0.0009), loss='binary_crossentropy', metrics=['accuracy'])

    # Train the model and capture the per-epoch history.
    history = model.fit(
        X_train,
        y_train,
        epochs=200,
        batch_size=32,
        verbose=1,
        validation_split=VALIDATION_SPLIT,
    )

    # Turn the sigmoid outputs into hard 0/1 labels as a flat vector.
    y_pred = model.predict(X_test)
    y_pred = np.round(y_pred).flatten()

    # Test-set accuracy from the hard predictions.
    accuracy = accuracy_score(y_test, y_pred)

    # evaluate() returns [loss, accuracy] given the compile() metrics; keep the loss.
    loss = model.evaluate(X_test, y_test)[0]

    accuracy_list.append(accuracy)
    loss_list.append(loss)

    print(f"Training proportion: {train_prop}, Testing proportion: {test_prop}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Loss: {loss:.4f}")

# Collect the per-proportion results into one table for plotting.
df_results = pd.DataFrame({
    'train_prop': [p[0] for p in proportions],
    'test_prop': [p[1] for p in proportions],
    'accuracy': accuracy_list,
    'loss': loss_list,
})

# Plot accuracy (top) and loss (bottom) against the training proportion.
fig, ax = plt.subplots(2, 1, figsize=(8, 8))
sns.lineplot(x='train_prop', y='accuracy', data=df_results, ax=ax[0])
ax[0].set_title('Accuracy vs Training Proportion')
sns.lineplot(x='train_prop', y='loss', data=df_results, ax=ax[1])
ax[1].set_title('Loss vs Training Proportion')
plt.tight_layout()
plt.show()

