# basic NN

In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.metrics import TruePositives, TrueNegatives, FalsePositives, FalseNegatives, AUC, Precision, Recall
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# Read CSV file into a pandas DataFrame
data = pd.read_csv('Cancer_Data.csv')

# Separate features and target
X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

# Perform one-hot encoding for the binary target variable
y_encoded = pd.get_dummies(y, columns=['diagnosis'], drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

joblib.dump(scaler, 'scaler_model.pkl')


# 3 hidden layers
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
# Compile the model with desired metrics
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[
    TruePositives(name='tp'),
    TrueNegatives(name='tn'),
    FalsePositives(name='fp'),
    FalseNegatives(name='fn'),
    AUC(name='auc'),
    Precision(name='precision'),
    Recall(name='recall')
])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=15, verbose=1, validation_split=0.2)


# Predict on the scaled testing data
y_pred = model.predict(X_test)

y_pred_binary = np.round(y_pred)  # Convert probabilities to binary predictions




# Calculate and print classification report
classification_rep = classification_report(y_test, y_pred_binary, target_names=["class_0", "class_1"])
print("Classification Report:\n", classification_rep)

# Calculate and print AUC-ROC score
auc_roc = roc_auc_score(y_test, y_pred_binary)
print("AUC-ROC Score:", auc_roc)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Classification Report:
               precision    recall  f1-score   support

     class_0       0.97      0.99      0.98        71
     class_1       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

AUC-ROC Score: 0.9697019325253848


In [7]:
#save model
import joblib

joblib.dump(model, 'NN_model.pkl')

['NN_model.pkl']

In [None]:
y_encoded = pd.get_dummies(data['diagnosis'], drop_first=True)
print(y_encoded.head())

In [8]:
import joblib
import pandas as pd

mp = joblib.load('NN_model.pkl')  # For scikit-learn versions < 0.24
# For scikit-learn versions >= 0.24, use:
# loaded_model = load('trained_model.pkl')

sp = joblib.load('scaler_model.pkl')



In [9]:
new_data = pd.read_csv('MpracticeData.csv')

new_data_scaled = scaler.transform(new_data)

# Make predictions using the loaded model on the scaled new data
new_predictions = mp.predict(new_data_scaled)

# Convert the predictions to binary classes (0 or 1)
binary_predictions = (new_predictions > 0.5).astype(int)

# Print the binary predictions
print(binary_predictions)
print(new_predictions)




[[1]]
[[0.99999297]]


# more complicated NN

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Read CSV file into a pandas DataFrame
data = pd.read_csv('Cancer_Data.csv')

# Separate features and target
X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

# Perform one-hot encoding for the binary target variable
y_encoded = pd.get_dummies(y, columns=['diagnosis'], drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 4 hidden layers
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=1, validation_split=0.2)

# Evaluate the model on the test data
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred_binary)
print("Accuracy:", accuracy)


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Read CSV file into a pandas DataFrame
data = pd.read_csv('Cancer_Data.csv')

# Separate features and target
X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

# Perform one-hot encoding for the binary target variable
y_encoded = pd.get_dummies(y, columns=['diagnosis'], drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6 hidden layers
model = keras.Sequential([
    layers.Input(shape=(num_features,)),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32, verbose=1, validation_split=0.2)

# Evaluate the model on the test data
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred_binary)
print("Accuracy:", accuracy)



# SVM

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the CSV data using pandas
data = pd.read_csv('Cancer_Data.csv')

X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

# Perform one-hot encoding for the binary target variable
y_encoded = pd.get_dummies(y, columns=['diagnosis'], drop_first=True)




In [None]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a Support Vector Machine (SVM) model
svm_model = SVC(kernel='rbf')  # You can choose different kernel functions like 'rbf' for non-linear data

# Train the SVM model on the training data
svm_model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svm_model.predict(X_test)



In [None]:
# Calculate and print the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")