In [1]:
import pandas as pd
from google.colab import files
# Open Colab's upload dialog; the result is kept in `uploaded` and is used
# below as a mapping keyed by file name.
uploaded = files.upload()
# Load the dataset
# Read the file that was actually uploaded rather than assuming its name.
# NOTE(review): Colab appears to store a re-uploaded file under a suffixed
# name such as "name (1).csv" — confirm; in that case the hard-coded name
# would silently point at the older copy.
expected_name = 'pdc_dataset_with_target.csv'
if uploaded and expected_name not in uploaded:
    # A differently named file was uploaded: use the first one reported.
    dataset_path = next(iter(uploaded))
else:
    # Either the expected file was uploaded, or the dialog was dismissed and
    # we fall back to a copy that may already exist in the session.
    dataset_path = expected_name
df = pd.read_csv(dataset_path)


Saving pdc_dataset_with_target.csv to pdc_dataset_with_target.csv


In [2]:
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Timer start (wall-clock reference for the total-time report at the end)
main_timer_start = time.time()
print("========== GPU-ACCELERATED TRAINING ==========")
print(f"Process started at: {time.strftime('%Y-%m-%d %H:%M:%S')}")

# `df` is expected to already exist in the kernel (loaded by the upload cell).

# Split data: every column except "target" is a feature, "target" is the label
X = df.drop("target", axis=1)
y = df["target"]

# Identify feature types
# "number" covers every numeric width (int32, float32, ...), not only the
# 64-bit dtypes; "category" is treated like free-text object columns.
numeric_features = X.select_dtypes(include="number").columns.tolist()
categorical_features = X.select_dtypes(include=["object", "category"]).columns.tolist()

# Columns in neither list are not handed to any transformer further down, so
# make that visible instead of losing them silently.
handled_features = set(numeric_features) | set(categorical_features)
ignored_features = [col for col in X.columns if col not in handled_features]
if ignored_features:
    print(f"WARNING: columns with unhandled dtypes will be ignored: {ignored_features}")

# Preprocessing
# Numeric branch: fill missing values with the column median, then scale.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])
# Categorical branch: fill missing values with the most frequent level, then
# one-hot encode; handle_unknown='ignore' lets transform accept unseen levels.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])
# Route each column list to its branch and concatenate the results.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Train-test split
# Split the RAW rows first: the imputers, scaler and encoder must learn their
# statistics from the training rows only, otherwise information from the test
# rows leaks into training and the test score is optimistic.
y_processed = y.values
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_processed, test_size=0.2, stratify=y, random_state=42
)

# Apply preprocessing: fit on the training rows, reuse the fitted
# transformers unchanged on the test rows.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Full feature matrix under its original name, in case a later cell reads it.
# It is produced with the train-only statistics fitted above.
X_processed = preprocessor.transform(X)

# Convert to dense arrays (if sparse)
# toarray() yields a regular ndarray; todense() would yield the legacy
# np.matrix type instead.
if hasattr(X_train, "toarray"):
    X_train = X_train.toarray()
if hasattr(X_test, "toarray"):
    X_test = X_test.toarray()

# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation, dropout masks and batch shuffling start from the same state
# on every run.
tf.keras.utils.set_random_seed(42)

# Report whether TensorFlow can actually see a GPU; without one the same code
# simply runs on the CPU.
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPUs visible to TensorFlow: {len(gpu_devices)}")

# Build model: two ReLU hidden layers with dropout, one sigmoid output unit
input_dim = X_train.shape[1]
model = Sequential([
    # Explicit Input layer instead of the legacy `input_dim=` keyword on Dense.
    tf.keras.Input(shape=(input_dim,)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # For binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train
# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen; validation_split holds out part of the training data for that.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=2,
)

# Evaluation
# Turn the model's outputs into hard 0/1 labels with a 0.5 cut-off and flatten
# them to one dimension so they line up with y_test.
test_probabilities = model.predict(X_test)
y_pred = (test_probabilities > 0.5).astype(int).reshape(-1)

# Score the predictions against the held-out labels.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Summary of the run
print("\n========== RESULTS ==========")
print(f"Accuracy on test set: {accuracy:.4f}")
print("\nClassification Report:", report, sep="\n")
print("\nConfusion Matrix:", conf_matrix, sep="\n")

# Wall-clock time since main_timer_start was recorded.
elapsed_seconds = time.time() - main_timer_start
print(f"\nTotal computation time: {elapsed_seconds:.2f} seconds")
print("========== PROCESS COMPLETED ==========")


Process started at: 2025-05-04 04:15:09
Epoch 1/50
410/410 - 6s - 14ms/step - accuracy: 0.5998 - loss: 0.6775 - val_accuracy: 0.5986 - val_loss: 0.6744
Epoch 2/50
410/410 - 2s - 5ms/step - accuracy: 0.6017 - loss: 0.6740 - val_accuracy: 0.5995 - val_loss: 0.6737
Epoch 3/50
410/410 - 1s - 3ms/step - accuracy: 0.6022 - loss: 0.6732 - val_accuracy: 0.5994 - val_loss: 0.6744
Epoch 4/50
410/410 - 1s - 3ms/step - accuracy: 0.6027 - loss: 0.6728 - val_accuracy: 0.5995 - val_loss: 0.6737
Epoch 5/50
410/410 - 1s - 3ms/step - accuracy: 0.6026 - loss: 0.6723 - val_accuracy: 0.5994 - val_loss: 0.6736
Epoch 6/50
410/410 - 1s - 3ms/step - accuracy: 0.6022 - loss: 0.6721 - val_accuracy: 0.5992 - val_loss: 0.6733
Epoch 7/50
410/410 - 1s - 3ms/step - accuracy: 0.6025 - loss: 0.6721 - val_accuracy: 0.5995 - val_loss: 0.6739
Epoch 8/50
410/410 - 1s - 3ms/step - accuracy: 0.6027 - loss: 0.6716 - val_accuracy: 0.5995 - val_loss: 0.6740
Epoch 9/50
410/410 - 1s - 3ms/step - accuracy: 0.6025 - loss: 0.6715 - 