In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import os
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Set GPU device
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Use the first GPU

# Check for GPU availability
if tf.config.list_physical_devices('GPU'):
    print("GPU is available")
else:
    print("GPU is not available")

# Define paths
data_path = '/kaggle/input/playground-series-s4e7'
train_file = f'{data_path}/train.csv'
test_file = f'{data_path}/test.csv'

# Load the data
train = pd.read_csv(train_file)
test = pd.read_csv(test_file)

# Ensure 'id' column is not in features
test_ids = test['id']

# Encode categorical features
label_encoders = {}
for column in train.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    train[column] = le.fit_transform(train[column])
    test[column] = le.transform(test[column])
    label_encoders[column] = le

# Fill missing values
train.fillna(train.median(), inplace=True)
test.fillna(train.median(), inplace=True)

# Define target column
target_column = 'Response'

# Split features and target
X = train.drop(target_column, axis=1)
y = train[target_column]

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Ensure the test set has the same columns as the training set
test_preprocessed = test[X_train.columns]

# Define the model
model = Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=1024, validation_data=(X_val, y_val))

# Evaluate the model
loss, accuracy = model.evaluate(X_val, y_val)
print(f'Test accuracy: {accuracy}')

# Make predictions
y_test_pred = model.predict(test_preprocessed)
test['Response'] = (y_test_pred > 0.5).astype(int)

# Prepare submission
submission = pd.DataFrame({'id': test_ids, 'Response': test['Response']})
submission.to_csv('submission.csv', index=False)


/kaggle/input/playground-series-s4e7/sample_submission.csv
/kaggle/input/playground-series-s4e7/train.csv
/kaggle/input/playground-series-s4e7/test.csv


2024-07-30 10:56:58.234007: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-30 10:56:58.234170: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-30 10:56:58.387967: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


GPU is available
Epoch 1/10
[1m  80/8989[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m17s[0m 2ms/step - accuracy: 0.7623 - loss: 20851.8223 

I0000 00:00:1722337100.429329      82 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1722337100.447321      82 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m8989/8989[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2ms/step - accuracy: 0.7813 - loss: 4735.9873 - val_accuracy: 0.1390 - val_loss: 1353.4257
Epoch 2/10
[1m8989/8989[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.7835 - loss: 626.4488 - val_accuracy: 0.8446 - val_loss: 14.2931
Epoch 3/10
[1m8989/8989[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2ms/step - accuracy: 0.7885 - loss: 70.3479 - val_accuracy: 0.8746 - val_loss: 0.3960
Epoch 4/10
[1m8989/8989[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.8752 - loss: 0.3883 - val_accuracy: 0.8769 - val_loss: 0.3729
Epoch 5/10
[1m8989/8989[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.8771 - loss: 0.3728 - val_accuracy: 0.8769 - val_loss: 0.3730
Epoch 6/10
[1m8989/8989[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2ms/step - accuracy: 0.8770 - loss: 0.3729 - val_accuracy: 0.8769 - val_loss: 0.3730
Epoch 7/10
