In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from scipy.stats import expon, reciprocal
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [16]:
# Load the dataset
train_df = pd.read_csv('csv_files/train.csv')

# Convert 'Transported' to integer (True=1, False=0) for modeling
train_df['Transported'] = train_df['Transported'].astype(int)

# Feature engineering (if any)
# Assuming your feature engineering steps here

# Prepare features and target
features = ['RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck', 'Age']  # Replace with your actual features
X = train_df[features]
y = train_df['Transported']

# Impute missing values
imputer = SimpleImputer(strategy='median')
X_imputed = imputer.fit_transform(X)

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_imputed)
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=70, batch_size=32, validation_data=(X_val, y_val), verbose=1)
# Evaluate on validation set
val_loss, val_acc = model.evaluate(X_val, y_val, verbose=2)
print(f'Validation Accuracy: {val_acc}')




Epoch 1/70


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 660us/step - accuracy: 0.7297 - loss: 0.5732 - val_accuracy: 0.7677 - val_loss: 0.5038
Epoch 2/70
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 380us/step - accuracy: 0.7921 - loss: 0.4745 - val_accuracy: 0.7769 - val_loss: 0.4945
Epoch 3/70
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375us/step - accuracy: 0.7975 - loss: 0.4731 - val_accuracy: 0.7775 - val_loss: 0.4945
Epoch 4/70
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 379us/step - accuracy: 0.7901 - loss: 0.4807 - val_accuracy: 0.7780 - val_loss: 0.5035
Epoch 5/70
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398us/step - accuracy: 0.7914 - loss: 0.4730 - val_accuracy: 0.7803 - val_loss: 0.4915
Epoch 6/70
[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398us/step - accuracy: 0.7952 - loss: 0.4770 - val_accuracy: 0.7752 - val_loss: 0.4926
Epoch 7/70
[1m218/218[0m 

In [17]:
test_df = pd.read_csv('csv_files/test.csv')
# Impute missing values
X_test_imputed = imputer.transform(test_df[features])  # Use the same imputer as for the training data

# Scale the features
X_test_scaled = scaler.transform(X_test_imputed)  # Use the same scaler as for the training data
# Make predictions with the TensorFlow model
y_pred_test_proba = model.predict(X_test_scaled)
y_pred_test = (y_pred_test_proba > 0.5).astype(int).flatten()  # Convert probabilities to binary predictions
# Prepare the submission dataframe
submission_df = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Transported': y_pred_test
})

# Convert predictions back to boolean (True/False) if necessary
submission_df['Transported'] = submission_df['Transported'].astype(bool)

# Save the submission file
submission_df.to_csv('tensorflow_result.csv', index=False)


[1m134/134[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 365us/step
