In [15]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Pipeline: load the training CSV, integer-encode categorical features and the
# target, standardise features, train a small dense softmax classifier, then
# apply the *same* encoding and scaling to the test CSV and predict.
#
# NOTE(review): the "AttributeError: module 'numpy' has no attribute
# 'typeDict'" recorded with this cell looks like an environment issue rather
# than a bug in this code -- `np.typeDict` was removed in NumPy 1.24 and some
# older h5py / TensorFlow builds still reference it. Confirm the installed
# versions (upgrade those packages or pin numpy<1.24).

# Load the training data
train_path = "/kaggle/input/playground-series-s3e26/train.csv"
train_data = pd.read_csv(train_path)

# Features and target variable
features = ['N_Days', 'Drug', 'Age', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema',
            'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos', 'SGOT', 'Tryglicerides',
            'Platelets', 'Prothrombin', 'Stage']

target = 'Status'

# Sentinel category used for test-set values that were never seen in training.
UNKNOWN_LABEL = '<unknown>'

# Keep one fitted encoder per categorical feature. A single shared encoder is
# overwritten by every subsequent fit, so it cannot be reused on the test set.
feature_encoders = {}
for col in features:
    if train_data[col].dtype == 'object':
        encoder = LabelEncoder()
        # Fit on the training categories plus the sentinel so that unseen
        # test categories have a valid integer code to map to.
        encoder.fit(list(train_data[col].unique()) + [UNKNOWN_LABEL])
        train_data[col] = encoder.transform(train_data[col])
        feature_encoders[col] = encoder

# Encode the target variable with its own dedicated encoder
label_encoder = LabelEncoder()
train_data[target] = label_encoder.fit_transform(train_data[target])

# Split the data into features (X) and one-hot encoded target (y)
X = train_data[features]
y = tf.keras.utils.to_categorical(train_data[target])

# Derive the output width from the data instead of hard-coding it
num_classes = y.shape[1]

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features using statistics from the training split only
X_mean = X_train.mean(axis=0)
# A constant feature has std 0; use 1 there to avoid dividing by zero.
X_std = X_train.std(axis=0).replace(0, 1)
X_train = (X_train - X_mean) / X_std
X_val = (X_val - X_mean) / X_std

# Define the neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.5),  # Dropout regularization
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# Adjust the learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Compile the model
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val))

# Load the test data
test_path = "/kaggle/input/playground-series-s3e26/test.csv"
test_data = pd.read_csv(test_path)

# Encode categorical variables in the test set with the encoders fitted on the
# training data; categories not seen during training become the sentinel.
for col, encoder in feature_encoders.items():
    seen = test_data[col].isin(encoder.classes_)
    test_data[col] = encoder.transform(test_data[col].where(seen, UNKNOWN_LABEL))

# Convert any remaining non-numeric values to numeric (invalid entries -> NaN)
test_data[features] = test_data[features].apply(pd.to_numeric, errors='coerce')

# Impute missing values with the training mean so that they land on 0 after
# standardisation, rather than on an extreme raw value of 0.
test_data[features] = test_data[features].fillna(X_mean)

# Normalize the test features with the training statistics
X_test = (test_data[features] - X_mean) / X_std

# Make predictions on the test set
predictions = model.predict(X_test)

AttributeError: module 'numpy' has no attribute 'typeDict'