# In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer

# Train an MLP classifier on the training CSV, report hold-out accuracy, and
# write predictions for the test CSV to 'submission1.csv'.

# Load the data
train_data = pd.read_csv(r"C:\Users\91790\Downloads\Train_Data.csv")
test_data = pd.read_csv(r"C:\Users\91790\Downloads\Test_Data.csv")

# Label-encode the target variable; the submission contains these encoded
# integers (use label_encoder.inverse_transform to recover original labels).
label_encoder = LabelEncoder()
train_data['Healthy'] = label_encoder.fit_transform(train_data['Healthy'])

# One-hot encode the categorical variables. The two frames are encoded
# independently, so their dummy columns may differ; the test features are
# re-aligned to the training columns further down.
categorical_columns = ['Smoker?', 'Living in?']
train_data = pd.get_dummies(train_data, columns=categorical_columns)
test_data = pd.get_dummies(test_data, columns=categorical_columns)

# Columns that are not used as model inputs
unused_columns = ['Food preference', 'ID2', 'ID1', 'Any heriditary condition?']

# Split the data into input features and the target variable
X = train_data.drop(['Healthy'] + unused_columns, axis=1)
y = train_data['Healthy']

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing values with the mean; statistics are learned from the
# training split only so nothing leaks from the validation/test data.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_val_imputed = imputer.transform(X_val)

# Define the MLP classifier
mlp_classifier = MLPClassifier(hidden_layer_sizes=(64, 64), activation='relu', solver='adam', random_state=42)

# Train the MLP classifier
mlp_classifier.fit(X_train_imputed, y_train)

# Predict the target variable for the validation data and report accuracy,
# so the hold-out split actually serves as a sanity check.
y_val_predicted = mlp_classifier.predict(X_val_imputed)
val_accuracy = (y_val_predicted == y_val.to_numpy()).mean()
print(f"Validation accuracy: {val_accuracy:.4f}")

# Prepare the input features for the test data
X_test = test_data.drop(unused_columns, axis=1)

# Align the test features with the training features: same columns, same
# order. A dummy column missing from the test file means that category never
# occurred there, so it is filled with 0; columns unseen in training are
# dropped because the model has no weights for them.
X_test = X_test.reindex(columns=X.columns, fill_value=0)

# Impute missing values in the test data
X_test_imputed = imputer.transform(X_test)

# Predict the target variable for the test data
y_test_predicted = mlp_classifier.predict(X_test_imputed)

# Collect the encoded predictions and write the submission file
predictions = pd.DataFrame({'lifestyle_predicted': y_test_predicted})
submission = pd.DataFrame({'predictions': predictions['lifestyle_predicted'].to_numpy()})
submission.to_csv('submission1.csv', index=False)