In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import PowerTransformer, PolynomialFeatures
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.neural_network import MLPRegressor

In [2]:
import os

# Directory that holds train.csv / test.csv. It can be overridden with the
# IML_CHALLENGE_DIR environment variable so the notebook is not tied to one
# machine; the default is the original author's local folder.
PROJECT_DIR = os.environ.get(
    "IML_CHALLENGE_DIR",
    "C:\\Users\\faizan\\Documents\\IMLChallenge02",
)

# Only switch directories when the target actually exists. Otherwise stay in
# the current working directory, so the relative './train.csv' and
# './test.csv' paths used by the loading cell resolve next to the notebook
# instead of the whole run aborting with FileNotFoundError here.
if os.path.isdir(PROJECT_DIR):
    os.chdir(PROJECT_DIR)
else:
    print(f"Directory {PROJECT_DIR!r} not found; using {os.getcwd()!r} instead")

In [3]:
# Read the training table first, then the test table, from the current
# working directory (both paths are relative).
train_df, X_test = (
    pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv')
)

In [8]:
# Drop exact duplicate rows and rebuild a contiguous 0..n-1 index so that
# positional and label-based row access agree in later cells.
train_df = train_df.drop_duplicates().reset_index(drop=True)

# Identify categorical and numerical columns.
# Only the numerical columns are used as model inputs below; the target
# column 'price_doc' is removed from the feature list.
categorical_features = train_df.select_dtypes(include=[object]).columns.tolist()
numerical_features = train_df.select_dtypes(include=[np.number]).columns.tolist()
numerical_features.remove('price_doc')

# Prepare target and features
y_train = train_df['price_doc']
X_train = train_df[numerical_features]

# Feature selection: keep the columns with the highest f_regression scores.
# The requested count is capped at the number of available numeric columns,
# because asking SelectKBest for more features than exist is invalid.
N_SELECTED_FEATURES = 100
selector = SelectKBest(
    f_regression,
    k=min(N_SELECTED_FEATURES, len(numerical_features)),
)
X_train_selected = selector.fit_transform(X_train, y_train)
# The test frame is reduced to the same columns, in the same order, as the
# training frame before the fitted selector is applied.
X_test_selected = selector.transform(X_test[numerical_features])

# Scale features: the PowerTransformer (default settings) is fitted on the
# training data only and then re-used unchanged for the test data.
scaler = PowerTransformer()
X_train_scaled = scaler.fit_transform(X_train_selected)
X_test_scaled = scaler.transform(X_test_selected)

In [9]:
# Initialize MLPRegressor with modified hyperparameters
mlp_model = MLPRegressor(
    hidden_layer_sizes=(190, 100, 50),
    activation='relu',
    solver='adam',
    learning_rate_init=0.005,
    alpha=0.005,
    max_iter=30,  # Only consulted by fit(); partial_fit() does one pass per call
    random_state=42
)

# Manual mini-batch training: every partial_fit() call updates the model with
# a single iteration over the batch it is given, so the number of epochs is
# controlled by the outer loop below rather than by max_iter.
epochs = 30
batch_size = 32
num_samples = len(X_train_scaled)

# Plain arrays so that rows can be picked by position with an index array,
# independent of whatever index labels the pandas target carries.
X_train_values = np.asarray(X_train_scaled)
y_train_values = np.asarray(y_train)

# Seeded generator: the sample order differs from epoch to epoch but the
# whole run stays reproducible.
shuffle_rng = np.random.default_rng(42)

for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    # Re-shuffle every epoch so the batches are not presented in the same
    # fixed file order each time.
    order = shuffle_rng.permutation(num_samples)
    for i in range(0, num_samples, batch_size):
        batch_idx = order[i:i + batch_size]
        mlp_model.partial_fit(X_train_values[batch_idx], y_train_values[batch_idx])


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [6]:
# Score the scaled test features with the fitted network
y_pred = mlp_model.predict(X_test_scaled)

# Build the submission table: 1-based row identifiers, one per test row,
# next to the flattened predictions.
row_ids = range(1, len(X_test) + 1)
submission = pd.DataFrame({'row ID': row_ids, 'price_doc': y_pred.flatten()})

# Write the table to disk without the DataFrame index column
submission.to_csv('prediction_neuralNetwork.csv', index=False)