In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import xgboost as xgb
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.layers import Conv1D, LSTM, Flatten, Dense, BatchNormalization, Dropout, LeakyReLU
from tensorflow.keras.models import Sequential

# Loading Data
train_data = pd.read_csv("/content/train.csv")

# Handling the Missing Values
# Fill NaNs in numeric columns with that column's mean. `numeric_only=True`
# keeps this working on pandas >= 2.0, where DataFrame.mean() raises a
# TypeError as soon as the frame contains a non-numeric column (for example a
# string "ID"). Reassigning instead of `inplace=True` keeps the cell safe to
# re-run.
train_data = train_data.fillna(train_data.mean(numeric_only=True))

target_column = "score"
if target_column not in train_data.columns:
    raise KeyError(f"Target column '{target_column}' not found. Available columns: {train_data.columns}")

# Features and Target
# Everything except the row identifier and the target is used as a feature.
X = train_data.drop(columns=["ID", target_column])
y = train_data[target_column]

# Train-Val Split
# 80/20 hold-out with a fixed seed so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize Features using MinMaxScaler
# The scaler is fit on the training split only and then applied unchanged to
# the validation split, so validation rows do not influence the scaling.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Reshape for CNN + LSTM
# Conv1D/LSTM expect 3-D input: (samples, steps, channels). Each feature is
# treated as one step with a single channel.
X_train_cnn = X_train_scaled.reshape(-1, X_train.shape[1], 1)
X_val_cnn = X_val_scaled.reshape(-1, X_train.shape[1], 1)

# Seed Python, NumPy and TensorFlow before any layer is created so weight
# initialisation, dropout masks and batch shuffling are repeatable, matching
# the random_state=42 already used for the split and for XGBoost.
# NOTE(review): some GPU kernels can still be non-deterministic — confirm if
# bit-exact reruns are required.
tf.keras.utils.set_random_seed(42)

# Optimized CNN + LSTM Model
# Conv1D feature extractor -> two stacked LSTMs -> dense regression head with
# a single linear output unit.
cnn_lstm_model = Sequential([
    Conv1D(128, 3, input_shape=(X_train.shape[1], 1)),  # 128 filters, kernel size 3
    LeakyReLU(alpha=0.1),  # Small negative slope instead of a hard zero
    BatchNormalization(),
    Dropout(0.3),

    LSTM(128, return_sequences=True),  # Keep the full sequence for the next LSTM
    Dropout(0.3),
    LSTM(64),  # Returns only the last output
    Dropout(0.3),

    Dense(128),
    LeakyReLU(alpha=0.1),
    BatchNormalization(),

    Dense(64, activation='relu'),
    Dense(1)  # Linear output for regression
])

# Adam with a small learning rate and gradient-norm clipping at 0.5
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0003, clipnorm=0.5)
cnn_lstm_model.compile(optimizer=optimizer, loss="mse")

# Reduce LR on Plateau: halve the learning rate when val_loss has not improved
# for 5 epochs, never going below 1e-5. This helps late-stage convergence; it
# is not a regulariser and does not by itself prevent overfitting.
lr_reducer = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-5)

# Train CNN + LSTM
cnn_lstm_model.fit(X_train_cnn, y_train, validation_data=(X_val_cnn, y_val),
                   epochs=70, batch_size=32, verbose=1, callbacks=[lr_reducer])

# Predict from CNN + LSTM
# predict() returns shape (n, 1); flatten to 1-D so it can be column-stacked
# with the XGBoost predictions later.
cnn_lstm_preds_train = cnn_lstm_model.predict(X_train_cnn).flatten()
cnn_lstm_preds_val = cnn_lstm_model.predict(X_val_cnn).flatten()

# XGBoost base learner: hyper-parameters are collected in one mapping so they
# can be read (and tuned) in a single place.
xgb_params = dict(
    n_estimators=2000,      # number of boosted trees
    learning_rate=0.01,     # shrinkage applied to each tree
    max_depth=10,           # maximum tree depth
    min_child_weight=2,
    colsample_bytree=0.9,   # fraction of columns sampled per tree
    subsample=0.95,         # fraction of rows sampled per tree
    random_state=42,
)
xgb_model = xgb.XGBRegressor(**xgb_params)

# Fit on the scaled training features.
xgb_model.fit(X_train_scaled, y_train)

# Score both splits with the fitted regressor (train first, then validation).
xgb_preds_train, xgb_preds_val = (
    xgb_model.predict(split_features)
    for split_features in (X_train_scaled, X_val_scaled)
)

# *Stacking Model (Ridge Regression for better stability)*
# Level-1 design matrices: one column per base model (CNN+LSTM, XGBoost).
# `stacked_train` is kept for inspection only; it is no longer used to fit the
# meta-model (see below).
stacked_train = np.column_stack((cnn_lstm_preds_train, xgb_preds_train))
stacked_val = np.column_stack((cnn_lstm_preds_val, xgb_preds_val))

# Both base models were fit on the training split, so their training-set
# predictions are in-sample and optimistically close to the target. Learning
# the blend weights from them favours whichever base model overfits most.
# The meta-model is therefore fit on predictions for the held-out validation
# rows, which neither base model was fit on.
META_CV_FOLDS = 5
meta_model = Ridge(alpha=0.01)  # L2 Regularization

#  Final Prediction
# Out-of-fold predictions: every validation row is predicted by a Ridge that
# was fit on the other folds, so the metrics below are not computed on rows
# the blender was fit on.
final_preds = cross_val_predict(meta_model, stacked_val, y_val, cv=META_CV_FOLDS)

# cross_val_predict works on clones; fit the actual meta-model on all held-out
# rows so it can be used for test-time inference.
meta_model.fit(stacked_val, y_val)

#  Evaluation Metrics
mse = mean_squared_error(y_val, final_preds)
# Fraction of rows whose rounded prediction equals the rounded target.
accuracy = np.mean(np.round(final_preds) == np.round(y_val))

#  Harmonic Score Formula
# Algebraically identical to 6 * (1/mse) * accuracy / ((1/mse) + accuracy),
# but multiplied through by mse so that a perfect fit (mse == 0) yields
# 6 * accuracy instead of a division by zero.
hs = (6 * accuracy) / (1 + mse * accuracy)

print(f"MSE: {mse:.4f}, Accuracy: {accuracy:.4f}, Harmonic Score: {hs:.4f}")

#  Loading the Test Data
test_data = pd.read_csv("/content/test.csv")

# Impute missing feature values with the *training* feature means rather than
# the test set's own means, so test rows go through the same preprocessing the
# models were fit with. The fill Series is indexed by feature name, so only
# feature columns are filled and "ID" is left untouched. `numeric_only=True`
# avoids the pandas >= 2.0 TypeError on non-numeric columns.
test_data = test_data.fillna(X.mean(numeric_only=True))

# Preparing Test Features
test_ids = test_data["ID"]
# Select exactly the training feature columns, in training order. A missing
# feature raises a KeyError here instead of a shape/name mismatch inside the
# scaler.
X_test = test_data.drop(columns=["ID"])[X.columns]
X_test_scaled = scaler.transform(X_test)
# Same (samples, steps, channels) layout used for the training data.
X_test_cnn = X_test_scaled.reshape(-1, X_test.shape[1], 1)

# Base-model predictions on the test features. The network output is
# flattened to 1-D so both prediction vectors have the same shape.
cnn_lstm_raw_test = cnn_lstm_model.predict(X_test_cnn)
cnn_lstm_preds_test = cnn_lstm_raw_test.flatten()
xgb_preds_test = xgb_model.predict(X_test_scaled)

# Level-1 inputs for the meta-model: column 0 = CNN+LSTM, column 1 = XGBoost.
stacked_test = np.column_stack([cnn_lstm_preds_test, xgb_preds_test])

# Blend the two base predictions with the fitted meta-model.
final_test_preds = meta_model.predict(stacked_test)

# Assemble the submission table (ID + predicted score) and write it to disk
# without the DataFrame index.
submission = pd.DataFrame({"ID": test_ids}).assign(score=final_test_preds)
submission.to_csv("submission_tuned.csv", index=False)

print("Submission file saved as submission_tuned.csv ")

In [None]:
import pandas as pd

# Load the submission file written by the previous cell
submission = pd.read_csv("submission_tuned.csv")

# Round the 'score' column to the nearest integer
submission["score"] = submission["score"].round().astype(int)

# Save the rounded copy under a new name so the unrounded predictions in
# submission_tuned.csv are preserved
submission.to_csv("submission_tuned1.csv", index=False)

# Report the file that was actually written
print("Rounded scores (integers) saved to submission_tuned1.csv successfully!")
