In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Load the raw training and test CSV files into DataFrames.
df = pd.read_csv(filepath_or_buffer="/content/train.csv")
df_test = pd.read_csv(filepath_or_buffer="/content/test.csv")

In [None]:
def process_datetime(df):
    """Replace the ``Timestamp`` column with numeric calendar features.

    The ``Timestamp`` column is parsed with ``errors="coerce"``, so values
    that cannot be parsed become ``NaT`` and yield NaN in ``Hour``, ``Day``,
    ``Month`` and ``DayOfWeek`` (and 0 in ``IsWeekend``).

    Args:
        df: DataFrame containing a ``Timestamp`` column.

    Returns:
        A new DataFrame without ``Timestamp`` and with the columns ``Hour``,
        ``Day``, ``Month``, ``DayOfWeek`` and ``IsWeekend`` added. The input
        frame is left untouched, so the cell calling this function can be
        re-run safely.

    Raises:
        KeyError: if ``df`` has no ``Timestamp`` column.
    """
    timestamps = pd.to_datetime(df["Timestamp"], errors="coerce")
    day_of_week = timestamps.dt.dayofweek

    # drop() and assign() both return new frames, so the caller's DataFrame
    # keeps its original Timestamp column.
    return df.drop(columns=["Timestamp"]).assign(
        Hour=timestamps.dt.hour,
        Day=timestamps.dt.day,
        Month=timestamps.dt.month,
        DayOfWeek=day_of_week,
        # dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
        IsWeekend=(day_of_week >= 5).astype(int),
    )

# Expand the Timestamp column into calendar features for both splits.
df = process_datetime(df=df)
df_test = process_datetime(df=df_test)

In [None]:
def fill_missing_values(df):
    """Impute missing values using statistics computed from ``df`` itself.

    Numeric columns are filled with their column mean; object-dtype columns
    are filled with their most frequent value (the first one when several
    values tie).

    Args:
        df: DataFrame to impute.

    Returns:
        A new DataFrame with the gaps filled. The input frame is not
        modified. Columns that contain no non-null value at all are left
        as they are, because there is nothing to impute from.
    """
    filled = df.copy()

    num_cols = filled.select_dtypes(include=["number"]).columns
    if len(num_cols) > 0:
        filled[num_cols] = filled[num_cols].fillna(filled[num_cols].mean())

    for col in filled.select_dtypes(include=["object"]).columns:
        modes = filled[col].mode()
        # mode() returns an empty Series for an all-null column; indexing it
        # with [0] would raise, so such columns are skipped.
        if not modes.empty:
            filled[col] = filled[col].fillna(modes.iloc[0])

    return filled

# Impute gaps in both splits. Each frame is filled from its own
# statistics, because fill_missing_values only sees the frame it is given.
df = fill_missing_values(df=df)
df_test = fill_missing_values(df=df_test)

In [None]:
def encode_categorical(df, label_encoders=None, is_train=True):
    """Integer-encode every object-dtype column of ``df``.

    Args:
        df: DataFrame whose object columns should be encoded.
        label_encoders: dict mapping column name to a fitted encoder. When
            ``is_train`` is True a fresh ``LabelEncoder`` is fitted per column
            and stored in this dict (a new dict is created if None is given).
        is_train: True to fit new encoders on ``df``; False to reuse the
            encoders in ``label_encoders``.

    Returns:
        ``(encoded_df, label_encoders)``. ``encoded_df`` is a new DataFrame;
        the input frame is not modified. When ``is_train`` is False, values
        that the encoder did not see during fitting are encoded as -1.

    Raises:
        KeyError: if ``is_train`` is False and an object column has no entry
            in ``label_encoders``.
    """
    if label_encoders is None:
        label_encoders = {}

    encoded = df.copy()
    for col in encoded.select_dtypes(include=["object"]).columns:
        if is_train:
            label_encoders[col] = LabelEncoder()
            encoded[col] = label_encoders[col].fit_transform(encoded[col])
            continue

        if col not in label_encoders:
            raise KeyError(
                f"No fitted encoder for column {col!r}; "
                "encode the training data first and pass its label_encoders"
            )

        # Build the label -> code table once and look every row up in a
        # single vectorized pass, instead of calling transform() per cell.
        code_by_label = {
            label: code
            for code, label in enumerate(label_encoders[col].classes_)
        }
        # Labels absent from the table map to NaN, which becomes the -1 code.
        encoded[col] = encoded[col].map(code_by_label).fillna(-1).astype("int64")

    return encoded, label_encoders

# Fit the encoders on the training frame, then reuse those same encoders
# for the test frame so both splits share one label -> code mapping.
df, label_encoders = encode_categorical(df=df, label_encoders=None, is_train=True)
df_test, _ = encode_categorical(df=df_test, label_encoders=label_encoders, is_train=False)

In [None]:

# Both frames must be fully numeric before scaling and modelling.
assert df.select_dtypes(include=["object"]).empty, "Data still contains categorical values!"
assert df_test.select_dtypes(include=["object"]).empty, "Test data still contains categorical values!"

# Fail here, with a clear message, rather than letting y silently become
# None and surface as an opaque error inside model.fit() further down.
if "Water_Consumption" not in df.columns:
    raise KeyError("Training data has no 'Water_Consumption' column to use as the target")

X = df.drop(columns=["Water_Consumption"])
y = df["Water_Consumption"]

# The test matrix must expose exactly the training features, in the same
# order, so the scaler and model see matching columns.
missing_features = [col for col in X.columns if col not in df_test.columns]
if missing_features:
    raise ValueError(f"Test data is missing feature columns: {missing_features}")
X_test = df_test.loc[:, X.columns].copy()

In [None]:
# Fit the scaler on the training features only, then apply that same
# fitted transform to both the training and the test features.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Hyper-parameters shared by the final model and the validation model.
xgb_params = dict(
    objective="reg:squarederror",
    n_estimators=600,
    learning_rate=0.03,
    max_depth=8,
    colsample_bytree=0.8,
)

# Final model: fitted on every training row; its test predictions feed the
# submission file.
xgb_model = xgb.XGBRegressor(**xgb_params)
xgb_model.fit(X_scaled, y)
y_pred = xgb_model.predict(X_scaled)
y_test_pred = xgb_model.predict(X_test_scaled)

# In-sample metrics: computed on the same rows the model was trained on,
# so they describe goodness of fit, not performance on unseen data.
mae = mean_absolute_error(y, y_pred)
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y, y_pred)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2 Score: {r2}")

# Hold-out metrics: refit the same configuration on 80% of the rows and
# score it on the remaining 20% to estimate performance on unseen data.
# NOTE(review): if the rows are time-ordered, a chronological split
# (shuffle=False) may give a more honest estimate - confirm with the data.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
val_model = xgb.XGBRegressor(**xgb_params)
val_model.fit(X_fit, y_fit)
y_val_pred = val_model.predict(X_val)

val_mae = mean_absolute_error(y_val, y_val_pred)
val_mse = mean_squared_error(y_val, y_val_pred)
val_rmse = np.sqrt(val_mse)
val_r2 = r2_score(y_val, y_val_pred)

print(f"Validation MAE: {val_mae}")
print(f"Validation MSE: {val_mse}")
print(f"Validation RMSE: {val_rmse}")
print(f"Validation R2 Score: {val_r2}")

MAE: 2.6842165903036936
MSE: 15.96904581026205
RMSE: 3.9961288530604278
R2 Score: 0.9969927742302399


In [None]:
# Re-read the raw test file to recover the original Timestamp strings; the
# processed test frame no longer has that column.
df_test_original = pd.read_csv("/content/test.csv")
df_test_original["Timestamp"] = df_test_original["Timestamp"].astype(str)

# One row per test timestamp, predictions rounded to two decimals.
submission = pd.DataFrame({
    "Timestamp": df_test_original["Timestamp"],
    "Water_Consumption": np.round(y_test_pred, 2)
})

# A single path variable drives both the write and the confirmation
# message, so the printed file name always matches the file on disk.
submission_path = "submission.csv"
submission.to_csv(submission_path, index=False)

print(f"Submission file created File Name: {submission_path}")

Submission file created File Name: Submission.csv
