<a href="https://colab.research.google.com/github/Manas9991/Miscellaneous/blob/main/HE_Water_Consumption_Hackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing Libraries:

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras import metrics

In [None]:
# Load the labelled training data (dfr) and the unlabelled test data (dfs).
dfr, dfs = (pd.read_csv(path) for path in ('train.csv', 'test.csv'))

In [None]:
# Keep an untouched copy of the test frame; its Timestamp column is needed
# when the submission file is written.
submission_dfs = dfs.copy(deep=True)

## Cleaning Data

In [None]:
# Preview the first ten training rows.
dfr.head(n=10)

Unnamed: 0,Timestamp,Residents,Apartment_Type,Temperature,Humidity,Water_Price,Period_Consumption_Index,Income_Level,Guests,Amenities,Appliance_Usage,Water_Consumption
0,01/01/2002 00,1,Studio,15.31,46.61,1.06,0.97,Low,0,Swimming Pool,0.0,64.85
1,01/01/2002 08,4,,21.01,66.11,2.98,0.91,Upper Middle,1,Swimming Pool,1.0,192.5
2,01/01/2002 16,2,Cottage,12.86,60.86,1.44,1.43,Middle,0,,1.0,116.62
3,02/01/2002 00,2,1BHK,20.16,50.58,1.48,0.91,Middle,-1,Garden,0.0,76.96
4,02/01/2002 08,2,Cottage,16.23,52.25,1.14,1.11,Middle,0,Fountain,0.0,104.7
5,02/01/2002 16,4,2BHK,22.23,53.86,1.15,1.46,Middle,0,,1.0,218.23
6,03/01/2002 00,3,2BHK,10.83,57.51,2.98,1.07,Upper Middle,0,Swimming Pool,0.0,135.8
7,03/01/2002 08,3,Cottage,30.37,33.88,1.35,1.4,yePea,0,Fountain,0.0,202.29
8,03/01/2002 16,4,Bungalow,16.57,57.94,2.84,1.47,Upper Middle,0,Garden,0.0,188.04
9,04/01/2002 00,2,,22.59,57.25,1.11,0.99,Low,1,,1.0,88.94


In [None]:
# Coerce Humidity and Residents to numeric in both frames. Entries that cannot
# be parsed as numbers become NaN (errors='coerce').
for frame in (dfr, dfs):
    for column in ('Humidity', 'Residents'):
        frame[column] = pd.to_numeric(frame[column], errors='coerce')

In [None]:
# Missing amenities become the literal label 'NULL'; missing appliance usage
# is treated as 0.
for frame in (dfr, dfs):
    frame['Amenities'] = frame['Amenities'].fillna('NULL')
    frame['Appliance_Usage'] = frame['Appliance_Usage'].fillna(0)

In [None]:
# Only these four income levels are accepted; every other value (including
# entries that are already missing) ends up as NaN in both frames.
income_levels = ['Rich', 'Upper Middle', 'Middle', 'Low']
for frame in (dfr, dfs):
    recognised = frame['Income_Level'].isin(income_levels)
    frame['Income_Level'] = frame['Income_Level'].where(recognised)

In [None]:
# Blank out implausible Humidity readings.
#
# Q1 and Q3 hold the 10th and 90th percentiles of the training Humidity (not
# the quartiles), and IQR is the spread between those two percentiles.
Q1, Q3 = (dfr['Humidity'].quantile(q) for q in (0.1, 0.9))
IQR = Q3 - Q1

# The fences sit 1.5 spreads beyond each percentile.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# The fences derived from the training frame are applied to both frames.
for frame in (dfr, dfs):
    humidity = frame['Humidity']
    outside = humidity.lt(lower_bound) | humidity.gt(upper_bound)
    frame.loc[outside, 'Humidity'] = np.nan

In [None]:
# Negative resident counts are marked as missing in both frames.
for frame in (dfr, dfs):
    negative = frame['Residents'].lt(0)
    frame.loc[negative, 'Residents'] = np.nan

In [None]:
# Negative water prices are marked as missing in both frames.
for frame in (dfr, dfs):
    negative = frame['Water_Price'].lt(0)
    frame.loc[negative, 'Water_Price'] = np.nan

In [None]:
# Impute missing Humidity with a linear regression on Temperature and
# Water_Price, fitted on the training rows whose Humidity is known.
humidity_features = ['Temperature', 'Water_Price']

train_df = dfr.loc[dfr['Humidity'].notnull(), humidity_features + ['Humidity']]

# Gaps in the predictors are filled with medians taken from the fitting rows.
# The same medians are reused when scoring, so a small (or entirely NaN) group
# of rows to impute cannot yield a different or undefined fill value.
feature_medians = train_df[humidity_features].median()

# fillna returns a new frame, so no value is assigned into a slice of dfr,
# which is what triggers pandas' SettingWithCopyWarning.
X_train = train_df[humidity_features].fillna(feature_medians)
y_train = train_df['Humidity']

model = LinearRegression()
model.fit(X_train, y_train)

# Fill the gaps in the training frame and then in the test frame.
for frame in (dfr, dfs):
    missing = frame['Humidity'].isnull()
    # predict() rejects an empty input, so skip frames with nothing to impute.
    if missing.any():
        X_missing = frame.loc[missing, humidity_features].fillna(feature_medians)
        frame.loc[missing, 'Humidity'] = model.predict(X_missing)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['Temperature'] = X_train['Temperature'].fillna(X_train['Temperature'].median())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['Water_Price'] = X_train['Water_Price'].fillna(X_train['Water_Price'].median())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['Temperature'] = X_tes

In [None]:
# Impute missing Temperature with a linear regression on Humidity, fitted on
# the training rows whose Temperature is known.
temperature_features = ['Humidity']

train_df = dfr.loc[dfr['Temperature'].notnull(), temperature_features + ['Temperature']]

# Any gap in the predictor is filled with the median of the fitting rows; the
# same value is reused when scoring so both frames are treated identically.
feature_medians = train_df[temperature_features].median()

# fillna returns a new frame, so no value is assigned into a slice of dfr,
# which is what triggers pandas' SettingWithCopyWarning.
X_train = train_df[temperature_features].fillna(feature_medians)
y_train = train_df['Temperature']

model = LinearRegression()
model.fit(X_train, y_train)

# Fill the gaps in the training frame and then in the test frame.
for frame in (dfr, dfs):
    missing = frame['Temperature'].isnull()
    # predict() rejects an empty input, so skip frames with nothing to impute.
    if missing.any():
        X_missing = frame.loc[missing, temperature_features].fillna(feature_medians)
        frame.loc[missing, 'Temperature'] = model.predict(X_missing)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['Humidity'] = X_train['Humidity'].fillna(X_train['Humidity'].median())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['Humidity'] = X_test['Humidity'].fillna(X_test['Humidity'].median())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test_other['Humidity'] = X_test_other['Humidity'

In [None]:
# Impute missing Residents with a linear regression on Apartment_Type and
# Water_Price. The work happens on copies so that the temporary fills and the
# integer coding of Apartment_Type never reach dfr / dfs.
resident_features = ['Apartment_Type', 'Water_Price']
dfr_subset = dfr[['Residents'] + resident_features].copy()
dfs_subset = dfs[['Residents'] + resident_features].copy()

# Give every row usable predictors: most frequent apartment type and median
# price, each computed within its own frame.
for subset in (dfr_subset, dfs_subset):
    subset['Apartment_Type'] = subset['Apartment_Type'].fillna(subset['Apartment_Type'].mode()[0])
    subset['Water_Price'] = subset['Water_Price'].fillna(subset['Water_Price'].median())

# Code apartment types as integers in order of first appearance in training.
apartment_mapping = {
    apartment_type: code
    for code, apartment_type in enumerate(dfr_subset['Apartment_Type'].unique())
}
for subset in (dfr_subset, dfs_subset):
    subset['Apartment_Type'] = subset['Apartment_Type'].map(apartment_mapping)

# Apartment types seen only in the test frame map to NaN; fall back to the
# median code of the test frame.
dfs_subset['Apartment_Type'] = dfs_subset['Apartment_Type'].fillna(dfs_subset['Apartment_Type'].median())

# Fit on the training rows whose Residents value is known.
train_df = dfr_subset.dropna(subset=['Residents'])
X_train = train_df[resident_features]
y_train = train_df['Residents']

model = LinearRegression()
model.fit(X_train, y_train)

# Write predictions back into the real frames, training frame first.
for frame, subset in ((dfr, dfr_subset), (dfs, dfs_subset)):
    missing = frame['Residents'].isnull()
    # predict() rejects an empty input, so skip frames with nothing to impute.
    if missing.any():
        frame.loc[missing, 'Residents'] = model.predict(subset.loc[missing, resident_features])


In [None]:
# Impute missing Water_Price with a linear regression on Temperature,
# Apartment_Type and Humidity. The work happens on copies so that the temporary
# fills and the integer coding of Apartment_Type never reach dfr / dfs.
price_features = ['Temperature', 'Apartment_Type', 'Humidity']
dfr_subset = dfr[['Water_Price'] + price_features].copy()
dfs_subset = dfs[['Water_Price'] + price_features].copy()

# Give every row usable predictors: most frequent apartment type and median
# humidity, each computed within its own frame.
for subset in (dfr_subset, dfs_subset):
    subset['Apartment_Type'] = subset['Apartment_Type'].fillna(subset['Apartment_Type'].mode()[0])
    subset['Humidity'] = subset['Humidity'].fillna(subset['Humidity'].median())

# Code apartment types as integers in order of first appearance in training.
apartment_mapping = {
    apartment_type: code
    for code, apartment_type in enumerate(dfr_subset['Apartment_Type'].unique())
}
for subset in (dfr_subset, dfs_subset):
    subset['Apartment_Type'] = subset['Apartment_Type'].map(apartment_mapping)

# Apartment types seen only in the test frame map to NaN; fall back to the
# median code of the test frame.
dfs_subset['Apartment_Type'] = dfs_subset['Apartment_Type'].fillna(dfs_subset['Apartment_Type'].median())

# Fit on the training rows whose Water_Price is known.
train_df = dfr_subset.dropna(subset=['Water_Price'])
X_train = train_df[price_features]
y_train = train_df['Water_Price']

model = LinearRegression()
model.fit(X_train, y_train)

# Write predictions back into the real frames, training frame first.
for frame, subset in ((dfr, dfr_subset), (dfs, dfs_subset)):
    missing = frame['Water_Price'].isnull()
    # predict() rejects an empty input, so skip frames with nothing to impute.
    if missing.any():
        frame.loc[missing, 'Water_Price'] = model.predict(subset.loc[missing, price_features])


In [None]:
# For every income level, find the apartment type that occurs most often in
# the training data, together with the number of rows it accounts for.
income_apartment_counts = (
    dfr.groupby(['Income_Level', 'Apartment_Type'])
    .size()
    .reset_index(name='counts')
)

# idxmax yields, per income level, the row label holding the largest count.
top_rows = income_apartment_counts.groupby('Income_Level')['counts'].idxmax()
income_apartment_max = income_apartment_counts.loc[top_rows]

# Same rows, with reader-friendly column names.
income_apartment_df = income_apartment_max.rename(
    columns={'Apartment_Type': 'Most Frequent Apartment', 'counts': 'Frequency'}
)[['Income_Level', 'Most Frequent Apartment', 'Frequency']]

# Display the lookup table.
income_apartment_df


Unnamed: 0,Income_Level,Most Frequent Apartment,Frequency
1,Low,Studio,1125
2,Middle,1BHK,1734
5,Rich,3BHK,518
10,Upper Middle,Bungalow,1297


In [None]:
# Regression-imputed resident counts are fractional; round them back to whole
# numbers. Both frames are rounded so that Residents has the same form at
# training time and at prediction time.
dfr['Residents'] = dfr['Residents'].round().astype(int)
dfs['Residents'] = dfs['Residents'].round().astype(int)

# Count the training rows for every (Residents, Apartment_Type) pair.
resident_apartment_counts = dfr.groupby(['Residents', 'Apartment_Type']).size().reset_index(name='counts')

# Keep, for each resident count, the apartment type with the largest count.
resident_apartment_max = resident_apartment_counts.loc[resident_apartment_counts.groupby('Residents')['counts'].idxmax()]

# Present the result with descriptive column names.
resident_apartment_df = pd.DataFrame({
    'Residents': resident_apartment_max['Residents'],
    'Most Frequent Apartment': resident_apartment_max['Apartment_Type'],
    'Frequency': resident_apartment_max['counts']
})

# Display the table.
resident_apartment_df


Unnamed: 0,Residents,Most Frequent Apartment,Frequency
0,1,Studio,590
1,2,1BHK,1529
5,3,1BHK,1490
12,4,2BHK,999
16,5,2BHK,1029


In [None]:
# Fill a missing Apartment_Type from the income-level lookup table.
def replace_missing_apartment_type(row):
    """Return the Apartment_Type to use for one row.

    A row that already has an Apartment_Type keeps it. Otherwise the row's
    Income_Level is looked up in ``income_apartment_df`` and the matching
    'Most Frequent Apartment' is returned; when the income level is not in
    that table (including when it is missing) the result is '1BHK'.
    """
    current_type = row['Apartment_Type']
    if not pd.isnull(current_type):
        return current_type

    same_income = income_apartment_df['Income_Level'] == row['Income_Level']
    candidates = income_apartment_df.loc[same_income, 'Most Frequent Apartment']
    if candidates.empty:
        return '1BHK'
    return candidates.iloc[0]

# Apply the row-wise fill to both frames; the lookup table it relies on was
# built from the training data.
for frame in (dfr, dfs):
    frame['Apartment_Type'] = frame.apply(replace_missing_apartment_type, axis=1)

In [None]:

# For every apartment type, find the income level that occurs most often in
# the training data, together with the number of rows it accounts for.
apartment_income_counts = (
    dfr.groupby(['Apartment_Type', 'Income_Level'])
    .size()
    .reset_index(name='counts')
)

# idxmax yields, per apartment type, the row label holding the largest count.
top_rows = apartment_income_counts.groupby('Apartment_Type')['counts'].idxmax()
apartment_income_max = apartment_income_counts.loc[top_rows]

# Same rows, with reader-friendly column names.
apartment_income_df = apartment_income_max.rename(
    columns={'Income_Level': 'Most Frequent Income', 'counts': 'Frequency'}
)[['Apartment_Type', 'Most Frequent Income', 'Frequency']]

# Display the lookup table.
apartment_income_df


Unnamed: 0,Apartment_Type,Most Frequent Income,Frequency
1,1BHK,Middle,1922
2,2BHK,Middle,1675
5,3BHK,Upper Middle,1275
7,Bungalow,Upper Middle,1402
8,Cottage,Middle,1726
9,Detached,Rich,517
10,Studio,Low,1193


In [None]:
# Fill a missing Income_Level from the apartment-type lookup table.
def replace_missing_income_level(row):
    """Return the Income_Level to use for one row.

    A row that already has an Income_Level keeps it. Otherwise the row's
    Apartment_Type is looked up in ``apartment_income_df`` and the matching
    'Most Frequent Income' is returned; when the apartment type is not in
    that table (including when it is missing) the result is 'Middle'.
    """
    current_level = row['Income_Level']
    if not pd.isnull(current_level):
        return current_level

    same_apartment = apartment_income_df['Apartment_Type'] == row['Apartment_Type']
    candidates = apartment_income_df.loc[same_apartment, 'Most Frequent Income']
    # 'Middle' is the default when the table has no entry for this apartment type.
    return candidates.iloc[0] if not candidates.empty else 'Middle'

# Apply the row-wise fill to both frames; the lookup table it relies on was
# built from the training data.
for frame in (dfr, dfs):
    frame['Income_Level'] = frame.apply(replace_missing_income_level, axis=1)


In [None]:
# One-hot encoding helper used for 'Income_Level', 'Apartment_Type' and
# 'Amenities'. Every distinct value of the column gets its own indicator
# column (no 'Others' bucketing is performed here).
def one_hot_encode(df, column_name):
    """Replace a categorical column with numeric 0/1 indicator columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column_name``. It is not modified.
    column_name : str
        Column to encode. One indicator column named
        ``f"{column_name}_{value}"`` is produced per distinct value; no level
        is dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``column_name`` removed and the indicator columns
        appended after the remaining columns.
    """
    # dtype=int makes the indicators integers (1/0); without it recent pandas
    # versions return booleans (True/False).
    dummies = pd.get_dummies(df[column_name], prefix=column_name, drop_first=False, dtype=int)
    return pd.concat([df.drop(columns=column_name), dummies], axis=1)

# Encode the three categorical columns in the training and the test frame.
# NOTE(review): the frames are encoded independently, so a category present in
# only one of them would give the two frames different columns - confirm the
# category sets match.
for col in ('Income_Level', 'Apartment_Type', 'Amenities'):
    dfr, dfs = one_hot_encode(dfr, col), one_hot_encode(dfs, col)


In [None]:
# Timestamp is not used as a model input; remove it from both frames.
dfr = dfr.drop(columns='Timestamp')
dfs = dfs.drop(columns='Timestamp')

In [None]:
# Total_Occupants = Residents + Guests, computed for both frames.
# NOTE(review): this runs before Guests is capped, so negative Guests values
# (the preview shows a -1) lower the total - confirm this is intended.
for frame in (dfr, dfs):
    frame['Total_Occupants'] = frame['Residents'].add(frame['Guests'])

In [None]:
# Clamp a column to the range spanned by two of its own quantiles.
def cap_outliers(df, column, lower_percentile=0.1, upper_percentile=0.9):
    """Clip ``df[column]`` to its own quantile range and return ``df``.

    Values below the ``lower_percentile`` quantile are raised to it and values
    above the ``upper_percentile`` quantile are lowered to it. Both quantiles
    are given as fractions (0.1 means the 10th percentile). The column is
    overwritten in the frame that was passed in, and that same frame is
    returned.
    """
    floor = df[column].quantile(lower_percentile)
    ceiling = df[column].quantile(upper_percentile)
    df[column] = df[column].clip(lower=floor, upper=ceiling)
    return df

# Cap Guests and Period_Consumption_Index at the listed quantiles.
# NOTE(review): each frame is capped at its own quantiles, so the limits used
# for train and test may differ - confirm this is intended.
for column, low_q, high_q in (
    ('Guests', 0.02, 0.95),
    ('Period_Consumption_Index', 0.005, 0.99),
):
    dfr = cap_outliers(dfr, column, low_q, high_q)
    dfs = cap_outliers(dfs, column, low_q, high_q)

In [None]:
# Feature engineering: interaction term, the product of Temperature and
# Humidity, added to both frames.
for frame in (dfr, dfs):
    frame['Temp_Humidity_Interaction'] = frame['Temperature'].mul(frame['Humidity'])

In [None]:
# Make sure every column in both frames is numeric. Columns that already have
# a numeric dtype are left alone; anything else is coerced, with values that
# cannot be parsed becoming NaN.
for frame in (dfr, dfs):
    non_numeric = [col for col in frame.columns if not pd.api.types.is_numeric_dtype(frame[col])]
    for col in non_numeric:
        frame[col] = pd.to_numeric(frame[col], errors='coerce')


In [None]:
# Summary statistics of the capped Period_Consumption_Index column.
dfr.loc[:, 'Period_Consumption_Index'].describe()

Unnamed: 0,Period_Consumption_Index
count,14000.0
mean,1.151297
std,0.219886
min,0.392344
25%,0.97
50%,1.15
75%,1.33
max,1.645256


In [None]:
# Preview the first five rows of the prepared training frame.
dfr.head(n=5)

Unnamed: 0,Residents,Temperature,Humidity,Water_Price,Period_Consumption_Index,Guests,Appliance_Usage,Water_Consumption,Income_Level_Low,Income_Level_Middle,...,Apartment_Type_Cottage,Apartment_Type_Detached,Apartment_Type_Studio,Amenities_Fountain,Amenities_Garden,Amenities_Jacuzzi,Amenities_NULL,Amenities_Swimming Pool,Total_Occupants,Temp_Humidity_Interaction
0,1,15.31,46.61,1.06,0.97,0,0.0,64.85,True,False,...,False,False,True,False,False,False,False,True,1,713.5991
1,4,21.01,66.11,2.98,0.91,1,1.0,192.5,False,False,...,False,False,False,False,False,False,False,True,5,1388.9711
2,2,12.86,60.86,1.44,1.43,0,1.0,116.62,False,True,...,True,False,False,False,False,False,True,False,2,782.6596
3,2,20.16,50.58,1.48,0.91,0,0.0,76.96,False,True,...,False,False,False,False,True,False,False,False,1,1019.6928
4,2,16.23,52.25,1.14,1.11,0,0.0,104.7,False,True,...,True,False,False,True,False,False,False,False,2,848.0175


## Applying ML Models:

In [None]:
# Water_Consumption is the target; every other column is a model input.
y = dfr['Water_Consumption']
x = dfr.drop('Water_Consumption', axis=1)

In [None]:
# train_size=0.99995 keeps essentially every row for fitting and leaves only a
# token hold-out set. random_state pins the shuffle so that re-running the
# notebook reproduces the same split.
X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=0.99995, random_state=42)

In [None]:
# Remember the feature names before dfs is replaced by a plain array.
feature_names = dfs.columns

# Fit the scaler on the training split only, then apply that same fit to the
# hold-out rows and to the submission features. Refitting on dfs would scale
# the submission data with different means/variances than the model was
# trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
dfs = scaler.transform(dfs)

In [None]:
# Define the model: three hidden layers and a single linear output unit for
# regression. The input shape is declared with an explicit Input object rather
# than an `input_shape` argument on the first layer, which Keras warns about.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(128, activation='relu6'),
    keras.layers.Dense(64, activation='relu6'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1)  # Output layer for regression
])

# Compile the model with Adam and an exponentially decaying learning rate.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=1000,
    decay_rate=0.85)
opt = keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=opt, loss='mse', metrics=[metrics.MeanSquaredError()])

# Train the model; 20% of the training rows are held back for validation.
history = model.fit(X_train, y_train, epochs=300, batch_size=64, validation_split=0.2, verbose=1)

# Predict once per split and reuse the predictions for every score below.
train_pred = model.predict(X_train)
y_pred = model.predict(X_test)

train_mae = mean_absolute_error(y_train, train_pred)
test_mae = mean_absolute_error(y_test, y_pred)

# The compiled metric is mean squared error, so evaluate() returns
# (loss, MSE) - it is not a mean absolute error.
loss, mse = model.evaluate(X_test, y_test, verbose=0)

# Report the MAE computed above rather than the MSE returned by evaluate().
print(f"Mean Absolute Error on Test Data: {test_mae}")
print("Train RMSE: ", np.sqrt(mean_squared_error(y_train, train_pred)))
print("Test RMSE: ", np.sqrt(mean_squared_error(y_test, y_pred)))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/300
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - loss: 22137.3066 - mean_squared_error: 22137.3066 - val_loss: 1383.0845 - val_mean_squared_error: 1383.0845
Epoch 2/300
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 962.0498 - mean_squared_error: 962.0498 - val_loss: 500.6865 - val_mean_squared_error: 500.6865
Epoch 3/300
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 416.4462 - mean_squared_error: 416.4462 - val_loss: 336.4789 - val_mean_squared_error: 336.4789
Epoch 4/300
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 306.1096 - mean_squared_error: 306.1096 - val_loss: 281.4936 - val_mean_squared_error: 281.4936
Epoch 5/300
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 267.9052 - mean_squared_error: 267.9052 - val_loss: 277.3311 - val_mean_squared_error: 277.3311
Epoch 6/300
[1m175/175[0m [32m━━━━

## Final Submission:

In [None]:
# Keep only the first 13 characters of each timestamp string.
submission_dfs['Timestamp'] = submission_dfs['Timestamp'].astype(str).str.slice(stop=13)

In [None]:
# Predict on the prepared test features. Remember to train on 100% of the data
# before producing a real submission.
ansy = model.predict(dfs)
# The neural network returns a 2-D array; keep its first column. Skip this
# step for models whose predict() does not return a 2-D array.
ansy = ansy[:, 0]

# Each list starts with its CSV header, followed by one entry per prediction.
timestamps = submission_dfs['Timestamp']
ide = ["Timestamp"] + [timestamps.iloc[pos] for pos in range(len(ansy))]
ansye = ["Water_Consumption"] + [ansy[pos] for pos in range(len(ansy))]

# Write the two lists side by side as comma-separated rows.
np.savetxt("Pred.csv", list(zip(ide, ansye)), delimiter=",", fmt='%s')

[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
