In [9]:
import pandas as pd

# Location of the household water-usage dataset (parquet format)
data_file = 'indore_water_usage_data_difficult.parquet'

# Read the whole dataset into a DataFrame
df = pd.read_parquet(data_file)

# Show the column index so the available fields are visible before modelling
print(df.columns)


Index(['Household ID', 'Ward', 'Area', 'Monthly Water Usage (Liters)',
       'Leakage Detected (Yes/No)', 'Disparity in Supply (Yes/No)',
       'Income Level', 'Household Size', 'Date'],
      dtype='object')


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

import joblib

# Seed Python, NumPy and TensorFlow together so weight initialisation, dropout
# and batch shuffling start from the same state on every run. (Some GPU kernels
# may still be non-deterministic unless op determinism is enabled as well.)
tf.keras.utils.set_random_seed(42)

# Load data
df = pd.read_parquet('indore_water_usage_data_difficult.parquet')

# Convert categorical columns to numerical.
# Series.map yields NaN for any value that is not a key of the mapping, so
# unexpected spellings are caught by the missing-value check further down.
yes_no_codes = {'Yes': 1, 'No': 0}
income_codes = {'Low': 0, 'Medium': 1, 'High': 2}
df['Leakage Detected (Yes/No)'] = df['Leakage Detected (Yes/No)'].map(yes_no_codes)
df['Disparity in Supply (Yes/No)'] = df['Disparity in Supply (Yes/No)'].map(yes_no_codes)
df['Income Level'] = df['Income Level'].map(income_codes)

# Drop unnecessary columns
df = df.drop(columns=['Date'])

# Define feature names.
# NOTE(review): 'Household ID' is an identifier rather than a measurement; it is
# kept because the prediction GUI cell feeds the model these same seven inputs.
feature_names = ['Household ID', 'Ward', 'Area', 'Leakage Detected (Yes/No)', 'Disparity in Supply (Yes/No)', 'Income Level', 'Household Size']
target_name = 'Monthly Water Usage (Liters)'

# A single NaN in the features or the target propagates through the network and
# turns the loss into NaN, so remove incomplete rows and report how many went.
incomplete_rows = df[feature_names + [target_name]].isna().any(axis=1)
if incomplete_rows.any():
    print(f"Dropping {int(incomplete_rows.sum())} rows with missing or unmapped values")
    df = df.loc[~incomplete_rows]

# Split data into features and target
X = df[feature_names]
y = df[target_name]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data. Fitting on a DataFrame makes the scaler remember the
# column names (feature_names_in_); statistics come from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the model: four Dense -> BatchNormalization -> Dropout stages that
# narrow from 256 to 32 units, followed by a single linear regression output.
# The input shape is declared with an explicit Input object instead of the
# legacy `input_dim` argument on the first Dense layer.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(1, activation='linear')  # Regression output
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Train the model (20% of the training split is held back for validation)
model.fit(X_train, y_train, epochs=100, batch_size=512, validation_split=0.2, verbose=1)

# Score the model on the held-out test split, which is otherwise never used
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
print(f"Test MSE: {test_loss:.2f} - Test MAE: {test_mae:.2f}")

# Save the model. The .h5 file name is kept because the GUI cell loads it by
# this name; Keras warns that HDF5 is a legacy format.
model.save('water_usage_model.h5')

# Save the scaler with feature names
joblib.dump(scaler, 'scaler.pkl')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

  saving_api.save_model(


['scaler.pkl']

In [12]:
import tkinter as tk
from tkinter import messagebox
import numpy as np
import tensorflow as tf
import joblib
import pandas as pd

# Load the trained model and scaler.
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code, so only load a scaler.pkl that you produced yourself.
model = tf.keras.models.load_model('water_usage_model.h5')
scaler = joblib.load('scaler.pkl')

# Display labels for the seven model inputs, in the order the model expects them.
# NOTE(review): these labels are not the column names used in the training cell
# ('Leakage Detected (Yes/No)', 'Disparity in Supply (Yes/No)', 'Income Level'),
# so a DataFrame built with them does not match the names the scaler was fitted on.
feature_names = ['Household ID', 'Ward', 'Area', 'Leakage Detected (Yes=1, No=0)', 'Disparity in Supply (Yes=1, No=0)', 'Income Level (Low=0, Medium=1, High=2)', 'Household Size']

# Create the main application window
root = tk.Tk()
root.title("Water Usage Prediction")

# Create a frame for the input fields
frame = tk.Frame(root)
frame.pack(padx=10, pady=10)

# Input widgets/variables keyed by their display label; read back by predict()
entries = {}

# Choice -> numeric code tables, matching the encodings used during training
options = {'Yes': 1, 'No': 0}
income_levels = {'Low': 0, 'Medium': 1, 'High': 2}


def _add_label(row, text):
    """Place a left-aligned caption in column 0 of the given form row."""
    tk.Label(frame, text=text, anchor='w').grid(row=row, column=0, sticky='w')


def _add_spinbox(row, name, upper):
    """Add a captioned spinbox ranging from 1 to `upper` and register it under `name`."""
    _add_label(row, name)
    entries[name] = tk.Spinbox(frame, from_=1, to=upper)
    entries[name].grid(row=row, column=1)


def _add_dropdown(row, name, choices, default):
    """Add a captioned drop-down listing the keys of `choices`.

    The StringVar holding the selected key (initially `default`) is what gets
    registered under `name`, so callers read the choice with `.get()`.
    """
    _add_label(row, name)
    entries[name] = tk.StringVar(value=default)
    tk.OptionMenu(frame, entries[name], *choices.keys()).grid(row=row, column=1)


# Build the form, one row per model input
_add_spinbox(0, 'Household ID', 1000000)
_add_spinbox(1, 'Ward', 50)
_add_spinbox(2, 'Area', 100)
_add_dropdown(3, 'Leakage Detected (Yes=1, No=0)', options, 'No')
_add_dropdown(4, 'Disparity in Supply (Yes=1, No=0)', options, 'No')
_add_dropdown(5, 'Income Level (Low=0, Medium=1, High=2)', income_levels, 'Medium')
_add_spinbox(6, 'Household Size', 20)

def predict():
    """Read the form, run the model and show the predicted monthly usage.

    Takes no arguments and returns nothing: inputs come from the module-level
    `entries` widgets, and the outcome is reported through a message box. Any
    failure (for example non-numeric text in a spinbox) is shown in an error
    dialog instead of propagating into Tk's event loop.
    """
    try:
        # Collect the inputs in the same order as the training features
        values = [
            int(entries['Household ID'].get()),
            int(entries['Ward'].get()),
            int(entries['Area'].get()),
            options[entries['Leakage Detected (Yes=1, No=0)'].get()],
            options[entries['Disparity in Supply (Yes=1, No=0)'].get()],
            income_levels[entries['Income Level (Low=0, Medium=1, High=2)'].get()],
            int(entries['Household Size'].get())
        ]
        row = np.array(values).reshape(1, -1)

        # A scaler fitted on a DataFrame remembers the training column names, and
        # recent scikit-learn versions reject a DataFrame whose names differ.
        # The GUI labels in `feature_names` are not those names, so prefer the
        # ones stored on the scaler and fall back to the labels only when the
        # scaler has none (i.e. it was fitted on a plain array).
        columns = getattr(scaler, 'feature_names_in_', None)
        if columns is None:
            columns = feature_names
        data_df = pd.DataFrame(row, columns=list(columns))

        # Scale the data
        data_scaled = scaler.transform(data_df)

        # Make prediction (result has shape (1, 1): one sample, one output)
        prediction = model.predict(data_scaled)

        # Show the result
        messagebox.showinfo("Prediction", f"Predicted Monthly Water Usage: {prediction[0][0]:.2f} Liters")
    except Exception as e:
        # Deliberately broad: this runs as a Tk callback, so surface the problem
        # to the user rather than losing it in the event loop.
        messagebox.showerror("Error", str(e))

# Add the button that triggers a prediction; it is packed into the root window
# after the input frame, so it appears underneath the form
predict_button = tk.Button(master=root, command=predict, text="Predict")
predict_button.pack(pady=10)

# Hand control to Tk's event loop; this call blocks until the window is closed
root.mainloop()
