In [12]:
# Importing required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
import joblib
from google.colab import files
from tabulate import tabulate  # For displaying results in a table format

# Load the dataset
df = pd.read_csv("/content/water_potability.csv")

# Splitting features and target variable
X = df.drop("Potability", axis=1)
y = df["Potability"]

# Handling missing values
def fill_missing(df):
    for column in df.columns:
        if df[column].isna().sum():
            df[column] = df[column].fillna(df[column].median())
    return df

X = fill_missing(X)

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Scaling the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save scaler for later use
joblib.dump(scaler, "scaler.pkl")

# Initializing models
models = {
    "RandomForest": RandomForestClassifier(
        n_estimators=1000,  # Increased number of trees for better performance
        max_depth=None,    # No limit on tree depth
        min_samples_split=2,  # Minimum samples to split a node
        min_samples_leaf=1,   # Minimum samples required at a leaf node
        random_state=42
    ),
    "MLPClassifier": MLPClassifier(
        hidden_layer_sizes=(64, 32),  # Increased complexity for better performance
        activation='relu',
        solver='adam',
        max_iter=300,  # Increased iterations
        random_state=42
    ),
    "LinearSVC": LinearSVC(max_iter=10000, random_state=42),
    "LogisticRegression": LogisticRegression(random_state=42),
    "KNeighbors": KNeighborsClassifier(n_neighbors=5),
    "GradientBoosting": GradientBoostingClassifier(
        n_estimators=50,  # Reduced number of estimators
        learning_rate=0.05,  # Lower learning rate
        max_depth=2,  # Shallower trees
        random_state=42
    ),
    "XGBoost": XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, eval_metric="logloss", random_state=42)
}

# Training and evaluating models
results = []

for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    train_score = model.score(X_train_scaled, y_train)
    test_score = accuracy_score(y_test, model.predict(X_test_scaled))
    results.append([name, train_score, test_score])

# Display results in a table format
headers = ["Model", "Train Accuracy", "Test Accuracy"]
print(tabulate(results, headers=headers, tablefmt="grid", floatfmt=".4f"))

# Save the Random Forest model
joblib.dump(models["RandomForest"], "best_rf_model.pkl")

# Optional: Download the saved model file
files.download("best_rf_model.pkl")



+--------------------+------------------+-----------------+
| Model              |   Train Accuracy |   Test Accuracy |
| RandomForest       |           1.0000 |          0.6890 |
+--------------------+------------------+-----------------+
| MLPClassifier      |           0.8884 |          0.6463 |
+--------------------+------------------+-----------------+
| LinearSVC          |           0.6085 |          0.6220 |
+--------------------+------------------+-----------------+
| LogisticRegression |           0.6085 |          0.6220 |
+--------------------+------------------+-----------------+
| KNeighbors         |           0.7656 |          0.6341 |
+--------------------+------------------+-----------------+
| GradientBoosting   |           0.6442 |          0.6433 |
+--------------------+------------------+-----------------+
| XGBoost            |           0.7293 |          0.6677 |
+--------------------+------------------+-----------------+


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import gradio as gr
import numpy as np
import joblib
from tinydb import TinyDB, Query
import pandas as pd
import matplotlib.pyplot as plt
import io
import base64
import re
from fpdf import FPDF
from datetime import datetime

# Load the trained model
model = joblib.load("best_rf_model.pkl")

# Initialize database for storing user queries
db = TinyDB("user_queries.json")

# Initialize database for storing user credentials
user_db = TinyDB("user_credentials.json")

# Feature columns for reference
features = ['pH', 'Hardness', 'Solids', 'Chloramines', 'Sulfate', 'Conductivity', 'Organic_carbon', 'Trihalomethanes', 'Turbidity']

# Default values for potable water (default values for sliders)
default_values = {
    'pH': 7.5,
    'Hardness': 150,
    'Solids': 500,
    'Chloramines': 4,
    'Sulfate': 250,
    'Conductivity': 500,
    'Organic_carbon': 2,
    'Trihalomethanes': 80,
    'Turbidity': 1
}

# Global variable to track login status
logged_in_user = None

# Function to validate password
def validate_password(password):
    if len(password) < 8:
        return False
    if not re.search("[!@#$%^&*(),.?\":{}|<>]", password):
        return False
    return True

# Function to handle user signup
def signup(username, password):
    if not validate_password(password):
        return "Password must be at least 8 characters long and contain at least one special character."
    User = Query()
    if user_db.search(User.username == username):
        return "Username already exists. Please choose a different username."
    user_db.insert({"username": username, "password": password})
    return "Signup successful! Please login."

# Function to handle user login
def login(username, password):
    global logged_in_user
    User = Query()
    user = user_db.search((User.username == username) & (User.password == password))
    if user:
        logged_in_user = username
        return "Login successful!", gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
    else:
        return "Invalid username or password.", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)

# Function to handle user logout
def logout():
    global logged_in_user
    logged_in_user = None
    return "Logout successful!", gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)

# Function to check if the user is logged in
def is_logged_in():
    return logged_in_user is not None

# Function to validate inputs and handle predictions
def predict_water_quality(pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity):
    if not is_logged_in():
        return "🔒 Please login to access the prediction system.", "", "", "", ""

    # Check if inputs are within the potable water range
    if (6.5 <= pH <= 8.5 and
        0 <= Hardness <= 300 and
        Solids <= 500 and
        Chloramines <= 4 and
        Sulfate <= 250 and
        50 <= Conductivity <= 500 and
        Organic_carbon <= 2 and
        Trihalomethanes <= 80 and
        Turbidity <= 1):
        prediction = 1  # Potable
        recommendations = ["✅ Water is POTABLE. No immediate action needed."]
    else:
        prediction = 0  # Not Potable
        recommendations = ["🔴 Water is NOT potable. Consider treatment like filtration or chemical disinfection."]
        if pH < 6.5:
            recommendations.append("⚠️ Low pH: Water is too acidic. Use pH stabilizers.")
        if Conductivity > 500:
            recommendations.append("⚠️ High Conductivity: High dissolved solids, consider filtration.")
        if Turbidity > 5:
            recommendations.append("⚠️ High Turbidity: May contain pathogens, use sediment filters.")

    # Store the user's input and prediction in the database
    db.insert({
        "username": logged_in_user,
        "pH": pH, "Hardness": Hardness, "Solids": Solids, "Chloramines": Chloramines,
        "Sulfate": Sulfate, "Conductivity": Conductivity, "Organic_carbon": Organic_carbon,
        "Trihalomethanes": Trihalomethanes, "Turbidity": Turbidity, "Prediction": int(prediction),
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    })

    # Generate visualization of the user's input
    fig, ax = plt.subplots(figsize=(6, 4))
    params = [pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity]
    ax.bar(features, params, color='teal')
    ax.set_title("Water Quality Parameters")
    ax.set_ylabel("Value")
    ax.set_xlabel("Parameters")
    plt.xticks(rotation=45)

    # Convert plot to image
    buf = io.BytesIO()
    plt.savefig(buf, format="png")
    buf.seek(0)
    img_str = base64.b64encode(buf.read()).decode("utf-8")
    img_tag = f'<img src="data:image/png;base64,{img_str}" width="500"/>'

    # Return the potability score, recommendations, prediction, and chart
    probability = model.predict_proba(np.array([[pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity]]))[0][1] * 100
    return f"Potability Score: {probability:.2f}%", '\n'.join(recommendations), prediction, probability, img_tag

# Function to generate a PDF report
def generate_report(pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity):
    if not is_logged_in():  # Ensure this function is defined elsewhere
        return "🔒 Please login to access the report generation."

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Title
    pdf.cell(200, 10, txt="Water Quality Report", ln=True, align="C")
    pdf.cell(200, 10, txt="----------------------------------------", ln=True, align="C")

    # Report Content
    pdf.cell(200, 10, txt=f"pH: {pH}", ln=True)
    pdf.cell(200, 10, txt=f"Hardness: {Hardness} mg/L", ln=True)
    pdf.cell(200, 10, txt=f"Solids: {Solids} mg/L", ln=True)
    pdf.cell(200, 10, txt=f"Chloramines: {Chloramines} mg/L", ln=True)
    pdf.cell(200, 10, txt=f"Sulfate: {Sulfate} mg/L", ln=True)
    pdf.cell(200, 10, txt=f"Conductivity: {Conductivity} µS/cm", ln=True)
    pdf.cell(200, 10, txt=f"Organic Carbon: {Organic_carbon} mg/L", ln=True)
    pdf.cell(200, 10, txt=f"Trihalomethanes: {Trihalomethanes} µg/L", ln=True)
    pdf.cell(200, 10, txt=f"Turbidity: {Turbidity} NTU", ln=True)

    # Footer
    pdf.cell(200, 10, txt="----------------------------------------", ln=True, align="C")
    pdf.cell(200, 10, txt="Thank you for using our system!", ln=True, align="C")

    # Save the PDF
    pdf_output = "water_quality_report.pdf"
    pdf.output(pdf_output)

    return pdf_output

# Function to display water quality standards
def display_water_standards():
    if not is_logged_in():
        return pd.DataFrame({"Error": ["🔒 Please login to access the water quality standards."]})
    standards = {
        "Parameter": ["pH", "Hardness", "Solids", "Chloramines", "Sulfate", "Conductivity", "Organic Carbon", "Trihalomethanes", "Turbidity"],
        "Standard Value": ["6.5 - 8.5", "0 - 300 mg/L", "< 500 mg/L", "< 4 mg/L", "< 250 mg/L", "50 - 500 µS/cm", "< 2 mg/L", "< 80 µg/L", "< 1 NTU"]
    }
    return pd.DataFrame(standards)

# Function to display user dashboard in table format
def get_user_dashboard():
    if not is_logged_in():
        return pd.DataFrame({"Error": ["🔒 Please login to access the dashboard."]})
    User = Query()
    history = db.search(User.username == logged_in_user)
    if not history:
        return pd.DataFrame({"Message": ["No historical data found."]})
    df = pd.DataFrame(history)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)
    return df

# Function to visualize user input data as a bar graph
def visualize_input_data(pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity):
    if not is_logged_in():
        return "🔒 Please login to access the visualization."
    fig, ax = plt.subplots(figsize=(8, 5))
    params = [pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity]
    ax.bar(features, params, color='teal')
    ax.set_title("Water Quality Parameters")
    ax.set_ylabel("Value")
    ax.set_xlabel("Parameters")
    plt.xticks(rotation=45)
    plt.tight_layout()
    buf = io.BytesIO()
    plt.savefig(buf, format="png")
    buf.seek(0)
    img_str = base64.b64encode(buf.read()).decode("utf-8")
    img_tag = f'<img src="data:image/png;base64,{img_str}" width="600"/>'
    return img_tag

# Function to create a gallery of water quality visualizations
def create_visualization_gallery():
    if not is_logged_in():
        return "🔒 Please login to view the gallery."
    User = Query()
    history = db.search(User.username == logged_in_user)
    if not history:
        return "No historical data found."
    df = pd.DataFrame(history)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df.set_index('timestamp', inplace=True)

    # Create a gallery of plots
    gallery_html = "<div style='display: flex; flex-wrap: wrap; gap: 20px;'>"
    for feature in features:
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.plot(df.index, df[feature], label=feature, color='blue')
        ax.set_title(f"{feature} Over Time")
        ax.set_ylabel(feature)
        ax.set_xlabel("Timestamp")
        ax.legend()
        plt.tight_layout()
        buf = io.BytesIO()
        plt.savefig(buf, format="png")
        buf.seek(0)
        img_str = base64.b64encode(buf.read()).decode("utf-8")
        gallery_html += f"<div style='flex: 1 1 300px;'><img src='data:image/png;base64,{img_str}' width='100%'/></div>"
        plt.close()
    gallery_html += "</div>"
    return gallery_html

# Custom CSS for better styling
custom_css = """
    .gradio-container {
        font-family: Arial, sans-serif;
    }
    .gradio-button {
        background-color: #4CAF50;
        color: white;
        border: none;
        padding: 10px 20px;
        text-align: center;
        text-decoration: none;
        display: inline-block;
        font-size: 16px;
        margin: 4px 2px;
        cursor: pointer;
        border-radius: 5px;
    }
    .gradio-button:hover {
        background-color: #45a049;
    }
    .gradio-slider {
        width: 100%;
    }
    .gradio-textbox {
        width: 100%;
        padding: 10px;
        margin: 5px 0;
        box-sizing: border-box;
        border: 2px solid #ccc;
        border-radius: 4px;
    }
    .gradio-output {
        background-color: #f9f9f9;
        padding: 15px;
        border-radius: 5px;
        margin-top: 10px;
    }
"""

# Create the main interface with signup, login, and water potability prediction
with gr.Blocks(css=custom_css) as main_ui:
    gr.Markdown("# Water Potability Prediction System")
    with gr.Tabs():
        with gr.TabItem("Signup"):
            gr.Markdown("## Signup")
            username = gr.Textbox(label="Username")
            password = gr.Textbox(label="Password", type="password")
            signup_button = gr.Button("Signup")
            signup_output = gr.Textbox(label="Signup Status", interactive=False)
            signup_button.click(signup, inputs=[username, password], outputs=signup_output)

        with gr.TabItem("Login"):
            gr.Markdown("## Login")
            username = gr.Textbox(label="Username")
            password = gr.Textbox(label="Password", type="password")
            login_button = gr.Button("Login")
            login_output = gr.Textbox(label="Login Status", interactive=False)
            water_ui_visibility = gr.Textbox(visible=False)  # Placeholder for water_ui visibility
            login_ui_visibility = gr.Textbox(visible=False)  # Placeholder for login_ui visibility
            logout_button = gr.Button("Logout", visible=False)
            login_button.click(login, inputs=[username, password], outputs=[login_output, water_ui_visibility, login_ui_visibility, logout_button, login_button])
            logout_button.click(logout, outputs=[login_output, water_ui_visibility, login_ui_visibility, logout_button, login_button])

        with gr.TabItem("Water Potability Prediction"):
            gr.Markdown("## 🌊 Water Potability Prediction System")
            gr.Markdown("### Enter Water Parameters:")
            with gr.Row():
                pH = gr.Slider(minimum=0, maximum=14, step=0.1, label="pH Level", value=7.5, info="pH level of the water (6.5 to 8.5 is ideal).")
                Hardness = gr.Slider(minimum=0, maximum=500, step=1, label="Hardness (mg/L)", value=150, info="Hardness of the water (0-300 mg/L is optimal).")
            with gr.Row():
                Solids = gr.Slider(minimum=0, maximum=50000, step=10, label="Solids (mg/L)", value=500, info="Total dissolved solids (less than 500 mg/L is optimal).")
                Chloramines = gr.Slider(minimum=0, maximum=10, step=0.1, label="Chloramines (mg/L)", value=4, info="Chloramines level (less than 4 mg/L is ideal).")
            with gr.Row():
                Sulfate = gr.Slider(minimum=0, maximum=500, step=1, label="Sulfate (mg/L)", value=250, info="Sulfate level (less than 250 mg/L is ideal).")
                Conductivity = gr.Slider(minimum=0, maximum=2000, step=5, label="Conductivity (μS/cm)", value=500, info="Electrical conductivity of water (50-500 µS/cm is optimal).")
            with gr.Row():
                Organic_carbon = gr.Slider(minimum=0, maximum=50, step=0.5, label="Organic Carbon (mg/L)", value=2, info="Organic carbon level (less than 2 mg/L is ideal).")
                Trihalomethanes = gr.Slider(minimum=0, maximum=150, step=1, label="Trihalomethanes (μg/L)", value=80, info="Trihalomethanes level (less than 80 µg/L is optimal).")
            Turbidity = gr.Slider(minimum=0, maximum=10, step=0.1, label="Turbidity (NTU)", value=1, info="Turbidity level (less than 1 NTU is ideal).")
            submit = gr.Button("🔍 Predict")
            reset = gr.Button("🔄 Reset")
            potability_score = gr.Textbox(label="Potability Score", interactive=False, elem_classes=["output"])
            recommendations = gr.Textbox(label="Recommendations", interactive=False, elem_classes=["output"])
            prediction_output = gr.Textbox(label="Prediction (0: Not Potable, 1: Potable)", interactive=False, elem_classes=["output"])
            water_chart = gr.HTML(label="Water Quality Chart")
            submit.click(predict_water_quality, inputs=[pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity],
                         outputs=[potability_score, recommendations, prediction_output, water_chart])
            reset.click(lambda: [7.5, 150, 500, 4, 250, 500, 2, 80, 1], outputs=[pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity])

        with gr.TabItem("Water Quality Standards"):
            gr.Markdown("## Water Quality Standards")
            standards_output = gr.Dataframe(label="Standards", interactive=False,)
            refresh_standards= gr.Button("🔄 Refresh water quality standards")
            refresh_standards.click(display_water_standards, outputs=standards_output)

        with gr.TabItem("User Dashboard"):
            gr.Markdown("## User Dashboard")
            dashboard_output = gr.Dataframe(label="Dashboard", interactive=False)
            refresh_dashboard = gr.Button("🔄 Refresh Dashboard")
            refresh_dashboard.click(get_user_dashboard, outputs=dashboard_output)

        with gr.TabItem("Data Visualization Gallery"):
            gr.Markdown("## Water Quality Data Visualization Gallery")
            gallery_output = gr.HTML()
            refresh_gallery = gr.Button("🔄 Refresh Gallery")
            refresh_gallery.click(create_visualization_gallery, outputs=gallery_output)

        with gr.TabItem("Generate Report"):
            gr.Markdown("## Generate Water Quality Report")
            report_output = gr.File(label="Download Report")
            generate_report_button = gr.Button("Generate Report")
            generate_report_button.click(generate_report, inputs=[pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity], outputs=report_output)


        with gr.TabItem("Visualize Input Data"):
            gr.Markdown("## Visualize Input Data")
            visualize_output = gr.HTML()
            visualize_button = gr.Button("Visualize Data")
            visualize_button.click(visualize_input_data, inputs=[pH, Hardness, Solids, Chloramines, Sulfate, Conductivity, Organic_carbon, Trihalomethanes, Turbidity], outputs=visualize_output)

main_ui.launch(pwa=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f560720145bf60ac2a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [13]:
!pip install gradio
!pip install tinydb
!pip install fpdf



In [14]:
!pip install matplotlib
!pip install gradio huggingface_hub
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): Traceback (most recent call last):
  File "/usr/local/bin/huggingface-cli", line 10, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/huggingface_hub/commands/huggingface_cli.py", line 57, in main
    service.run()
  File "

In [None]:
!gradio deploy

Space available at 
[4;94mhttps://huggingface.co/spaces/AllaVyjayanthi/Enhanced_Water_Potability_Prediction_SystemEnhanced_Wat[0m
[4;94mer_Potability_Prediction_System[0m
