In [4]:
import os
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

# Train a random-forest water-potability classifier and persist the model
# together with the preprocessing objects (imputer, scaler) needed at
# inference time.
#
# Preprocessing is fitted on the training rows only. Fitting the imputer or
# scaler on the full dataset before splitting leaks test-set statistics
# (column means, min/max) into training and makes the held-out evaluation
# optimistic.

# Define paths
dataset_path = r"C:\Users\hp\Desktop\EPICS\water_potability.csv"
save_model_path = r"C:\Users\hp\Desktop\EPICS\models"

# Ensure the models directory exists
os.makedirs(save_model_path, exist_ok=True)

# Load dataset
df = pd.read_csv(dataset_path)

# Define input (X) and output (y). Features are selected by name rather than
# by position so the label is never imputed by accident if the column order
# of the CSV changes.
X = df.drop(columns=["Potability"])
y = df["Potability"]
feature_columns = X.columns

# Train-test split on the raw (un-imputed, un-scaled) features. With the same
# random_state and row count this selects the same rows as splitting after
# preprocessing would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Handle missing values: learn the column means from the training rows only.
imputer = SimpleImputer(strategy="mean")
X_train_imputed = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=feature_columns,
    index=X_train_raw.index,
)
X_test_imputed = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=feature_columns,
    index=X_test_raw.index,
)

# Normalize inputs: learn min/max from the training rows only, then apply the
# same transformation to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Keep the notebook-level state the earlier version of this cell exposed:
# `df` with its feature columns imputed and `X_scaled` for the full dataset,
# both now derived from the training-only statistics.
df[feature_columns] = imputer.transform(X)
X_scaled = scaler.transform(df[feature_columns])

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model, scaler and imputer so inference can replay the exact same
# preprocessing.
joblib.dump(model, os.path.join(save_model_path, "water_quality_model.pkl"))
joblib.dump(scaler, os.path.join(save_model_path, "scaler.pkl"))
joblib.dump(imputer, os.path.join(save_model_path, "imputer.pkl"))

print("Model trained and saved successfully in EPICS folder!")


Model trained and saved successfully in EPICS folder!
