In [1]:
# Build a WQI formula based on a modified NSF-WQI.
# Use the parameters from the cleaned water-quality CSV.
# Convert each parameter to a 0–100 "sub-index".

import pandas as pd
import numpy as np

In [12]:
# 1. Read the cleaned dataset and trim stray whitespace from the column headers
df = pd.read_csv('/content/water-quality-FINAL-cleaned.csv')
df.columns = [header.strip() for header in df.columns]

# 2. Define scoring functions
def score_do(val):
    """Turn a DO reading (presumably dissolved oxygen) into a 0-100 sub-index.

    The score is the straight line ``50 + 20 * val`` clamped to [0, 100], so a
    larger input always scores at least as high as a smaller one.

    Author's caveat: when ``val`` is a standardized value this linear scaling
    is made up (higher = better) rather than derived from a reference curve.
    """
    linear_score = 50 + 20 * val
    return np.clip(linear_score, 0, 100)
def score_ph(val):
    """Turn a pH value into a 0-100 sub-index centred on pH 7.5.

    Every full unit of distance from 7.5 (in either direction) costs 20
    points; the result is clamped to [0, 100].

    Author's caveat: if the pH column has been standardized, this scoring is
    not chemically correct, although it still runs.
    """
    target_ph = 7.5
    points_per_unit = 20
    penalty = abs(val - target_ph) * points_per_unit
    return np.clip(100 - penalty, 0, 100)

def score_temp(t):
    """Turn a temperature value into a 0-100 sub-index.

    Each unit above the reference value 20 costs 4 points. Values at or below
    20 score 100 because the result is clamped to [0, 100].
    """
    excess = t - 20
    penalty = 4 * excess
    return np.clip(100 - penalty, 0, 100)

def inverse_score(val, scale=50):
    """Score a "lower is better" parameter on a 0-100 scale.

    The penalty is ``val * scale`` floored at 0, and the score is
    ``100 - penalty`` clamped to [0, 100]. Non-positive inputs therefore score
    100, and the score reaches 0 once ``val * scale`` is 100 or more.

    Parameters
    ----------
    val : scalar or array-like
        Value(s) to score. NaN inputs yield NaN.
    scale : number, default 50
        Points deducted per unit of ``val``.

    Returns
    -------
    numpy scalar or ndarray
        Score(s) in [0, 100], or NaN where the input was NaN.
    """
    # np.maximum instead of the built-in max(): max(0, nan) evaluates to 0, which
    # silently awarded a perfect 100 to missing measurements. np.maximum
    # propagates NaN and also broadcasts over arrays / Series.
    penalty = np.maximum(0, val * scale)
    return np.clip(100 - penalty, 0, 100)

In [13]:
# 3. Build the sub-index columns first: (new column, source column, scoring function)
score_specs = [
    ("DO_score", "DO", score_do),
    ("pH_score", "pH", score_ph),
    ("Temp_score", "Temperature", score_temp),
    ("TotalN_score", "Total_N", lambda x: inverse_score(x, scale=60)),
    ("Cond_score", "Conductivity", lambda x: inverse_score(x, scale=40)),
    ("OrthoP_score", "Orthophosphate", lambda x: inverse_score(x, scale=120)),
]
for score_col, source_col, scorer in score_specs:
    # Element-wise scoring, one value at a time, in the order listed above
    df[score_col] = df[source_col].apply(scorer)

In [14]:
# 4. Weight of each sub-index column in the final WQI (the six weights total 1.0)
weights = dict(
    DO_score=0.23,
    pH_score=0.23,
    Temp_score=0.18,
    TotalN_score=0.13,
    Cond_score=0.13,
    OrthoP_score=0.10,
)

In [15]:
# 5. WQI = weighted sum of the sub-index columns, accumulated in the order listed
wqi_columns = [
    "DO_score",
    "pH_score",
    "Temp_score",
    "TotalN_score",
    "Cond_score",
    "OrthoP_score",
]
df["WQI"] = sum(df[column] * weights[column] for column in wqi_columns)


In [16]:
# Show the first five WQI values as plain text
print(df[["WQI"]].head(5))

# Persist the scored table; index=False keeps the row index out of the file
df.to_csv(path_or_buf='cleaned_WQI_Score_Data.csv', index=False)

         WQI
0  64.561183
1  60.996531
2  65.289997
3  62.442915
4  66.017030


In [17]:
# Reload the saved WQI results. The file is written with a path relative to the
# working directory, so read it back the same way instead of assuming that the
# working directory is /content.
df = pd.read_csv('cleaned_WQI_Score_Data.csv')

In [18]:
# Display the first five rows of the reloaded table
df.head(n=5)

Unnamed: 0,pH,DO,Conductivity,Temperature,Total_N,Orthophosphate,Year,Month,Season,Season_numerical,DO_score,pH_score,Temp_score,TotalN_score,Cond_score,OrthoP_score,WQI
0,0.903448,-0.121627,0.004939,0.926817,-0.911018,0.029471,2015,8,Summer,3,47.56746,0.0,100.0,100.0,99.802443,96.463492,64.561183
1,0.783984,-0.858708,-0.052245,0.753946,-0.36377,0.046118,2015,8,Summer,3,32.825845,0.0,100.0,100.0,100.0,94.465865,60.996531
2,1.103978,0.020234,-0.106052,1.20341,-0.36377,0.025256,2015,8,Summer,3,50.404675,0.0,100.0,100.0,100.0,96.96922,65.289997
3,0.254928,-0.51844,-0.135834,0.995965,-0.034853,0.056022,2015,8,Summer,3,39.631194,0.0,100.0,100.0,100.0,93.277404,62.442915
4,0.47679,0.215664,-0.03597,0.719372,-0.596279,0.039585,2015,8,Summer,3,54.313287,0.0,100.0,100.0,100.0,95.249744,66.01703
