In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Load the raw water-potability measurements and preview the first rows.
csv_path = 'Water_Potability_CSV_File.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

Unnamed: 0,ph,Temperature,Solids,Turbidity,Potability
0,,14,20791.31898,2.963135,0
1,3.71608,-13,18630.05786,4.500656,0
2,8.099124,6,19909.54173,3.055934,0
3,8.316766,-15,22018.41744,4.628771,0
4,9.092223,4,17978.98634,4.075075,0


## *Filling Null Values*

In [3]:
# Replace missing pH readings with the mean of the observed pH values.
ph_mean = df["ph"].mean()
df["ph"] = df["ph"].fillna(value=ph_mean)

## *Normalizing*

In [4]:
def normalize_df(df, label_column='Potability'):
    """Min-max scale every feature column of ``df`` to the [0, 1] range.

    Each column except ``label_column`` is rescaled as
    ``(x - min) / (max - min)`` using that column's own min and max.
    The min/max of every scaled column is printed so the same bounds can be
    reused when scaling new data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric feature columns. It is not modified.
    label_column : str, default 'Potability'
        Column that is copied through unscaled.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the feature columns rescaled.
    """
    out = df.copy()
    for feature in df.columns:
        if feature == label_column:
            continue

        max_value = df[feature].max()
        min_value = df[feature].min()
        print("features {}, max value {}, min value {}".format(feature, max_value, min_value))

        value_range = max_value - min_value
        if value_range == 0:
            # Constant column: dividing by a zero range would turn every value
            # into NaN (0/0). Map the column to 0.0 instead; existing NaNs
            # stay NaN because only the minimum is subtracted.
            out[feature] = (df[feature] - min_value).astype(float)
        else:
            out[feature] = (df[feature] - min_value) / value_range

    return out

# Swap the working frame for its min-max scaled version and preview it.
df = normalize_df(df=df)
df.head(n=5)

features ph, max value 14.0, min value 0.0
features Temperature, max value 42, min value -15
features Solids, max value 61227.19601, min value 320.9426113
features Turbidity, max value 6.739, min value 1.45


Unnamed: 0,ph,Temperature,Solids,Turbidity,Potability
0,0.505771,0.508772,0.336096,0.286091,0
1,0.265434,0.035088,0.300611,0.576793,0
2,0.578509,0.368421,0.321619,0.303637,0
3,0.594055,0.0,0.356244,0.601015,0
4,0.649445,0.333333,0.289922,0.496327,0


In [5]:
# Persist the scaled data set (row index is not written to the file).
normalized_csv_path = 'Water_Potability_Normalized.csv'
df.to_csv(normalized_csv_path, index=False)

## *Model Training*

In [6]:
# Every column except the target is a model input; 'Potability' is the label.
feature_columns = [column for column in df.columns if column != 'Potability']
df_features = df[feature_columns]
df_label = df['Potability']

In [7]:
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Hold out 20% of the rows; the fixed seed keeps the split repeatable.
df_train_X, df_test_X, df_train_Y, df_test_Y = train_test_split(
    df_features,
    df_label,
    test_size=0.2,
    random_state=1234,
)

# Gradient-boosted classifier: 200 boosting rounds with a 0.03 learning rate.
xgb_params = {"n_estimators": 200, "learning_rate": 0.03}
model_xgb = XGBClassifier(**xgb_params)

# Fit on the training portion only.
model_xgb.fit(df_train_X, df_train_Y)

In [8]:
import getpass
import os
import time

import requests
from Adafruit_IO import Client

# Adafruit IO credentials. The secret key is read from the ADAFRUIT_IO_KEY
# environment variable (or prompted for interactively) so it is never stored
# in the notebook itself.
# NOTE(review): a key was previously committed in this cell; it should be
# treated as compromised and regenerated in the Adafruit IO account.
ADAF_Username = os.environ.get('ADAFRUIT_IO_USERNAME', 'dineshram')
ADAF_Key = os.environ.get('ADAFRUIT_IO_KEY') or getpass.getpass('Adafruit IO key: ')

aio = Client(ADAF_Username, ADAF_Key)

# Website host address
host= "http://localhost/Water-Quality-Monitoring-System-Website/" # End url with a slash '/'

# Weights of the four parameters in the weighted-average Water Quality Index.
Wtemp = 1
Wturb = 1
Wph = 1
Wtds = 1

# Temperature is given a fixed quality score of 100.
Qtemp = 100

# (min, max) of each feature in the training data, as printed by normalize_df.
# Live readings must be scaled with exactly these bounds so the model sees
# inputs on the same scale it was trained on.
TRAIN_FEATURE_RANGES = {
    "ph": (0.0, 14.0),
    "Temperature": (-15.0, 42.0),
    "Solids": (320.9426113, 61227.19601),
    "Turbidity": (1.45, 6.739),
}

# Quality score for each integer pH from 3 to 11 (anything else scores 0).
PH_QUALITY = {3: 5, 11: 5, 4: 10, 10: 20, 5: 25, 9: 50, 6: 75, 8: 80, 7: 100}


def _ph_quality(ph):
    """Return the quality score for an integer pH reading."""
    if ph <= 2 or ph >= 12:
        return 0
    return PH_QUALITY[ph]


def _turbidity_quality(turbidity):
    """Return the quality score for a turbidity reading (5 outside 0..100)."""
    if not (0 <= turbidity <= 100):
        return 5
    if turbidity <= 10:
        return 95
    # Each entry is (exclusive upper bound, score); the first match wins.
    bands = ((20, 70), (30, 55), (40, 49), (50, 42), (60, 35),
             (70, 32), (80, 27), (90, 22))
    for upper, score in bands:
        if turbidity < upper:
            return score
    return 18  # 90 <= turbidity <= 100


def _tds_quality(tds):
    """Return the quality score for a total-dissolved-solids reading.

    Readings above 500 score 20. Negative or NaN readings also score 20;
    previously they matched no branch, so the score was either undefined or
    silently carried over from the previous loop iteration.
    """
    if not (0 <= tds <= 500):
        return 20
    if tds <= 50:
        return 85
    # Each entry is (exclusive upper bound, score); the first match wins.
    bands = ((100, 95), (150, 80), (200, 75), (250, 68), (300, 63),
             (350, 55), (400, 50), (450, 42))
    for upper, score in bands:
        if tds < upper:
            return score
    return 35  # 450 <= tds <= 500


def _wqi_rating(wqi):
    """Map a WQI value to its rating label ('' when outside 0..100)."""
    if 90 <= wqi <= 100:  # upper bound is inclusive so a perfect 100 is rated
        return 'Excellent'
    if 70 <= wqi < 90:
        return 'Good'
    if 50 <= wqi < 70:
        return 'Moderate'
    if 25 <= wqi < 50:
        return 'Bad'
    if 0 <= wqi < 25:
        return 'Very Bad'
    return ''


def push_data(data, readings=None):
    """Return a copy of ``data`` with the sensor columns set to ``readings``.

    ``readings`` is ``[temperature, turbidity, ph, solids]``; when omitted the
    module-level ``arr`` from the current loop iteration is used. Every row of
    the returned frame receives the same values.
    """
    if readings is None:
        readings = arr
    out = data.copy()
    out["ph"] = readings[2]
    out["Temperature"] = readings[0]
    out["Solids"] = readings[3]
    out["Turbidity"] = readings[1]
    return out


def normalize_data(df):
    """Return a copy of ``df`` min-max scaled with the training-set bounds."""
    out = df.copy()
    for column, (min_value, max_value) in TRAIN_FEATURE_RANGES.items():
        out[column] = (df[column] - min_value) / (max_value - min_value)
    return out


while True:
    time.sleep(5)

    # Latest value of each feed.
    tds = aio.receive('ch3').value
    turb = aio.receive('ch4').value
    temp = aio.receive('ch1').value
    ph = aio.receive('ch2').value

    # Only the text before the first '$' of each feed value is used.
    TDS = tds.split('$')
    TURB = turb.split('$')
    TEMP = temp.split('$')
    PH = ph.split('$')

    # arr = [temperature, turbidity, ph, solids]
    # NOTE(review): int() raises ValueError for non-integer text such as "6.8";
    # confirm the feeds always publish whole numbers for these three channels.
    arr = [int(TEMP[0]), int(TURB[0]), int(PH[0]), float(TDS[0])]
    print(arr)

    # Per-parameter quality scores and their weighted average.
    Qph = _ph_quality(arr[2])
    Qturb = _turbidity_quality(arr[1])
    Qtds = _tds_quality(arr[3])
    WQI = ((Qtemp*Wtemp)+(Qturb*Wturb)+(Qph*Wph)+(Qtds*Wtds))/(Wtemp+Wturb+Wph+Wtds)

    # Write the reading into the measurement file, then scale it for the model.
    data = pd.read_csv('MeasuredData.csv')
    data = push_data(data, arr)
    data.to_csv('MeasuredData.csv',index=False)
    data = normalize_data(data)

    #prediction
    pred_xgb = model_xgb.predict(data)
    res = pred_xgb[0]
    outcome = 'Potable' if res == 1 else 'Not potable'

    wqistatus = _wqi_rating(WQI)

    print(outcome)
    print(WQI)
    print(wqistatus)

    # send to web server (php) --/
    userdata = {"temperature": arr[0], "turbidity": arr[1], "ph": arr[2], "solids": arr[3], "result": outcome, "wqi": WQI, "rating": wqistatus}
    resp = requests.post(host + "insert_data.php", params=userdata)

    time.sleep(10)

[31, 10, 6, 1028.79]
Potable
72.5
Good
[31, 10, 6, 1028.79]
Potable
72.5
Good
