In [1]:
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Diabetes

In [11]:
X = pd.read_csv("diabetes.csv",index_col=False)
X = np.asarray(X)
X

array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
       [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
       [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
       [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
       [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]])

In [12]:
Y = pd.read_csv("diabetes.csv",index_col=False)
Y = np.asarray(Y)

In [13]:
diabetes_X = X[:, np.newaxis, 2]

In [27]:
diabetes_Y = Y[:, np.newaxis, -1]

In [28]:
# splitting into train-test

diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]


In [29]:
count = np.count_nonzero(diabetes_X_test)
print(count)

20


In [30]:
diabetes_Y_train = diabetes_Y[:-20]
diabetes_Y_test = diabetes_Y[-20:]

In [35]:
regr = linear_model.LinearRegression()

In [36]:
diabetes_X_train = diabetes_X_train.reshape(-1,1)
diabetes_Y_train = diabetes_Y_train.reshape(-1,1)

In [37]:
regr.fit(diabetes_X_train, diabetes_Y_train)

In [39]:
diabetes_X_test = diabetes_X_test.reshape(-1,1)
# print(diabetes_X_test)

In [41]:
diabetes_Y_pred = regr.predict(diabetes_X_test)
# print(diabetes_Y_pred)

# PH analysis

In [33]:
df = pd.read_csv("water_potability.csv")
df = df.dropna()
# df['ph'] = df['ph'].round(3)

In [34]:
X = np.asarray(df)
X

array([[8.31676588e+00, 2.14373394e+02, 2.20184174e+04, ...,
        1.00341674e+02, 4.62877054e+00, 0.00000000e+00],
       [9.09222346e+00, 1.81101509e+02, 1.79789863e+04, ...,
        3.19979927e+01, 4.07507543e+00, 0.00000000e+00],
       [5.58408664e+00, 1.88313324e+02, 2.87486877e+04, ...,
        5.49178618e+01, 2.55970823e+00, 0.00000000e+00],
       ...,
       [1.14910109e+01, 9.48125452e+01, 3.71888260e+04, ...,
        4.15585007e+01, 4.36926431e+00, 1.00000000e+00],
       [6.06961576e+00, 1.86659040e+02, 2.61387802e+04, ...,
        6.04199211e+01, 3.66971170e+00, 1.00000000e+00],
       [4.66810169e+00, 1.93681735e+02, 4.75809916e+04, ...,
        6.66876948e+01, 4.43582091e+00, 1.00000000e+00]])

In [35]:
ph_X = X[:, np.newaxis, 0]
ph_X

array([[ 8.31676588],
       [ 9.09222346],
       [ 5.58408664],
       ...,
       [11.49101091],
       [ 6.06961576],
       [ 4.66810169]])

In [36]:
Y = np.asarray(df)

In [37]:
potab_Y = Y[:, np.newaxis, -1]
potab_Y

array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])

In [38]:
#splitting into train and test

potab_Y_train = potab_Y[:-20]
potab_Y_test = potab_Y[-20:]

ph_X_train = ph_X[:-20]
ph_X_test = ph_X[-20:]

In [58]:
potab_Y_train

array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])

In [39]:
regr = linear_model.LinearRegression()


In [40]:
ph_X_train = ph_X_train.reshape(-1,1)
potab_Y_train = potab_Y_train.reshape(-1,1)

In [41]:
regr.fit(ph_X_train, potab_Y_train)

In [42]:
ph_X_test = ph_X_test.reshape(-1,1)

In [43]:
potab_Y_pred = regr.predict(ph_X_test)

In [49]:
potab_Y_pred

array([[0.41128466],
       [0.39407386],
       [0.41183447],
       [0.39841557],
       [0.3884719 ],
       [0.39850748],
       [0.40423495],
       [0.39309314],
       [0.39933448],
       [0.39572672],
       [0.38380268],
       [0.40234486],
       [0.39666499],
       [0.39264065],
       [0.401639  ],
       [0.40473398],
       [0.39580161],
       [0.41450109],
       [0.39332995],
       [0.38785689]])

## Error calculations

In [13]:
mean_test_data = np.mean(potab_Y_test)

In [14]:
sse = np.sum((potab_Y_test - potab_Y_pred) ** 2)
sse

7.239355848383874

In [15]:
ssr = np.sum((potab_Y_pred - mean_test_data) ** 2)
ssr

7.239355848383874

In [16]:
sst = np.sum((potab_Y_test - mean_test_data) ** 2)
sst

0.0

# Predict from user input

In [64]:
user_input = float(input("Enter ph value: "))
user_input = np.array(user_input).reshape(1, -1)
print(user_input)

isDrinkable = regr.predict(user_input)
print(isDrinkable)
if(isDrinkable > 0.4):
    print("Water is drinkable.")
else:
    print("Water is not drinkable.")

Enter ph value:  10


[[10.]]
