In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
import pandas as pd
df = pd.read_csv("water_potability.csv")
df

Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
0,,204.890455,20791.318981,7.300212,368.516441,564.308654,10.379783,86.990970,2.963135,0
1,3.716080,129.422921,18630.057858,6.635246,,592.885359,15.180013,56.329076,4.500656,0
2,8.099124,224.236259,19909.541732,9.275884,,418.606213,16.868637,66.420093,3.055934,0
3,8.316766,214.373394,22018.417441,8.059332,356.886136,363.266516,18.436524,100.341674,4.628771,0
4,9.092223,181.101509,17978.986339,6.546600,310.135738,398.410813,11.558279,31.997993,4.075075,0
...,...,...,...,...,...,...,...,...,...,...
3271,4.668102,193.681735,47580.991603,7.166639,359.948574,526.424171,13.894419,66.687695,4.435821,1
3272,7.808856,193.553212,17329.802160,8.061362,,392.449580,19.903225,,2.798243,1
3273,9.419510,175.762646,33155.578218,7.350233,,432.044783,11.039070,69.845400,3.298875,1
3274,5.126763,230.603758,11983.869376,6.303357,,402.883113,11.168946,77.488213,4.708658,1


In [3]:
import pandas as pd

# Assuming df is your DataFrame
X = df['Solids'].values.reshape(-1, 1)  # Reshape to (n_samples, 1)
y = df['Potability'].values

# Normalize/Scale X
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)


In [4]:
print(X_train)
print(y_train)

[[-0.55283024]
 [ 1.38505418]
 [-0.42120077]
 ...
 [ 1.01798723]
 [-0.6650338 ]
 [ 1.4442152 ]]
[0 0 0 ... 0 0 1]


In [5]:
print(len(X_train))
print(len(y_train))

2620
2620


In [6]:
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [7]:
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
class SimpleClassifier(nn.Module):
    def __init__(self):
        super(SimpleClassifier, self).__init__()
        self.fc = nn.Linear(1, 1)

    def forward(self, x):
        x = self.fc(x)
        return x

In [9]:
model = SimpleClassifier()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
num_epochs = 1000

for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels.float())
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}')

Epoch 1/1000, Loss: 0.7286791205406189
Epoch 2/1000, Loss: 0.6271010637283325
Epoch 3/1000, Loss: 0.6785622835159302
Epoch 4/1000, Loss: 0.7262812852859497
Epoch 5/1000, Loss: 0.6594483852386475
Epoch 6/1000, Loss: 0.7231578826904297
Epoch 7/1000, Loss: 0.6905652284622192
Epoch 8/1000, Loss: 0.6876734495162964
Epoch 9/1000, Loss: 0.618764340877533
Epoch 10/1000, Loss: 0.6702908277511597
Epoch 11/1000, Loss: 0.6754021048545837
Epoch 12/1000, Loss: 0.670549213886261
Epoch 13/1000, Loss: 0.6224786043167114
Epoch 14/1000, Loss: 0.6520076990127563
Epoch 15/1000, Loss: 0.6444990634918213
Epoch 16/1000, Loss: 0.6445291042327881
Epoch 17/1000, Loss: 0.6727052330970764
Epoch 18/1000, Loss: 0.6869109869003296
Epoch 19/1000, Loss: 0.6798868179321289
Epoch 20/1000, Loss: 0.680230975151062
Epoch 21/1000, Loss: 0.69248366355896
Epoch 22/1000, Loss: 0.6333362460136414
Epoch 23/1000, Loss: 0.6764875054359436
Epoch 24/1000, Loss: 0.6350467801094055
Epoch 25/1000, Loss: 0.7332281470298767
Epoch 26/1000,

In [11]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = (outputs.squeeze() > 0.5).int()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy * 100:.2f}%')

Test Accuracy: 62.80%


In [12]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the model
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1,), dtype=tf.float64),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Convert data types to float64 and int64
X_train_scaled = X_train_scaled.astype('float64')
y_train = y_train.astype('int64')
X_test_scaled = X_test_scaled.astype('float64')
y_test = y_test.astype('int64')

# Train the model
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

2023-11-29 21:24:59.136675: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-29 21:24:59.138225: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-29 21:24:59.172799: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-29 21:24:59.172826: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-29 21:24:59.173524: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f2310f9d090>