<a href="https://colab.research.google.com/github/SalehMehrabani/Artifical-Intelligance/blob/main/NN2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Creating a Simple Neural Network Using the [Water Quality Dataset](https://www.kaggle.com/datasets/adityakadiwal/water-potability)


# Data Preprocessing


Preprocess Dataset:


*   Filling NaN values with the mean of each feature
*   Removing Outliers



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
!pip install -U pandas-profiling

In [None]:
df=pd.read_csv("water_potability.csv")

In [None]:
df

In [None]:
df.isna().sum()

ph                 491
Hardness             0
Solids               0
Chloramines          0
Sulfate            781
Conductivity         0
Organic_carbon       0
Trihalomethanes    162
Turbidity            0
Potability           0
dtype: int64

In [None]:
# Columns that df.isna().sum() reported with a non-zero count of missing values.
NaColumns=["ph", "Sulfate", "Trihalomethanes"]

In [None]:
from pandas_profiling import ProfileReport

In [None]:
profile = ProfileReport(df, title="Report")
profile

In [None]:
from sklearn.impute import SimpleImputer

In [None]:
# Mean imputation: each NaN in the selected columns is replaced by the mean
# of the observed values of that column.
imputer = SimpleImputer(
    strategy='mean',
    missing_values=np.nan,
)
df[NaColumns] = imputer.fit_transform(df[NaColumns].to_numpy())

In [None]:
df.isna().sum()

ph                 0
Hardness           0
Solids             0
Chloramines        0
Sulfate            0
Conductivity       0
Organic_carbon     0
Trihalomethanes    0
Turbidity          0
Potability         0
dtype: int64

In [None]:
import seaborn as sns

In [None]:
sns.pairplot(df, hue='Potability')

In [None]:
def drop_outliers(df, field_names):
  """Drop, in place, every row that holds an outlier in one of the given columns.

  A value counts as an outlier when it lies outside Tukey's fences
  ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` with ``IQR = Q3 - Q1``. Columns are
  processed in the order given, so the quartiles of a later column are
  computed on the rows that survived the earlier columns.

  Args:
    df: DataFrame to filter; it is modified in place.
    field_names: Iterable of numeric column names to check.

  Returns:
    None.
  """
  for field in field_names:
    # Both quartiles are taken once, before any row is removed for this column,
    # so the upper and the lower fence describe the same distribution.
    q1, q3 = np.percentile(df[field], [25, 75])
    margin = 1.5 * (q3 - q1)
    outside = (df[field] < q1 - margin) | (df[field] > q3 + margin)
    df.drop(df.index[outside], inplace=True)

In [None]:
df.columns

Index(['ph', 'Hardness', 'Solids', 'Chloramines', 'Sulfate', 'Conductivity',
       'Organic_carbon', 'Trihalomethanes', 'Turbidity', 'Potability'],
      dtype='object')

In [None]:
# Predictor columns: every column of the frame except the 'Potability' target.
feature = [
    "ph",
    "Hardness",
    "Solids",
    "Chloramines",
    "Sulfate",
    "Conductivity",
    "Organic_carbon",
    "Trihalomethanes",
    "Turbidity",
]
feature

['ph',
 'Hardness',
 'Solids',
 'Chloramines',
 'Sulfate',
 'Conductivity',
 'Organic_carbon',
 'Trihalomethanes',
 'Turbidity']

In [None]:
drop_outliers(df, feature)

In [None]:
df.describe()

# Define PyTorch Dataset and Necessary Functions

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader,  Subset

In [None]:
class WaterDataset(Dataset):
  """Map-style dataset serving (features, label) pairs from a DataFrame.

  Every column except 'Potability' is treated as a feature; 'Potability' is
  the label.

  Attributes:
    X: DataFrame holding the feature columns.
    y: Series holding the 'Potability' column.
  """

  def __init__(self, df):
    """Split `df` into the feature frame `X` and the label series `y`."""
    super().__init__()
    # `columns=` instead of a positional axis: pandas 2.0 made every argument
    # of DataFrame.drop after `labels` keyword-only.
    self.X = df.drop(columns=['Potability'])
    self.y = df['Potability']

  def __len__(self):
    """Return the number of rows."""
    return len(self.X)

  def __getitem__(self, index):
    """Return row `index` (positional) as a float feature tensor and a long label tensor."""
    # Positional lookup, so a non-contiguous index left by row filtering is fine.
    # Hand torch a plain float32 array rather than a pandas object.
    features = self.X.iloc[index].to_numpy(dtype="float32")
    targets = self.y.iloc[index]
    return torch.tensor(features, dtype=torch.float), torch.tensor(targets, dtype=torch.long)

In [None]:
from sklearn.model_selection import KFold, train_test_split
# Hold out 10% of the rows as the test set. The fixed random_state makes the
# shuffle, and therefore the split, identical on every run of the notebook.
df_train, df_test = train_test_split(df, test_size=0.1, shuffle=True, random_state=42)

In [None]:
def train_one_epoch(model, data_loader, criterion, optimizer):
  """Run one optimisation pass over `data_loader` and report the mean metrics.

  The model is put in training mode; for every batch the gradients are reset,
  the loss is back-propagated and the optimizer takes one step.

  Args:
    model: Callable module mapping a feature batch to class scores.
    data_loader: Iterable yielding (features, targets) batches.
    criterion: Loss callable taking (outputs, targets).
    optimizer: Optimizer exposing zero_grad() and step().

  Returns:
    Tuple (mean_loss, mean_accuracy): plain means over the per-batch values,
    i.e. every batch carries the same weight regardless of its size.
  """
  model.train()
  losses = []
  accuracies = []
  for inputs, labels in data_loader:
    optimizer.zero_grad()
    scores = model(inputs)
    step_loss = criterion(scores, labels)
    # Share of samples whose highest-scoring class equals the label.
    n_correct = torch.argmax(scores, -1).eq(labels).sum()
    step_acc = n_correct / labels.shape[0]
    losses.append(step_loss.item())
    accuracies.append(step_acc.item())
    step_loss.backward()
    optimizer.step()
  mean_loss = sum(losses) / len(losses)
  mean_acc = sum(accuracies) / len(accuracies)
  return mean_loss, mean_acc

In [None]:
def test_one_epoch(model, data_loader, criterion, optimizer):
  batch_loss, batch_acc = [], []
  model.eval()
  for features, targets in data_loader:
    with torch.no_grad():
      outputs = model(features)
      loss = criterion(outputs, targets)
      acc = (torch.argmax(outputs, -1)==targets).sum()/targets.shape[0]
      batch_loss.append(loss.item())
      batch_acc.append(acc.item())
  loss_avg = sum(batch_loss)/len(batch_loss)
  acc_avg = sum(batch_acc) /len(batch_acc)
  return loss_avg, acc_avg

In [None]:
def train_net(fold_number, epochs, model, train_loader, val_loader, criterion, optimizer, verbose=True):
  """Train `model` for `epochs` epochs, evaluating on `val_loader` after each one.

  Args:
    fold_number: Identifier printed in the progress header.
    epochs: Number of train/evaluate rounds to run.
    model: Network handed to train_one_epoch and test_one_epoch.
    train_loader: Batches used for the training pass of every epoch.
    val_loader: Batches used for the evaluation pass of every epoch.
    criterion: Loss callable forwarded to both passes.
    optimizer: Optimizer forwarded to both passes.
    verbose: When true, print the fold header and the metrics of every epoch.

  Returns:
    Four lists with one entry per epoch, in this order: training losses,
    training accuracies, evaluation losses, evaluation accuracies.
  """
  if verbose:
    print("********************")
    print('Fold:{}'.format(fold_number))

  train_losses, train_acus = [], []
  test_losses, test_acus = [], []
  for epoch in range(epochs):
    fit_loss, fit_acc = train_one_epoch(model, train_loader, criterion, optimizer)
    eval_loss, eval_acc = test_one_epoch(model, val_loader, criterion, optimizer)
    if verbose:
      print("Epoch:{}".format(epoch))
      print("train_loss:{:.1f}, train_acc:{:.2f}".format(fit_loss, fit_acc))
      print("test_loss:{:.1f}, test_acc:{:.2f}".format(eval_loss, eval_acc))
    train_losses.append(fit_loss)
    train_acus.append(fit_acc)
    test_losses.append(eval_loss)
    test_acus.append(eval_acc)
  return train_losses, train_acus, test_losses, test_acus

In [None]:
def plot_history(history):
  """Draw averaged loss and accuracy learning curves side by side.

  Args:
    history: Mapping with the keys 'Train Loss', 'Val Loss', 'Train Acc' and
      'Val Acc'. Each value is converted to an array and averaged over its
      first axis (presumably one row per fold and one column per epoch;
      confirm against the caller).
  """
  curves = {
      key: np.array(history[key]).mean(0)
      for key in ('Train Loss', 'Val Loss', 'Train Acc', 'Val Acc')
  }
  # Epochs are numbered from 1 on the x axis.
  epoch_axis = range(1, len(curves['Train Loss']) + 1)

  plt.figure(figsize=(16, 8))
  panels = (
      (121, 'Train Loss', 'Val Loss', "Learning Curve For Loss"),
      (122, 'Train Acc', 'Val Acc', "Learning Curve For Accuracy"),
  )
  for position, train_key, val_key, heading in panels:
    plt.subplot(position)
    plt.plot(epoch_axis, curves[train_key], 'r', label='Train')
    plt.plot(epoch_axis, curves[val_key], 'b', label='Validation')
    plt.title(heading)
    plt.legend()

  plt.show()

In [None]:
def weight_init(m):
  """Initialise a plain ``nn.Linear`` layer in place.

  Weights are drawn with ``nn.init.uniform_`` using its default bounds and the
  bias, when the layer has one, is set to zero. Modules of any other type
  (including subclasses of ``nn.Linear``) are left untouched.

  Args:
    m: Module to initialise.
  """
  if type(m) is nn.Linear:
    nn.init.uniform_(m.weight)
    # A layer built with bias=False has bias set to None; zeroing it would raise.
    if m.bias is not None:
      nn.init.zeros_(m.bias)

# Creating a Simple Neural Network


In [None]:
# Wrap the train and test frames so they can be indexed as (features, label) tensors.
train_dataset, test_dataset = WaterDataset(df_train), WaterDataset(df_test)