# Excercise - Creating our own custom Model

This is a notebook that provides a quick overview of how to create your own custom model. You will be creating a simple model.
You will be utilizing Keras and Tensorflow


## Water Quality Dataset

This dataset contains water quality measurements and assessments related to potability, which is the suitability of water for human consumption. The dataset's primary objective is to provide insights into water quality parameters and assist in determining whether the water is potable or not. Each row in the dataset represents a water sample with specific attributes, and the "Potability" column indicates whether the water is suitable for consumption.

https://www.kaggle.com/datasets/uom190346a/water-quality-and-potability?select=water_potability.csv


In [2]:
#LOAD THE DATA

import pandas as pd
import numpy as np
import tensorflow as tf




data = pd.read_csv("/content/water_potability.csv")

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer


data.head(20)









Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
0,,204.890455,20791.318981,7.300212,368.516441,564.308654,10.379783,86.99097,2.963135,0
1,3.71608,129.422921,18630.057858,6.635246,,592.885359,15.180013,56.329076,4.500656,0
2,8.099124,224.236259,19909.541732,9.275884,,418.606213,16.868637,66.420093,3.055934,0
3,8.316766,214.373394,22018.417441,8.059332,356.886136,363.266516,18.436524,100.341674,4.628771,0
4,9.092223,181.101509,17978.986339,6.5466,310.135738,398.410813,11.558279,31.997993,4.075075,0
5,5.584087,188.313324,28748.687739,7.544869,326.678363,280.467916,8.399735,54.917862,2.559708,0
6,10.223862,248.071735,28749.716544,7.513408,393.663396,283.651634,13.789695,84.603556,2.672989,0
7,8.635849,203.361523,13672.091764,4.563009,303.309771,474.607645,12.363817,62.798309,4.401425,0
8,,118.988579,14285.583854,7.804174,268.646941,389.375566,12.706049,53.928846,3.595017,0
9,11.180284,227.231469,25484.508491,9.0772,404.041635,563.885481,17.927806,71.976601,4.370562,0


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Information on the data
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3276 entries, 0 to 3275
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ph               2785 non-null   float64
 1   Hardness         3276 non-null   float64
 2   Solids           3276 non-null   float64
 3   Chloramines      3276 non-null   float64
 4   Sulfate          2495 non-null   float64
 5   Conductivity     3276 non-null   float64
 6   Organic_carbon   3276 non-null   float64
 7   Trihalomethanes  3114 non-null   float64
 8   Turbidity        3276 non-null   float64
 9   Potability       3276 non-null   int64  
dtypes: float64(9), int64(1)
memory usage: 256.1 KB


In [6]:
# Brief overview of the dataset statistics
data.describe()

Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
count,2785.0,3276.0,3276.0,3276.0,2495.0,3276.0,3276.0,3114.0,3276.0,3276.0
mean,7.080795,196.369496,22014.092526,7.122277,333.775777,426.205111,14.28497,66.396293,3.966786,0.39011
std,1.59432,32.879761,8768.570828,1.583085,41.41684,80.824064,3.308162,16.175008,0.780382,0.487849
min,0.0,47.432,320.942611,0.352,129.0,181.483754,2.2,0.738,1.45,0.0
25%,6.093092,176.850538,15666.690297,6.127421,307.699498,365.734414,12.065801,55.844536,3.439711,0.0
50%,7.036752,196.967627,20927.833607,7.130299,333.073546,421.884968,14.218338,66.622485,3.955028,0.0
75%,8.062066,216.667456,27332.762127,8.114887,359.95017,481.792304,16.557652,77.337473,4.50032,1.0
max,14.0,323.124,61227.196008,13.127,481.030642,753.34262,28.3,124.0,6.739,1.0


In [7]:
# drop duplicates rows of data

data = data.drop_duplicates()

In [8]:
# percentage of missingness in the data for each column

missing = data.isnull().mean()*100
print(missing)

ph                 14.987790
Hardness            0.000000
Solids              0.000000
Chloramines         0.000000
Sulfate            23.840049
Conductivity        0.000000
Organic_carbon      0.000000
Trihalomethanes     4.945055
Turbidity           0.000000
Potability          0.000000
dtype: float64


In [9]:
# MICE IMPUTATION to fill the missing data
# create the imputer using MICE

# separate the target variable from the rest of the data to make sure it is not changed or imputed
features = data.drop(columns='Potability')
target = data.Potability
imputer = IterativeImputer(random_state=0)
features_imputed = imputer.fit_transform(features)

# convert the data back into a dataframe
features_imputed = pd.DataFrame(features_imputed, columns=features.columns)

# merge target variable and data
data_imputed = pd.concat([features_imputed, target], axis=1)
data_imputed.head(10)


Unnamed: 0,ph,Hardness,Solids,Chloramines,Sulfate,Conductivity,Organic_carbon,Trihalomethanes,Turbidity,Potability
0,7.190863,204.890455,20791.318981,7.300212,368.516441,564.308654,10.379783,86.99097,2.963135,0
1,3.71608,129.422921,18630.057858,6.635246,344.836463,592.885359,15.180013,56.329076,4.500656,0
2,8.099124,224.236259,19909.541732,9.275884,331.981769,418.606213,16.868637,66.420093,3.055934,0
3,8.316766,214.373394,22018.417441,8.059332,356.886136,363.266516,18.436524,100.341674,4.628771,0
4,9.092223,181.101509,17978.986339,6.5466,310.135738,398.410813,11.558279,31.997993,4.075075,0
5,5.584087,188.313324,28748.687739,7.544869,326.678363,280.467916,8.399735,54.917862,2.559708,0
6,10.223862,248.071735,28749.716544,7.513408,393.663396,283.651634,13.789695,84.603556,2.672989,0
7,8.635849,203.361523,13672.091764,4.563009,303.309771,474.607645,12.363817,62.798309,4.401425,0
8,6.927779,118.988579,14285.583854,7.804174,268.646941,389.375566,12.706049,53.928846,3.595017,0
9,11.180284,227.231469,25484.508491,9.0772,404.041635,563.885481,17.927806,71.976601,4.370562,0


In [10]:
# confirm imputed data
data_imputed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3276 entries, 0 to 3275
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   ph               3276 non-null   float64
 1   Hardness         3276 non-null   float64
 2   Solids           3276 non-null   float64
 3   Chloramines      3276 non-null   float64
 4   Sulfate          3276 non-null   float64
 5   Conductivity     3276 non-null   float64
 6   Organic_carbon   3276 non-null   float64
 7   Trihalomethanes  3276 non-null   float64
 8   Turbidity        3276 non-null   float64
 9   Potability       3276 non-null   int64  
dtypes: float64(9), int64(1)
memory usage: 256.1 KB


In [11]:
# Remove outliers that may affect the neural network's accuracy using IQR method
def remove_outliers_iqr(df, column_name):
    Q1 = df[column_name].quantile(0.25)
    Q3 = df[column_name].quantile(0.75)
    IQR = Q3 - Q1

    # anything above or below this is an outlier
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # place outliers in a data frame
    print(f"{df[column_name]}")
    outliers = df[(df[column_name] < lower_bound) | (df[column_name] > upper_bound)]
    print(f"Number of outliers: {len(outliers)}")
    print(f"Percentage of outliers: {len(outliers)/len(df)*100:.2f}%")


    # remove outliers

    df_clean = df[(df[column_name] >= lower_bound) & (df[column_name] <= upper_bound)]



    return df_clean

# columns to remove outliers in
columns = ['Hardness', 'Solids', 'Sulfate', 'Conductivity', 'Organic_carbon', 'Trihalomethanes', 'Turbidity']

data_imputed_copy = data_imputed.copy()

for i in columns:
  data_imputed_copy = remove_outliers_iqr(data_imputed_copy, i)









0       204.890455
1       129.422921
2       224.236259
3       214.373394
4       181.101509
           ...    
3271    193.681735
3272    193.553212
3273    175.762646
3274    230.603758
3275    195.102299
Name: Hardness, Length: 3276, dtype: float64
Number of outliers: 83
Percentage of outliers: 2.53%
0       20791.318981
1       18630.057858
2       19909.541732
3       22018.417441
4       17978.986339
            ...     
3271    47580.991603
3272    17329.802160
3273    33155.578218
3274    11983.869376
3275    17404.177061
Name: Solids, Length: 3193, dtype: float64
Number of outliers: 42
Percentage of outliers: 1.32%
0       368.516441
1       344.836463
2       331.981769
3       356.886136
4       310.135738
           ...    
3270    345.700257
3272    338.612062
3273    326.848982
3274    336.993878
3275    338.025733
Name: Sulfate, Length: 3151, dtype: float64
Number of outliers: 230
Percentage of outliers: 7.30%
0       564.308654
1       592.885359
2       418.606213
3 

Plot the Data Appropriately

In [12]:

# generate 2d classification dataset

# X, y = pass

# Transforms data to have mean=0 and standard deviation=1
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X = data_imputed_copy.drop(columns='Potability', axis=1)
y= data_imputed_copy['Potability']

X_scaled = scaler.fit_transform(X)

# from sklearn.decomposition import PCA
# pca = PCA(n_components=2)
# X_2d_pca = pca.fit_transform(X_scaled)

X_scaled.shape


(2830, 9)

In [13]:

# Split the data into training validation and test sets
from sklearn.model_selection import train_test_split
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3,random_state=42,
    stratify=y               # Keep same class distribution in all splits
)

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5,stratify=y_temp)

print(f"\n=== FINAL SHAPES ===")
print(f"X_train: {X_train.shape}")
print(f"X_val: {X_val.shape}")
print(f"X_test: {X_test.shape}")
print(f"y_train: {y_train.shape}")
print(f"y_val: {y_val.shape}")
print(f"y_test: {y_test.shape}")




=== FINAL SHAPES ===
X_train: (1981, 9)
X_val: (424, 9)
X_test: (425, 9)
y_train: (1981,)
y_val: (424,)
y_test: (425,)


# Each Member Defines their model Here

In [14]:
#Model Definition by member 1
def model_jeremiah_agbaje():
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Dense(128, input_shape=(X_train.shape[1],), name='dense_layer', activation="relu",kernel_regularizer=tf.keras.regularizers.l2(0.0001)))
  model.add(tf.keras.layers.Dropout(0.5))  # 50% dropout after first layer
  model.add(tf.keras.layers.Dense(64, name='dense_layer2', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00001)))
  model.add(tf.keras.layers.Dropout(0.5))  # 50% dropout after second layer
  model.add(tf.keras.layers.Dense(32, name='dense_layer3', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.00001)))
  model.add(tf.keras.layers.Dropout(0.5))
  model.add(tf.keras.layers.Dense(1, name='output_layer', activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(0.00001)))

  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
  return model

model = model_jeremiah_agbaje()

# Early stopping callback
early_stopping = tf.keras.callbacks.EarlyStopping(
      monitor='val_loss',    # Monitor validation loss
      patience=50,
      restore_best_weights=True  # Restore weights from best epoch
  )

# Train the model
history = model.fit(
  X_train, y_train,
  validation_data=(X_val, y_val),
  epochs=200,
  batch_size=32,
  verbose=1,
  callbacks=[early_stopping]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.6051 - loss: 0.7033 - val_accuracy: 0.6297 - val_loss: 0.6785
Epoch 2/200
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5904 - loss: 0.6934 - val_accuracy: 0.6274 - val_loss: 0.6789
Epoch 3/200
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6182 - loss: 0.6820 - val_accuracy: 0.6250 - val_loss: 0.6792
Epoch 4/200
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5860 - loss: 0.6983 - val_accuracy: 0.6250 - val_loss: 0.6793
Epoch 5/200
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6106 - loss: 0.6757 - val_accuracy: 0.6274 - val_loss: 0.6785
Epoch 6/200
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6026 - loss: 0.6912 - val_accuracy: 0.6321 - val_loss: 0.6781
Epoch 7/200
[1m62/62[0m [32m━━━

In [15]:
best_epoch = np.argmin(history.history['val_loss'])
print(f"Best Epoch: {best_epoch+1}")
print(f"Train Accuracy at Best Epoch: {history.history['accuracy'][best_epoch]:.4f}")
print(f"Val Accuracy at Best Epoch: {history.history['val_accuracy'][best_epoch]:.4f}")

Best Epoch: 199
Train Accuracy at Best Epoch: 0.6446
Val Accuracy at Best Epoch: 0.6580


In [16]:
# Evaluate
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy:.4f}, Test Loss: {test_loss:.4f}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6282 - loss: 0.6210 
Test Accuracy: 0.6518, Test Loss: 0.6178


In [17]:
def model_gaius_irakiza():
    model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    optimizer = tf.keras.optimizers.Nadam(learning_rate=0.0001)

    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.AUC(name='auc')
        ]
    )

    return model

gaius_model = model_gaius_irakiza()

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=30,
    min_delta=0.0001,
    restore_best_weights=True
)
history = gaius_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=300,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)

Epoch 1/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - accuracy: 0.4561 - auc: 0.5133 - loss: 0.8778 - precision: 0.3954 - recall: 0.7865 - val_accuracy: 0.4976 - val_auc: 0.5073 - val_loss: 0.6988 - val_precision: 0.3871 - val_recall: 0.6115
Epoch 2/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.4577 - auc: 0.5034 - loss: 0.8252 - precision: 0.3744 - recall: 0.6699 - val_accuracy: 0.6014 - val_auc: 0.5259 - val_loss: 0.6785 - val_precision: 0.4167 - val_recall: 0.1911
Epoch 3/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4904 - auc: 0.4900 - loss: 0.7758 - precision: 0.3532 - recall: 0.5470 - val_accuracy: 0.6156 - val_auc: 0.5420 - val_loss: 0.6657 - val_precision: 0.4167 - val_recall: 0.0955
Epoch 4/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.5105 - auc: 0.5098 - loss: 0.7356 - precision: 0.3738 - recall: 0.4511 - va

In [18]:
best_epoch = np.argmin(history.history['val_loss'])
print(f"Best Epoch: {best_epoch+1}")
print(f"Train Accuracy at Best Epoch: {history.history['accuracy'][best_epoch]:.4f}")
print(f"Val Accuracy at Best Epoch: {history.history['val_accuracy'][best_epoch]:.4f}")

Best Epoch: 114
Train Accuracy at Best Epoch: 0.6552
Val Accuracy at Best Epoch: 0.6557


In [19]:
# Evaluate the model on test data
test_loss, test_accuracy, test_precision, test_recall, test_auc = gaius_model.evaluate(X_test, y_test, verbose=1)

# Print results with clear formatting
print("\nTest Evaluation Metrics:")
print(f"  Loss      : {test_loss:.4f}")
print(f"  Accuracy  : {test_accuracy:.4f}")
print(f"  Precision : {test_precision:.4f}")
print(f"  Recall    : {test_recall:.4f}")
print(f"  AUC       : {test_auc:.4f}")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6555 - auc: 0.6818 - loss: 0.6155 - precision: 0.6358 - recall: 0.2532 

Test Evaluation Metrics:
  Loss      : 0.6050
  Accuracy  : 0.6800
  Precision : 0.6667
  Recall    : 0.2675
  AUC       : 0.6799


In [20]:
#Model Definition by member 3 (RMSprop variant)
def model_david():
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Dense(128, input_shape=(X_train.shape[1],), activation="relu",
                                   kernel_regularizer=tf.keras.regularizers.l2(0.0005)))
  model.add(tf.keras.layers.Dense(64, activation="relu",
                                   kernel_regularizer=tf.keras.regularizers.l2(0.0005)))
  model.add(tf.keras.layers.Dense(32, activation="relu",
                                   kernel_regularizer=tf.keras.regularizers.l2(0.0005)))
  model.add(tf.keras.layers.Dense(1, activation="sigmoid",
                                   kernel_regularizer=tf.keras.regularizers.l2(0.0005)))

  model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
                loss="binary_crossentropy", metrics=["accuracy"])
  return model

model3 = model_david()

# Early stopping callback
early_stopping_3 = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=50,
    restore_best_weights=True
)

# Train the model
history3 = model3.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=300,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping_3]
)



Epoch 1/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 33ms/step - accuracy: 0.6246 - loss: 0.7315 - val_accuracy: 0.6321 - val_loss: 0.7154
Epoch 2/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6477 - loss: 0.6954 - val_accuracy: 0.6509 - val_loss: 0.6962
Epoch 3/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.6472 - loss: 0.6846 - val_accuracy: 0.6226 - val_loss: 0.7004
Epoch 4/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.6770 - loss: 0.6538 - val_accuracy: 0.6509 - val_loss: 0.6796
Epoch 5/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6920 - loss: 0.6420 - val_accuracy: 0.6509 - val_loss: 0.6762
Epoch 6/300
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6909 - loss: 0.6291 - val_accuracy: 0.6557 - val_loss: 0.6733
Epoch 7/300
[1m62/62[0m [32

In [21]:
# Get best epoch based on validation loss
best_epoch = history3.history['val_loss'].index(min(history3.history['val_loss']))

# Get train and validation accuracy at best epoch
train_acc_at_best = history3.history['accuracy'][best_epoch]
val_acc_at_best = history3.history['val_accuracy'][best_epoch]

print(f"Best Epoch: {best_epoch + 1}")  # +1 for human-readable epoch number
print(f"Train Accuracy at Best Epoch: {train_acc_at_best:.4f}")
print(f"Validation Accuracy at Best Epoch: {val_acc_at_best:.4f}")


Best Epoch: 6
Train Accuracy at Best Epoch: 0.6936
Validation Accuracy at Best Epoch: 0.6557


In [22]:
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Predict probabilities
y_pred_probs = model3.predict(X_test).ravel()

# Predict binary classes
y_pred_classes = (y_pred_probs > 0.5).astype("int32")

# Evaluate loss and accuracy
test_loss, test_accuracy = model3.evaluate(X_test, y_test, verbose=0)

# Calculate precision, recall, and AUC
test_precision = precision_score(y_test, y_pred_classes)
test_recall = recall_score(y_test, y_pred_classes)
test_auc = roc_auc_score(y_test, y_pred_probs)

# Print results
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Precision: {test_precision:.4f}")
print(f"Test Recall: {test_recall:.4f}")
print(f"Test AUC: {test_auc:.4f}")


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
Test Loss: 0.6443
Test Accuracy: 0.6800
Test Precision: 0.6400
Test Recall: 0.3057
Test AUC: 0.7112


In [23]:
#Model Definition by member 4
!pip install tensorflow
def model_tamanda_kaunda():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(96, activation='relu', kernel_regularizer=tf.keras.regularizers.l1(0.001)),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(48, activation='relu', kernel_regularizer=tf.keras.regularizers.l1(0.0005)),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(24, activation='relu', kernel_regularizer=tf.keras.regularizers.l1(0.0005)),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(12, activation='relu', kernel_regularizer=tf.keras.regularizers.l1(0.0001)),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.AUC(name='auc')
        ]
    )

    return model

tamanda_model = model_tamanda_kaunda()

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=25,
    min_delta=0.001,
    restore_best_weights=True
)

history = tamanda_model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=150,
    batch_size=64,
    verbose=1,
    callbacks=[early_stopping]
)

Epoch 1/150
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 30ms/step - accuracy: 0.5335 - auc: 0.5002 - loss: 1.1086 - precision: 0.3798 - recall: 0.3421 - val_accuracy: 0.6297 - val_auc: 0.4878 - val_loss: 1.0599 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/150
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6185 - auc: 0.5096 - loss: 1.0572 - precision: 0.4519 - recall: 0.0216 - val_accuracy: 0.6297 - val_auc: 0.5069 - val_loss: 1.0299 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/150
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6192 - auc: 0.5207 - loss: 1.0263 - precision: 0.0273 - recall: 3.2438e-04 - val_accuracy: 0.6297 - val_auc: 0.5235 - val_loss: 1.0000 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/150
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6366 - auc: 0.5402 - loss: 0.9858 - precision: 

In [24]:
best_epoch = np.argmin(history.history['val_loss'])
print(f"Best Epoch: {best_epoch+1}")
print(f"Train Accuracy at Best Epoch: {history.history['accuracy'][best_epoch]:.4f}")
print(f"Val Accuracy at Best Epoch: {history.history['val_accuracy'][best_epoch]:.4f}")

Best Epoch: 102
Train Accuracy at Best Epoch: 0.6683
Val Accuracy at Best Epoch: 0.6226


In [25]:
test_loss, test_accuracy, test_precision, test_recall, test_auc = tamanda_model.evaluate(X_test, y_test, verbose=1)

# Print results with clear formatting
print("\nTest Evaluation Metrics:")
print(f"  Loss      : {test_loss:.4f}")
print(f"  Accuracy  : {test_accuracy:.4f}")
print(f"  Precision : {test_precision:.4f}")
print(f"  Recall    : {test_recall:.4f}")
print(f"  AUC       : {test_auc:.4f}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6803 - auc: 0.7034 - loss: 0.6448 - precision: 0.6429 - recall: 0.3840

Test Evaluation Metrics:
  Loss      : 0.6439
  Accuracy  : 0.6988
  Precision : 0.6747
  Recall    : 0.3567
  AUC       : 0.6926


In [None]:
#Model Definition by member 5
def model_name_of_student():

  return

# Start the training Process

In [None]:

#fit model
history = model.fit(X, Y, validation_data=(testX, testy), epochs=4000, verbose=0, callbacks=[es])
# evaluate the model
_, train_acc = model.evaluate(trainX, trainy, verbose=0)
_, test_acc = model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))
# plot training history
pyplot.plot(history.history['loss'], label='train')
pyplot.plot(history.history['val_loss'], label='test')
pyplot.legend()
pyplot.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
import numpy

In [None]:
#Data Loading and Preprocessing
# The coach will never do this!!
regularizer = 'l1'

In [None]:
model = Sequential()
model.add(Dense(32, activation ='relu', kernel_regularizer= regularizer , input_shape = (2224,224)))
model.add(Dropout(0.2))
#adding Dropout
model.add(Dense(64, activation ='relu', kernel_regularizer= regularizer , input_shape = (2224,224)))
#adding Dropout
model.add(Dense(128, activation ='relu', kernel_regularizer= regularizer , input_shape = (2224,224)))
model.add(Dropout(0.2))
#adding Dropout
model.add(Dense(2, activation = 'sigmoid'))

In [None]:
callback =EarlyStopping(monitor='loss',patience=3)

In [None]:
model.compile(optimizer='adam', loss= 'rmse', metrics = ['accuracy'])

In [None]:
model.fit(X, Y, epochs=1000, batch_size= 128, callbacks=[callback], verbose=0)

In [None]:
model.summary()