In [65]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout





In [66]:
# Load dataset
df = pd.read_csv('TASK-ML-INTERN.csv')

# Drop unnecessary column
df.drop('hsi_id', axis=1, inplace=True)

In [67]:
def remove_outliers_iqr(df, threshold=4):
    Q1 = df.quantile(0.25)
    Q3 = df.quantile(0.75)
    IQR = Q3-Q1
    lower_bound = Q1 - threshold * IQR
    upper_bound = Q3 + threshold * IQR
    return df[~((df<lower_bound)| (df>upper_bound)).any(axis=1)]

In [68]:
df_cleaned = remove_outliers_iqr(df)
df_cleaned.reset_index(inplace = True)
print("Original shape:", df.shape)
print("New shape after removing outliers:", df_cleaned.shape)

Original shape: (500, 449)
New shape after removing outliers: (468, 450)


In [69]:
df_cleaned

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,439,440,441,442,443,444,445,446,447,vomitoxin_ppb
0,0,0.416181,0.396844,0.408985,0.372865,0.385293,0.365390,0.355226,0.343350,0.344837,...,0.710280,0.717482,0.715078,0.705379,0.696691,0.692793,0.711369,0.697679,0.704520,1100.0
1,1,0.415797,0.402956,0.402564,0.396014,0.397192,0.389634,0.375671,0.363689,0.373883,...,0.684011,0.697271,0.701995,0.696077,0.701012,0.677418,0.696921,0.696544,0.689054,1000.0
2,2,0.389023,0.371206,0.373098,0.373872,0.361056,0.349709,0.333882,0.330841,0.328925,...,0.683054,0.669286,0.663179,0.676165,0.676591,0.655951,0.658945,0.670989,0.665176,1300.0
3,3,0.468837,0.473255,0.462949,0.459335,0.461672,0.459824,0.458194,0.427737,0.415360,...,0.742782,0.730801,0.736787,0.730044,0.751437,0.738497,0.742446,0.754657,0.733474,1300.0
4,4,0.483352,0.487274,0.469153,0.487648,0.464026,0.451152,0.458229,0.440782,0.426193,...,0.770227,0.773013,0.761431,0.763488,0.762473,0.744012,0.775486,0.760431,0.751988,220.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,495,0.478140,0.444033,0.442120,0.437473,0.428672,0.413238,0.417758,0.420388,0.413290,...,0.747858,0.730535,0.716969,0.739297,0.724827,0.720484,0.740626,0.740116,0.721839,1200.0
464,496,0.409367,0.394941,0.380236,0.375340,0.346122,0.354650,0.361170,0.342974,0.352137,...,0.670232,0.659045,0.661587,0.658422,0.644254,0.646479,0.656779,0.646700,0.646733,0.0
465,497,0.486526,0.501372,0.500175,0.508139,0.489411,0.457311,0.462321,0.462927,0.442647,...,0.787532,0.780347,0.768362,0.771411,0.770919,0.761464,0.770314,0.763324,0.797187,0.0
466,498,0.464595,0.498822,0.489077,0.453381,0.487636,0.461950,0.461671,0.447362,0.451952,...,0.739432,0.759722,0.752118,0.761910,0.761111,0.730431,0.753545,0.749619,0.756383,0.0


In [70]:
df_cleaned.drop('index',axis=1,inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned.drop('index',axis=1,inplace = True)


In [71]:
X=df_cleaned.drop('vomitoxin_ppb',axis =1 )

In [72]:
Y=df_cleaned['vomitoxin_ppb']

In [73]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [74]:
from sklearn.decomposition import PCA
import numpy as np

pca = PCA()
X_pca = pca.fit_transform(X_scaled)
print(X_pca.shape)

n_components = np.argmax(np.cumsum(pca.explained_variance_ratio_) >= 0.95) + 1
print(f"Optimal number of PCA components: {n_components}")

# Apply PCA with optimal number of components
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_scaled)
X_pca = pd.DataFrame(X_pca)

(468, 448)
Optimal number of PCA components: 3


In [75]:
# Splitting Data
X_train, X_test, Y_train, Y_test = train_test_split(X_pca, Y, test_size=0.2, random_state=42)

# Scaling the transformed features again
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [76]:
scaler_y = StandardScaler()
Y_train = scaler_y.fit_transform(pd.DataFrame(Y_train))
Y_test = scaler_y.transform(pd.DataFrame(Y_test))

In [77]:

# Define Improved ANN Model
model = Sequential([
    Dense(160, activation='relu', input_shape=(X_train.shape[1],)),  # HL1
    BatchNormalization(),
    Dropout(0.4),
    
    Dense(128, activation='relu'),  # HL2
    BatchNormalization(),
    Dropout(0.4),
    
    Dense(64, activation='relu'),  # HL3
    BatchNormalization(),
    Dropout(0.2),
    
    Dense(1, activation='linear')  # Output Layer for Regression
])


In [78]:
import tensorflow
opt = tensorflow.keras.optimizers.Adam(learning_rate = 0.001)
loss = tf.keras.losses.MeanSquaredError()
print(loss)




<keras.src.losses.MeanSquaredError object at 0x31c0ba730>


In [79]:


# Compile Model
model.compile(optimizer=opt, loss='mse', metrics=['mae'])

# Train Model
history = model.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    epochs=100,
    batch_size=32,
)

# Model Summary
model.summary()




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [80]:
Y_pred = model.predict(X_test)







In [81]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

mse = mean_squared_error(Y_test, Y_pred)
mae = mean_absolute_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
print(mse,mae,r2)

0.8381962117253147 0.5621109554281531 0.3862726864545537


In [82]:
pip install keras-tuner


Note: you may need to restart the kernel to use updated packages.


In [83]:
import tensorflow as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

# Function to build the ANN model
def build_model(hp):
    model = Sequential()
    
    # Input Layer + First Hidden Layer
    model.add(Dense(
        hp.Int('units_1', min_value=32, max_value=256, step=32),
        activation=hp.Choice('activation_1', values=['relu', 'elu']),
        input_shape=(X_train.shape[1],)
    ))
    model.add(BatchNormalization())
    model.add(Dropout(hp.Float('dropout_1', 0.1, 0.5, step=0.1)))

    # Second Hidden Layer
    model.add(Dense(
        hp.Int('units_2', min_value=32, max_value=128, step=32),
        activation=hp.Choice('activation_2', values=['relu', 'elu'])
    ))
    model.add(BatchNormalization())
    model.add(Dropout(hp.Float('dropout_2', 0.1, 0.5, step=0.1)))

    # Third Hidden Layer
    model.add(Dense(
        hp.Int('units_3', min_value=32, max_value=128, step=32),
        activation=hp.Choice('activation_3', values=['relu', 'elu'])
    ))
    model.add(BatchNormalization())
    model.add(Dropout(hp.Float('dropout_3', 0.1, 0.5, step=0.1)))

    # Output Layer
    model.add(Dense(1, activation='linear'))

    # Compile the model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[0.001, 0.0005, 0.0001])
        ),
        loss='mse',
        metrics=['mae']
    )
    
    return model

# Initialize Keras Tuner (Hyperband Search)
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=100,
    factor=3,
    directory='keras_tuner_results',
    project_name='ann_tuning'
)

# Search for best hyperparameters
tuner.search(X_train, Y_train, epochs=50, validation_data=(X_test, Y_test))

# Get the best model
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.hypermodel.build(best_hps)

# Train the best model
history = best_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=100, batch_size=32)

# Evaluate the model
Y_pred = best_model.predict(X_test)
print("Best Hyperparameters:")
for key in best_hps.values.keys():
    print(f"{key}: {best_hps.get(key)}")



Reloading Tuner from keras_tuner_results/ann_tuning/tuner0.json




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [84]:
print("Best Hyperparameters:")
for key in best_hps.values.keys():
    print(f"{key}: {best_hps.get(key)}")

Best Hyperparameters:
units_1: 160
activation_1: relu
dropout_1: 0.4
units_2: 128
activation_2: relu
dropout_2: 0.4
units_3: 64
activation_3: elu
dropout_3: 0.2
learning_rate: 0.001
tuner/epochs: 100
tuner/initial_epoch: 34
tuner/bracket: 2
tuner/round: 2
tuner/trial_id: 0230


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Build the model with hyperparameter-tuned values
model = Sequential([
    Dense(160, activation='relu', input_shape=(X_train.shape[1],)),  # HL1 - Updated to 160 units
    BatchNormalization(),
    Dropout(0.4),  # Updated dropout

    Dense(128, activation='relu'),  # HL2 - Updated to 128 units
    BatchNormalization(),
    Dropout(0.4),  # Updated dropout

    Dense(64, activation='elu'),  # HL3 - Updated to 64 units, changed activation to ELU
    BatchNormalization(),
    Dropout(0.2),  # Updated dropout

    Dense(1, activation='linear')  # Output layer (Regression)
])


opt = Adam(learning_rate=0.001)


model.compile(optimizer=opt, loss='mse', metrics=['mae'])

# Train Model using tuned epochs
history = model.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    epochs=100,  # Updated to 100 as per tuning
    batch_size=32,
)


model.summary()

# Make Predictions
Y_pred = model.predict(X_test)




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [86]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

mse = mean_squared_error(Y_test, Y_pred)
mae = mean_absolute_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)
print(mse,mae,r2)

0.8471505154326717 0.5679262395639423 0.37971634477451366


In [87]:
model.save('tuned_ann_model.h5')


  saving_api.save_model(
