In [7]:
import pandas as pd, numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

In [2]:
# Load the dataset
glass_df = pd.read_csv('glass.csv')

# Assuming the last column is the target variable
X = glass_df.iloc[:, :-1].values
Y = glass_df.iloc[:, -1].values

# Normalize features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

# Encode class values as integers and then convert to dummy (one-hot encoded)
encoder = LabelEncoder()
Y_encoded = encoder.fit_transform(Y)
y_dummy = to_categorical(Y_encoded)

In [6]:
glass_df.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.1,71.78,0.06,8.75,0.0,0.0,1
1,1.51761,13.89,3.6,1.36,72.73,0.48,7.83,0.0,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.0,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.0,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.0,0.0,1


In [3]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y_dummy, test_size=0.2, random_state=7)

def create_model():
    # Create model
    model = Sequential()
    model.add(Dense(10, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(y_dummy.shape[1], activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [4]:
# Train an ensemble of models
n_models = 10
models = [create_model() for _ in range(n_models)]
for model in models:
    model.fit(X_train, y_train, epochs=150, verbose=0)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# Make predictions with each model and average them
predictions = [model.predict(X_test) for model in models]
avg_predictions = np.mean(predictions, axis=0)
ensemble_pred = np.argmax(avg_predictions, axis=1)

# Decode one-hot to single integer label
test_true = np.argmax(y_test, axis=1)

# Calculate accuracy
accuracy = accuracy_score(test_true, ensemble_pred)
print("Ensemble Accuracy: {:.2f}%".format(accuracy * 100))

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Ensemble Accuracy: 65.12%


In [9]:
# Setup 10-fold cross-validation
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
cvscores = []

# Perform 10-fold cross-validation
for train, test in kfold.split(X_normalized, y_dummy):
    # Create a new model for the current fold
    model = create_model()
    
    # Train the model
    model.fit(X_normalized[train], y_dummy[train], epochs=150, verbose=0)
    
    # Evaluate the model
    predictions = model.predict(X_normalized[test])
    y_pred = np.argmax(predictions, axis=1)
    y_true = np.argmax(y_dummy[test], axis=1)
    accuracy = accuracy_score(y_true, y_pred)
    cvscores.append(accuracy)

# Calculate average and standard deviation of the scores
mean_accuracy = np.mean(cvscores)
std_accuracy = np.std(cvscores)
print(f"Mean Accuracy: {mean_accuracy:.2f}%")
print(f"Standard Deviation: {std_accuracy:.2f}%")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
Mean Accuracy: 0.61%
Standard Deviation: 0.10%
