Neural network

In [None]:
from sklearn.datasets import load_iris
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Load the Iris dataset
iris = load_iris()
X = iris.data  # attributes (one row per flower, one column per measurement)
y = iris.target  # integer class labels

# Convert to a DataFrame for easier exploration
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df["target"] = iris.target

# Display the first few rows.
# An explicit print is needed: a bare expression is only rendered by a notebook
# when it is the last statement of the cell, which is not the case here.
print(iris_df.head())

# One-hot encode the target variable.
# The number of classes is taken from the dataset instead of being hard-coded.
print("One-hot encoding")
y = to_categorical(y, num_classes=len(iris.target_names))

# Split data: 80% for training, 20% held out for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature data
# Standardization rescales every feature to zero mean and unit standard deviation so that
# each feature contributes on a comparable scale.
# fit_transform computes the mean and standard deviation on the training data and applies
# the transformation to it.
# transform then applies those same training statistics to the test data, without
# recalculating them, so no information from the test set leaks into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the neural network model (constructor)
# The input is declared with an explicit Input object whose shape is the number of
# features in the data (X_train.shape[1]). Keras recommends this over passing
# input_dim to the first Dense layer.
# The hidden layer has 10 neurons and uses ReLU, which introduces non-linearity: it outputs
# the input directly if it is positive and zero otherwise, allowing the model to learn
# non-linear patterns.
# The output layer has 3 neurons (the three possible classes in the Iris dataset). Softmax
# outputs a probability distribution across the three classes (the probabilities sum to 1);
# the class with the highest probability is the predicted class for a given input.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # input: one value per feature
    Dense(10, activation="relu"),               # hidden layer with 10 neurons
    Dense(3, activation="softmax"),             # output layer with 3 neurons (one per class)
])

# Equivalent construction using the .add() syntax:
# model = Sequential()
# model.add(tf.keras.Input(shape=(X_train.shape[1],)))
# model.add(Dense(10, activation="relu"))
# model.add(Dense(3, activation="softmax"))

# Compile the model
# Compiling configures the learning process: the optimizer, the loss function and the metrics.
# The optimizer updates the model's weights during training. Adam adapts the learning rate
# for each parameter and is one of the most common choices in deep learning.
# The loss function measures how well the predictions match the true labels.
# categorical_crossentropy is used for multiple, mutually exclusive classes whose labels are
# one-hot encoded.
# The 'accuracy' metric reports the proportion of correct predictions.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Train the model
# epochs: how many times the model iterates over the entire training dataset. Too many
# epochs without regularization may lead to overfitting.
# batch_size: how many samples are processed before the weights are updated.
# validation_split=0.2: 20% of the training data is used for validation during training.
# fit returns a history object containing the loss and accuracy values for the training and
# validation sets at each epoch.
print("HISTORY")
history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_split=0.2)

# Evaluate model
# Accuracy: proportion of correct predictions made by the model on the test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.2f}\nTest Loss: {loss:.2f}")

# Per-sample class probabilities (one row per test sample, one column per class)
predictions = model.predict(X_test)
print(f"\nPredictions\n{predictions}")

# Plotting the confusion matrix
# A confusion matrix compares the predicted labels with the true labels of a classifier.
# Each row represents the true class, each column the predicted class, so it shows not only
# how many errors were made but also which classes were confused with each other.
print("\nConfusion matrix")
class_ids = list(range(predictions.shape[1]))  # one id per output neuron
class_names = [f"Class {i}" for i in class_ids]
true_classes = y_test.argmax(axis=1)       # undo the one-hot encoding
predicted_classes = predictions.argmax(axis=1)  # most probable class per sample

# Passing labels explicitly keeps the matrix square and aligned with the tick labels even
# if some class never appears among the true or predicted values.
conf_matrix = confusion_matrix(true_classes, predicted_classes, labels=class_ids)
plt.figure(figsize=(6, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Class")
plt.ylabel("True Class")
plt.show()

# Example reading, taken from one run (training is not seeded, so exact counts can differ
# between runs):
# The first row indicates 10 instances of Class 0 (setosa) correctly classified and none
# misclassified as other classes.
# The second row shows 8 instances of Class 1 (versicolor) correctly predicted and 1 instance
# misclassified as Class 2 (virginica).
# The third row shows 11 instances of Class 2 (virginica) correctly predicted, with no
# misclassifications.
# Ideally all counts lie on the main diagonal, meaning every instance was classified correctly.

print("\nPlotting the predicted classes distribution")
# Bin edges sit halfway between class ids so each bar is centred on its tick.
bin_edges = [i - 0.5 for i in range(len(class_ids) + 1)]
plt.figure(figsize=(6, 4))
plt.hist(predicted_classes, bins=bin_edges, rwidth=0.8, color='skyblue', edgecolor='black')
plt.title("Distribution of Predicted Classes")
plt.xlabel("Predicted Class")
plt.ylabel("Number of Instances")
plt.xticks(class_ids, class_names)
plt.show()

ONE-HOT ENCODING
One-hot encoding is a method used to convert categorical variables into a numerical format that can be easily interpreted by machine learning algorithms. In many real-world datasets, the features or target variables are categorical, meaning they represent distinct categories or classes, such as colors, types of animals, or different product categories. However, most machine learning algorithms require numerical input to perform calculations. One-hot encoding solves this problem by converting each category into a binary vector where only one element is "hot" (set to 1) and the others are "cold" (set to 0). For example, consider a simple dataset with a categorical variable "color" which can have three possible values: red, green, and blue. Using one-hot encoding, each of these categories would be represented as a binary vector. The color "red" might be encoded as [1, 0, 0], "green" as [0, 1, 0], and "blue" as [0, 0, 1]. This binary representation allows algorithms to understand the categorical nature of the data without assuming any inherent order or relationship between the categories, which could be the case if we were to simply assign numeric values like 1, 2, and 3 to represent the colors. One-hot encoding is particularly important because it ensures that machine learning models do not mistakenly interpret categorical data as ordinal, meaning they will not assume any hierarchy or ranking between categories.

Assignment 6

In [None]:
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import load_iris

# Load the CSV data and separate the features from the target
print("\nWINES")
wines = '/content/drive/MyDrive/Colab Notebooks/wines.csv'
data = pd.read_csv(wines)
X = data.iloc[:, :-2].values  # Features: every column except the last two (quality and label)
feature_names = data.columns[:-2].tolist()  # Get the feature names

# Convert to a DataFrame for easier exploration
wines_df = pd.DataFrame(X, columns=feature_names)  # Feature columns
wines_df["target"] = data['quality']  # Add the target column

# Map every distinct quality value to a consecutive class index (0, 1, 2, ...), ordered by
# quality. Unlike `quality - quality.min()`, this stays valid when some quality levels
# between the minimum and the maximum do not occur in the data.
class_codes, class_values = pd.factorize(data['quality'], sort=True)
if (class_codes < 0).any():
    # factorize marks missing values with -1, which cannot be one-hot encoded
    raise ValueError("The 'quality' column contains missing values")
unique_classes = len(class_values)  # number of distinct quality levels

# One-hot encoding
y = to_categorical(class_codes, num_classes=unique_classes)

# Split data: 80% for training, 20% held out for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature data using statistics computed on the training set only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the neural network model (constructor)
# The input is declared with an explicit Input object sized to the number of features;
# Keras recommends this over passing input_dim to the first Dense layer.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),   # input: one value per feature
    Dense(64, activation="relu"),                # hidden layer with 64 neurons
    Dense(unique_classes, activation="softmax"), # output layer: one neuron per quality class
])

# Compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Train the model (20% of the training data is held back for validation)
print("HISTORY")
history = model.fit(X_train, y_train, epochs=25, batch_size=150, validation_split=0.2)

# Evaluate on the held-out test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.2f}\nTest Loss: {loss:.2f}")

# Per-sample class probabilities (one row per test sample, one column per class)
predictions = model.predict(X_test)
print(f"\nPredictions\n{predictions}")

# Plotting the confusion matrix
print("\nConfusion matrix")
n_classes = predictions.shape[1]  # one column of probabilities per class
class_ids = list(range(n_classes))
true_classes = y_test.argmax(axis=1)            # undo the one-hot encoding
predicted_classes = predictions.argmax(axis=1)  # most probable class per sample

# Tick labels are derived from the quality levels present in the data rather than
# hard-coded, so they always match the number of classes the model was built with.
quality_levels = sorted(data["quality"].dropna().unique())
quality_names = [f"Quality {int(level)}" for level in quality_levels]

# Passing labels explicitly keeps the matrix n_classes x n_classes even when a rare
# quality level appears neither in the test labels nor in the predictions; otherwise
# the rows/columns would shift relative to the tick labels.
conf_matrix = confusion_matrix(true_classes, predicted_classes, labels=class_ids)
plt.figure(figsize=(8, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=quality_names,
    yticklabels=quality_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Class")
plt.ylabel("True Class")
plt.show()

print("\nPlotting the predicted classes distribution")
# Bin edges sit halfway between class ids so each bar is centred on its tick.
bin_edges = [i - 0.5 for i in range(n_classes + 1)]
plt.figure(figsize=(12, 8))
plt.hist(predicted_classes, bins=bin_edges, rwidth=0.8, color='skyblue', edgecolor='black')
plt.title("Distribution of Predicted Classes")
plt.xlabel("Predicted Class")
plt.ylabel("Number of Instances")
plt.xticks(class_ids, [f"Class {i}" for i in class_ids])
plt.show()