In [None]:
"""
MatSoc: Intro to ML Techniques
Assignment 3
Deadline: Sunday, 14 January 2024, 23:59 IST

Name: Teesha Mittal
Roll Number: 211104
IITK Email: teesham21@iitk.ac.in

You may use the ML library of your choice to do this part.

1. Train a neural network on tensorflow for the given data
2. Train a decision tree with sklearn for the given data
3. Make predictions using the ensemble (average) of the two models
4. Report the error for the neural network, decision tree and the ensemble.
5. Use Sampling with replacement to increase the data size by 15 percent.
6. Use 15% of the increased data set as the test data.

  Note that the code for all parts must be visible

  Make sure all the code is run before you submit the notebook.

"""

In [None]:
"1."

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
# Fetch MNIST as a (train, test) pair, each holding (images, labels).
mnist_train, mnist_test = tf.keras.datasets.mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

# Normalize the data: divide both image arrays by 255.0
x_train, x_test = x_train / 255.0, x_test / 255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Fully connected classifier: flatten each 28x28 input, pass it through two
# 128-unit ReLU hidden layers, and finish with a 10-unit softmax output.
model = tf.keras.Sequential()
model.add(layers.Flatten(input_shape=(28, 28)))
for _ in range(2):
    model.add(layers.Dense(128, activation="relu"))
model.add(layers.Dense(10, activation="softmax"))

In [None]:
# The output layer of `model` already applies softmax, so the network emits
# class probabilities, not logits. The loss must therefore be told
# from_logits=False; with True it would apply a second softmax to the
# probabilities and report a distorted loss.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["accuracy"]
)

In [None]:
# Train the network for five epochs on the normalised training images.
model.fit(x=x_train, y=y_train, epochs=5)

In [None]:
# Score the trained network on the test split; with one metric configured at
# compile time, evaluate yields the loss followed by the accuracy.
evaluation = model.evaluate(x=x_test, y=y_test, verbose=2)
test_loss, test_acc = evaluation

In [None]:
# Report the accuracy that model.evaluate returned for the test split.
print(f"Test accuracy: {test_acc}")

Test accuracy: 0.9747999906539917


In [None]:
"2."

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.metrics import mean_squared_error

In [None]:
# Load the iris dataset and separate the feature matrix from the class labels.
data = load_iris()
X, y = data.data, data.target

In [None]:
# Hold out 20% of the iris samples for testing; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Creating a decision tree classifier with default hyperparameters.
# random_state is pinned to 42, the same seed used for the train/test split.
tree_classifier = DecisionTreeClassifier(random_state=42)

In [None]:
# Fit the tree on the iris training split (features X_train, labels Y_train).
tree_classifier.fit(X=X_train, y=Y_train)

In [None]:
# Predicted class label for every row of the iris test split.
y_pred = tree_classifier.predict(X=X_test)

In [None]:
# Fraction of test samples whose predicted label matches the true label.
accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 1.0


In [None]:
"3."

In [None]:
# Ensemble predictions by averaging
#
# The previous expression averaged `test_acc` (a single MNIST accuracy number)
# with `y_pred` (iris class labels), which is not an ensemble of predictions.
# An ensemble needs both models to score the SAME samples, so a small network
# is fitted here on the iris training split and its class probabilities are
# averaged with the decision tree's (soft voting).
tf.random.set_seed(42)  # seeds TensorFlow's global generator for repeatable weights

# NOTE(review): the network's output index is used as the class label, which
# assumes the labels are the integers 0..n_classes-1 (true for load_iris).
n_classes = len(tree_classifier.classes_)
iris_network = tf.keras.Sequential([
    layers.Dense(16, activation="relu"),
    layers.Dense(16, activation="relu"),
    layers.Dense(n_classes, activation="softmax"),
])
iris_network.compile(
    optimizer="adam",
    # The softmax output is already a probability vector, so the loss keeps its
    # default from_logits=False.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)
iris_network.fit(X_train, Y_train, epochs=200, verbose=0)

# Per-class probabilities from each model, both of shape (n_test_samples, n_classes).
nn_probabilities = iris_network.predict(X_test, verbose=0)
tree_probabilities = tree_classifier.predict_proba(X_test)

# Average the two probability tables and pick the most likely class per sample.
# The result is a 1-D label array aligned with Y_test, so downstream metric
# cells keep working unchanged.
averaged_probabilities = (nn_probabilities + tree_probabilities) / 2
ensemble_predictions = tree_classifier.classes_[np.argmax(averaged_probabilities, axis=1)]

In [None]:
"4."

In [None]:
# test_loss is the loss value returned by model.evaluate, i.e. the sparse
# categorical cross-entropy configured in model.compile; it is not a
# misclassification rate.
print(f"Error of neural network: {test_loss}")

In [None]:
# Mean squared difference between the true iris labels and the tree's predictions.
mse = mean_squared_error(y_true=Y_test, y_pred=y_pred)
print(f"Error of decision tree: {mse}")

In [None]:
# Mean squared difference between the true iris labels and the ensemble output.
ensemble_mse = mean_squared_error(y_true=Y_test, y_pred=ensemble_predictions)
print(f"Error of ensemble: {ensemble_mse}")

In [None]:
"5."

In [None]:
# Number of extra rows to draw: 15% of the original sample count, truncated
# to an integer.
growth_fraction = 0.15
no_samples_to_add = int(growth_fraction * len(X))

In [None]:
# Draw row indices with replacement from the original samples. A seeded
# generator is used (instead of the unseeded global np.random state) so that
# the enlarged dataset is identical on every Restart & Run All, matching the
# fixed random_state=42 used by the other cells.
rng = np.random.default_rng(42)
duplicate_indices = rng.choice(len(X), size=no_samples_to_add, replace=True)

In [None]:
# Gather the sampled rows, and their matching labels, by index.
X_duplicate = np.take(X, duplicate_indices, axis=0)
y_duplicate = np.take(y, duplicate_indices, axis=0)

In [None]:
# Append the resampled rows after the original ones, keeping features and
# labels aligned row-for-row.
X_enlarged, y_enlarged = (
    np.concatenate((original, extra), axis=0)
    for original, extra in ((X, X_duplicate), (y, y_duplicate))
)

In [None]:
# Shape of the iris feature matrix before any resampled rows are appended.
print(f"Original data shape: {X.shape}")

Original data shape: (150, 4)


In [None]:
# Shape of the feature matrix after the resampled rows were concatenated on.
print(f"Increased data shape: {X_enlarged.shape}")

Increased data shape: (172, 4)


In [None]:
"6."

In [None]:
# Split the enlarged dataset, holding out 15% of it as test data.
# Note: this rebinds X_train, X_test, y_train and y_test, replacing the values
# those names held earlier in the notebook.
enlarged_split = train_test_split(
    X_enlarged,
    y_enlarged,
    test_size=0.15,
    random_state=42,
)
X_train, X_test, y_train, y_test = enlarged_split

In [None]:
# Display the test-set feature rows produced by the 15% split of the enlarged data.
X_test