#**M3**
## 1. Large Scale Machine Learning Systems
##a.	The Parameter Server Model


In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf

# Build a reproducible synthetic classification problem (fixed seed).
dataset_options = dict(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_clusters_per_class=2,
    random_state=42,
)
X, y = make_classification(**dataset_options)

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define the parameter server
class ParameterServer:
    """Minimal in-memory holder for the globally shared weights.

    ``model`` is stored on the instance exactly as given. ``weights`` is
    ``None`` until the first call to :meth:`update_weights`.
    """

    def __init__(self, model):
        self.model, self.weights = model, None

    def update_weights(self, new_weights):
        """Replace the stored weights with ``new_weights`` (kept by reference)."""
        self.weights = new_weights

    def get_weights(self):
        """Return the most recently stored weights, or ``None`` if none were set."""
        current = self.weights
        return current

# Define the worker
class Worker:
    """A training worker that fits a local model and pushes its weights.

    ``model`` must offer an incremental ``partial_fit(X, y, classes=...)``
    method and expose ``coef_`` once fitted. ``parameter_server`` must offer
    ``update_weights``.
    """

    def __init__(self, model, parameter_server):
        self.model = model
        self.parameter_server = parameter_server

    def train(self, X, y, classes=None):
        """Run one incremental training pass over ``(X, y)``.

        ``classes`` is the complete label set. When omitted it falls back to
        the labels present in ``y``, which is only safe if this batch happens
        to contain every class; a worker that holds a data shard should pass
        the global label set explicitly.
        """
        if classes is None:
            classes = np.unique(y)
        self.model.partial_fit(X, y, classes=classes)

    def update_parameters(self):
        """Push a copy of the local coefficients to the parameter server."""
        # Copy so that further local training cannot mutate the server's
        # stored weights in place.
        weights = np.array(self.model.coef_, copy=True)
        self.parameter_server.update_weights(weights)

# Initialize the parameter server and workers.
# NOTE: the logistic loss is spelled 'log_loss' in current scikit-learn; the
# older 'log' spelling has been removed.
NUM_WORKERS = 5
parameter_server = ParameterServer(SGDClassifier(loss='log_loss'))
workers = [Worker(SGDClassifier(loss='log_loss'), parameter_server) for _ in range(NUM_WORKERS)]

# Give every worker its own shard of the training data.
X_shards = np.array_split(X_train, NUM_WORKERS)
y_shards = np.array_split(y_train, NUM_WORKERS)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    # Each worker trains on its own subset of data
    for worker, X_shard, y_shard in zip(workers, X_shards, y_shards):
        worker.train(X_shard, y_shard)

    # Aggregate (average) coefficients and intercepts on the parameter server
    aggregated_weights = np.mean([worker.model.coef_ for worker in workers], axis=0)
    aggregated_intercept = np.mean([worker.model.intercept_ for worker in workers], axis=0)
    parameter_server.update_weights(aggregated_weights)

    # Broadcast the averaged parameters back so the next epoch starts from
    # the shared global state instead of each worker's private state.
    for worker in workers:
        worker.model.coef_ = aggregated_weights.copy()
        worker.model.intercept_ = aggregated_intercept.copy()

# Evaluate the final model on the test set.
# For a two-class problem coef_ has a single row, so the decision score is one
# number per sample and the predicted class is chosen by its sign. (argmax
# over that single column would always return class 0.)
final_weights = parameter_server.get_weights()
decision_scores = (X_test.dot(final_weights.T) + aggregated_intercept).ravel()
class_labels = np.unique(y_train)
y_pred = class_labels[(decision_scores > 0).astype(int)]
accuracy = accuracy_score(y_test, y_pred)
print(f"Final accuracy on test set: {accuracy}")


##b.	Spark Architecture

In [None]:
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator
import tensorflow as tf
from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
import numpy as np

# `Vectors` is used below but was never imported in this cell.
from pyspark.ml.linalg import Vectors

# Create a Spark session
spark = SparkSession.builder.appName("SparkMLExample").getOrCreate()

# Generate synthetic data
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_clusters_per_class=2, random_state=42)

# Create a DataFrame from the numpy arrays: one (label, feature-vector) row per sample
data = [(float(label), Vectors.dense(features)) for label, features in zip(y, X)]
df = spark.createDataFrame(data, ["label", "features"])

# Split the data into training and testing sets
train_data, test_data = df.randomSplit([0.8, 0.2], seed=42)

# Define the parameter server
class ParameterServer:
    """Simple holder for the shared model weights.

    The ``model`` passed in is stored unchanged; ``weights`` stays ``None``
    until :meth:`update_weights` is called.
    """

    def __init__(self, model):
        self.weights = None
        self.model = model

    def update_weights(self, new_weights):
        """Store ``new_weights`` as the current global weights."""
        self.weights = new_weights

    def get_weights(self):
        """Return the current global weights (``None`` if never updated)."""
        latest = self.weights
        return latest

# Define the worker
class Worker:
    """A worker that fits a Spark ML estimator on its own data shard.

    ``model`` is an unfitted Spark estimator. Spark's ``fit`` returns a new
    fitted model rather than mutating the estimator, so the result is kept in
    ``fitted_model``.
    """

    def __init__(self, model, parameter_server):
        self.model = model
        self.parameter_server = parameter_server
        self.fitted_model = None

    def train(self, data):
        """Fit the estimator on the Spark DataFrame ``data`` and keep the result."""
        self.fitted_model = self.model.fit(data)
        return self.fitted_model

    def update_parameters(self):
        """Push this worker's fitted coefficients to the parameter server."""
        if self.fitted_model is None:
            raise RuntimeError("train() must be called before update_parameters()")
        weights = self.fitted_model.coefficients.toArray()
        self.parameter_server.update_weights(weights)

# The previous conversion of the DataFrames to tf.data.Dataset objects was
# dropped: Spark estimators are fitted on Spark DataFrames, not TF datasets.

# Initialize the parameter server and workers
NUM_WORKERS = 5
parameter_server = ParameterServer(LogisticRegression())
workers = [Worker(LogisticRegression(), parameter_server) for _ in range(NUM_WORKERS)]

# Each worker trains on its own subset of data
worker_shards = train_data.randomSplit([1.0] * NUM_WORKERS, seed=42)
for worker, shard in zip(workers, worker_shards):
    worker.train(shard)

# Aggregate and update weights on the parameter server.
# Every fit() call above is a complete, independent optimisation of that
# worker's shard, so repeating it over several "epochs" would reproduce the
# same models; a single round of parameter averaging is performed instead.
aggregated_weights = np.mean(
    [worker.fitted_model.coefficients.toArray() for worker in workers], axis=0
)
aggregated_intercept = float(np.mean([worker.fitted_model.intercept for worker in workers]))
parameter_server.update_weights(aggregated_weights)

# Evaluate the averaged model on the test set. The test split is small, so it
# is collected to the driver and scored with NumPy.
final_weights = parameter_server.get_weights()
test_rows = test_data.select("label", "features").collect()
test_features = np.array([row["features"].toArray() for row in test_rows])
test_labels = np.array([row["label"] for row in test_rows])
predictions = (test_features.dot(final_weights) + aggregated_intercept > 0).astype(float)
accuracy = float(np.mean(predictions == test_labels))
print(f"Final accuracy on test set: {accuracy}")

# Stop the Spark session
spark.stop()


##c.	TensorFlow Architecture

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Reproducible synthetic classification data (seeded).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_clusters_per_class=2,
    random_state=42,
)

# 80/20 train/test split, also seeded.
split_arrays = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_arrays

# Define the Parameter Server Model
class ParameterServerModel(tf.Module):
    """Holds the global model: a single sigmoid unit over ``input_size`` features."""

    def __init__(self, input_size):
        # tf.Module sets up its own bookkeeping (name / name scope) in
        # __init__, so the base initialiser must run first.
        super().__init__()
        self.model = Sequential([
            Dense(1, input_shape=(input_size,), activation='sigmoid')
        ])

    def forward(self, x):
        """Return the model's output for the batch ``x``."""
        return self.model(x)

# Initialize the parameter server model
input_size = X_train.shape[1]
parameter_server_model = ParameterServerModel(input_size)

# Define the training function for the workers
def train_worker(model, X, y, epochs=1):
    """Compile ``model`` with plain SGD and fit it on ``(X, y)``.

    ``model`` must be a Keras model: it is compiled and fitted through the
    Keras API. Nothing is returned; the model is updated in place.
    """
    model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X, y, epochs=epochs, verbose=0)

# Workers are Keras clones of the server's network, so that train_worker can
# compile/fit them and their weight tensors have the shapes the server expects.
NUM_WORKERS = 4
server_network = parameter_server_model.model
worker_models = [tf.keras.models.clone_model(server_network) for _ in range(NUM_WORKERS)]

# One shard of the training data per worker.
X_shards = np.array_split(X_train, NUM_WORKERS)
y_shards = np.array_split(y_train, NUM_WORKERS)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    # Pull: every worker starts the round from the server's current weights.
    global_weights = server_network.get_weights()
    local_weights = []
    for worker_model, X_shard, y_shard in zip(worker_models, X_shards, y_shards):
        worker_model.set_weights(global_weights)
        train_worker(worker_model, X_shard, y_shard)
        local_weights.append(worker_model.get_weights())

    # Push: average each weight tensor (kernel and bias) across the workers
    # and install the result on the parameter server model.
    averaged_weights = [np.mean(tensors, axis=0) for tensors in zip(*local_weights)]
    server_network.set_weights(averaged_weights)

# Evaluate the final model on the test set
test_probabilities = parameter_server_model.forward(X_test).numpy().flatten()
y_pred = (test_probabilities > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
print(f"Final accuracy on test set: {accuracy}")


# 2. Execution of ML (or Big Data) Algorithms on Parallel / Distributed Systems
##a.	Performance Improvement and Trade-offs

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import time

# Generate synthetic data
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_clusters_per_class=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features; the scaler is fitted on the training split only so no
# test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline SGDClassifier (logistic regression trained with SGD).
# NOTE: the logistic loss is spelled 'log_loss' in current scikit-learn; the
# older 'log' spelling has been removed.
baseline_model = SGDClassifier(loss='log_loss', random_state=42)
baseline_model.fit(X_train_scaled, y_train)
y_pred_baseline = baseline_model.predict(X_test_scaled)
accuracy_baseline = accuracy_score(y_test, y_pred_baseline)
print(f"Baseline SGDClassifier Accuracy: {accuracy_baseline}")

# TensorFlow Neural Network with Early Stopping
tf_model = Sequential([
    Dense(64, input_shape=(X_train_scaled.shape[1],), activation='relu'),
    Dense(1, activation='sigmoid')
])

tf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Introduce early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model. The timer brackets fit() only, so the reported training
# time excludes model construction and test-set prediction.
start_time = time.perf_counter()
history = tf_model.fit(
    X_train_scaled, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=0
)
training_time = time.perf_counter() - start_time

# Evaluate the model: threshold the predicted probabilities at 0.5
test_probabilities = tf_model.predict(X_test_scaled).flatten()
y_pred_tf = (test_probabilities > 0.5).astype(int)
accuracy_tf = accuracy_score(y_test, y_pred_tf)
print(f"TensorFlow Neural Network Accuracy: {accuracy_tf}")
print(f"Training time: {training_time:.2f} seconds")

# Compare performance and trade-offs
print("\nPerformance Comparison:")
print(f"Baseline SGDClassifier Accuracy: {accuracy_baseline}")
print(f"TensorFlow Neural Network Accuracy: {accuracy_tf}")
print(f"Training time: {training_time:.2f} seconds")


#Distributed Neural Networks
##1.	Decentralized and Local SGD – System Support (All-reduce, Asynchronous Parallelism)

In [None]:
import tensorflow as tf
import horovod.tensorflow as hvd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# Initialize Horovod
hvd.init()

# Pin GPU to be used to process local rank (one GPU per process)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

# Generate synthetic data (same seed on every process, so all ranks see the same arrays)
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10, n_clusters_per_class=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Each process trains on its own interleaved shard of the training data.
rank, world_size = hvd.rank(), hvd.size()
X_local = X_train_scaled[rank::world_size].astype('float32')
y_local = y_train[rank::world_size].astype('float32').reshape(-1, 1)
TRAIN_STEPS = 100
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_local, y_local))
    .shuffle(len(X_local))
    .repeat()
    .batch(32)
)

# Define the local model. Horovod's TensorFlow API averages gradients of
# TensorFlow variables, so the logistic-regression model is expressed as a
# single sigmoid unit in Keras rather than as a scikit-learn estimator.
local_model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(X_train_scaled.shape[1],))
])
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)


@tf.function
def training_step(features, labels):
    """Run one SGD step whose gradients are all-reduced across processes."""
    with tf.GradientTape() as tape:
        probabilities = local_model(features, training=True)
        loss_value = loss_fn(labels, probabilities)
    # Wrapping the tape makes gradient() return the average over all ranks.
    tape = hvd.DistributedGradientTape(tape)
    gradients = tape.gradient(loss_value, local_model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, local_model.trainable_variables))
    return loss_value


# Start every process from rank 0's initial weights. Plain SGD keeps no
# per-variable optimizer state, so only the model variables are broadcast.
hvd.broadcast_variables(local_model.variables, root_rank=0)

# Train the local model
for features, labels in train_dataset.take(TRAIN_STEPS):
    training_step(features, labels)

# Evaluate the synchronized model on the test set
test_probabilities = local_model(X_test_scaled.astype('float32'), training=False).numpy().ravel()
y_pred = (test_probabilities > 0.5).astype(int)
accuracy = accuracy_score(y_test, y_pred)
# All ranks hold the same weights after synchronized training; report once.
if hvd.rank() == 0:
    print(f"Decentralized and Local SGD Accuracy: {accuracy}")


##2.	Large Scale Deep NN

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Larger synthetic problem for the deep network (seeded for reproducibility).
X, y = make_classification(
    n_samples=10000,
    n_features=50,
    n_informative=25,
    n_clusters_per_class=2,
    random_state=42,
)

# 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize using statistics of the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Wrap the arrays as tf.data pipelines: training data is shuffled, both are
# served in batches of 64.
train_slices = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train))
train_dataset = train_slices.shuffle(10000).batch(64)
test_slices = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test))
test_dataset = test_slices.batch(64)

# Define a simple deep neural network model using Keras
def create_model():
    """Build and compile a 256-128-1 fully connected binary classifier.

    The input width is read from the module-level ``X_train_scaled``. The
    model is compiled with Adam and binary cross-entropy and tracks accuracy.
    """
    feature_count = X_train_scaled.shape[1]
    network = models.Sequential()
    network.add(layers.Dense(256, activation='relu', input_shape=(feature_count,)))
    network.add(layers.Dense(128, activation='relu'))
    network.add(layers.Dense(1, activation='sigmoid'))
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return network

# Build the model inside the strategy scope so that its variables are created
# under tf.distribute.MirroredStrategy.
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    distributed_model = create_model()

# Train for 10 passes over the batched training pipeline.
distributed_model.fit(train_dataset, epochs=10)

# Score the held-out set: probabilities above 0.5 count as the positive class.
y_pred = distributed_model.predict(test_dataset)
is_positive = y_pred > 0.5
y_pred_binary = is_positive.astype(int)
accuracy = accuracy_score(y_test, y_pred_binary)
print(f"Accuracy on test set: {accuracy}")


##3.	Systems for Federated Learning

In [None]:
import tensorflow as tf
import tensorflow_federated as tff
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Seeded synthetic classification data.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_clusters_per_class=2,
    random_state=42,
)

# 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize with training-split statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# tf.data pipelines in batches of 10; only the training data is shuffled.
train_slices = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train))
train_dataset = train_slices.shuffle(100).batch(10)
test_slices = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test))
test_dataset = test_slices.batch(10)

# Define a simple Keras model
def create_keras_model():
    """Build the (uncompiled) 64-1 Keras network used for federated training.

    The model is deliberately NOT compiled: ``tff.learning.from_keras_model``
    requires an uncompiled model and receives the loss and metrics itself,
    while the optimizers are supplied to the federated averaging process.
    The input width is read from the module-level ``X_train_scaled``.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model

# Wrap the Keras model with TFF
def create_tff_model():
    """Wrap a fresh Keras model as a TFF learning model.

    The input spec is taken from the module-level ``train_dataset``; binary
    cross-entropy is the loss and binary accuracy the reported metric.
    """
    keras_model = create_keras_model()
    batch_spec = train_dataset.element_spec
    bce_loss = tf.keras.losses.BinaryCrossentropy()
    tracked_metrics = [tf.keras.metrics.BinaryAccuracy()]
    return tff.learning.from_keras_model(
        keras_model,
        input_spec=batch_spec,
        loss=bce_loss,
        metrics=tracked_metrics)

# Create a Federated Averaging process
def create_federated_averaging_process(model):
    """Build a Federated Averaging process around ``model``.

    ``model`` is forwarded unchanged as the first positional argument of
    ``tff.learning.build_federated_averaging_process``. Clients use SGD with
    learning rate 0.02 and the server uses SGD with learning rate 1.0.

    NOTE(review): TFF documents that first argument as a no-argument model
    constructor (``model_fn``) -- confirm callers pass a callable rather than
    an already-built model.
    """
    def make_client_optimizer():
        return tf.keras.optimizers.SGD(learning_rate=0.02)

    def make_server_optimizer():
        return tf.keras.optimizers.SGD(learning_rate=1.0)

    return tff.learning.build_federated_averaging_process(
        model,
        client_optimizer_fn=make_client_optimizer,
        server_optimizer_fn=make_server_optimizer)

# Simulate Federated Learning
federated_train_data = [train_dataset]  # For simplicity, using the same data on the client

# TFF builds its computations by calling a no-argument model constructor
# itself, so the constructor function is passed -- not an already-built model.
tff_model_fn = create_tff_model
federated_averaging_process = create_federated_averaging_process(tff_model_fn)
state = federated_averaging_process.initialize()

NUM_ROUNDS = 10
for round_num in range(NUM_ROUNDS):
    state, metrics = federated_averaging_process.next(state, federated_train_data)
    print(f'Round {round_num}: {metrics}')

# Evaluate the federated model on the test set
def evaluate_federated_model(state, test_dataset):
    """Evaluate the server model held in ``state`` on ``test_dataset``.

    The test set is presented as a single evaluation client. Returns the
    metrics produced by the federated evaluation computation.
    """
    evaluation = tff.learning.build_federated_evaluation(create_tff_model)
    return evaluation(state.model, [test_dataset])

test_metrics = evaluate_federated_model(state, test_dataset)
print(f'Test Metrics: {test_metrics}')


#**M4**
##ML Deployment on Constrained Systems I:
##1.	Model Compression, Compression vs. Inference

In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.decomposition import TruncatedSVD
import tensorflow as tf
import time

# Load dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier (original model)
original_model = RandomForestClassifier(n_estimators=100, random_state=42)
original_model.fit(X_train, y_train)

# Evaluate original model
original_predictions = original_model.predict(X_test)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# Compress the input representation with TruncatedSVD (64 -> 20 features).
# Seeded so the projection is reproducible like the other steps.
n_components = 20
svd = TruncatedSVD(n_components=n_components, random_state=42)
X_train_compressed = svd.fit_transform(X_train)
X_test_compressed = svd.transform(X_test)

# Train a RandomForestClassifier on the compressed data
compressed_model = RandomForestClassifier(n_estimators=100, random_state=42)
compressed_model.fit(X_train_compressed, y_train)

# Evaluate compressed model
compressed_predictions = compressed_model.predict(X_test_compressed)
compressed_accuracy = accuracy_score(y_test, compressed_predictions)
print(f"Compressed Model Accuracy: {compressed_accuracy:.4f}")

# TensorFlow for inference.
# TFLiteConverter only converts TensorFlow models (it has no scikit-learn
# constructor), so a small Keras classifier is trained on the same compressed
# features and that network is what gets converted and timed.
num_classes = len(set(y))
tflite_source_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(n_components,)),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
tflite_source_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
tflite_source_model.fit(X_train_compressed, y_train, epochs=30, verbose=0)

# Convert the Keras model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(tflite_source_model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model to a file
with open('compressed_model.tflite', 'wb') as f:
    f.write(tflite_model)

# Load the TensorFlow Lite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

# Run inference on a single example
input_tensor_index = interpreter.get_input_details()[0]['index']
output_tensor_index = interpreter.get_output_details()[0]['index']
input_example = X_test_compressed[0].reshape(1, -1).astype('float32')
interpreter.set_tensor(input_tensor_index, input_example)
start_time = time.perf_counter()
interpreter.invoke()
inference_time = time.perf_counter() - start_time

# Get the output (a copy of the output tensor) and print the result
inference_result = interpreter.get_tensor(output_tensor_index)[0]
print(f"\nInference Result: {inference_result}")
print(f"Inference Time: {inference_time:.4f} seconds")


##2.	Quantization and Learning with Limited Numerical Precision

In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
import time

# Load dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier (original model, kept as the accuracy baseline)
original_model = RandomForestClassifier(n_estimators=100, random_state=42)
original_model.fit(X_train, y_train)

# Evaluate original model
original_predictions = original_model.predict(X_test)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# TensorFlow for quantization.
# TFLiteConverter only converts TensorFlow models (it has no scikit-learn
# constructor), so a small Keras classifier is trained on the same data and
# that network is what gets quantized.
num_classes = len(set(y))
float_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
float_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
float_model.fit(X_train, y_train, epochs=30, verbose=0)

# Post-training quantization is requested through the converter's
# `optimizations` setting.
converter = tf.lite.TFLiteConverter.from_keras_model(float_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()

# Save the quantized TensorFlow Lite model to a file
with open('quantized_model.tflite', 'wb') as f:
    f.write(quantized_tflite_model)

# Load the TensorFlow Lite model and allocate tensors.
interpreter_quantized = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter_quantized.allocate_tensors()

# Run inference on a single example
input_tensor_index_quantized = interpreter_quantized.get_input_details()[0]['index']
output_tensor_index_quantized = interpreter_quantized.get_output_details()[0]['index']
input_example_quantized = X_test[0].reshape(1, -1).astype('float32')
interpreter_quantized.set_tensor(input_tensor_index_quantized, input_example_quantized)
start_time_quantized = time.perf_counter()
interpreter_quantized.invoke()
inference_time_quantized = time.perf_counter() - start_time_quantized

# Get the output (a copy of the output tensor) and print the result
inference_result_quantized = interpreter_quantized.get_tensor(output_tensor_index_quantized)[0]
print(f"\nQuantized Inference Result: {inference_result_quantized}")
print(f"Quantized Inference Time: {inference_time_quantized:.4f} seconds")


#Neural Network Pruning
##1.	 Pruning of CNNs

In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits
from sklearn.neural_network import MLPClassifier
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow_model_optimization.sparsity import keras as sparsity

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data: 8x8 single-channel images, pixel values scaled by 1/16
X_train = X_train.reshape(X_train.shape[0], 8, 8, 1).astype('float32') / 16.0
X_test = X_test.reshape(X_test.shape[0], 8, 8, 1).astype('float32') / 16.0

# Define and train a simple CNN model using Keras
model = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(8, 8, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model.fit(X_train, y_train, epochs=20, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the original model
original_predictions = np.argmax(model.predict(X_test), axis=1)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# Save the unpruned baseline before pruning starts, so it can be reloaded and
# compared against the pruned model later.
model.save('original_model.h5')

# Apply pruning to the model.
# The schedule must end within the number of optimizer steps that are actually
# run, otherwise the target sparsity is never reached.
PRUNING_EPOCHS = 20
PRUNING_BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
train_sample_count = int(X_train.shape[0] * (1 - VALIDATION_SPLIT))
steps_per_epoch = int(np.ceil(train_sample_count / PRUNING_BATCH_SIZE))
pruning_params = {
    'pruning_schedule': sparsity.PolynomialDecay(
        initial_sparsity=0.50,
        final_sparsity=0.80,
        begin_step=0,
        end_step=steps_per_epoch * PRUNING_EPOCHS,
    )
}

pruned_model = sparsity.prune_low_magnitude(model, **pruning_params)

# Compile the pruned model
pruned_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the pruned model.
# UpdatePruningStep is mandatory: it advances the pruning schedule each batch.
# Early stopping is not used here because stopping early would cut the
# sparsity schedule short.
pruned_model.fit(
    X_train, y_train,
    epochs=PRUNING_EPOCHS,
    batch_size=PRUNING_BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    callbacks=[sparsity.UpdatePruningStep()],
)

# Evaluate the pruned model
pruned_predictions = np.argmax(pruned_model.predict(X_test), axis=1)
pruned_accuracy = accuracy_score(y_test, pruned_predictions)
print(f"Pruned Model Accuracy: {pruned_accuracy:.4f}")

# Save the pruned model. The pruning wrappers are stripped first so that the
# file holds plain Keras layers and can be loaded without tfmot custom objects.
pruned_model = sparsity.strip_pruning(pruned_model)
pruned_model.save('pruned_model.h5')


##Evaluation of Pruning

In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_digits
from sklearn.neural_network import MLPClassifier
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model
from tensorflow_model_optimization.sparsity import keras as sparsity

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data
X_train = X_train.reshape(X_train.shape[0], 8, 8, 1).astype('float32') / 16.0
X_test = X_test.reshape(X_test.shape[0], 8, 8, 1).astype('float32') / 16.0

# Load the original model.
# This file must already exist on disk; make sure the training step saves it.
original_model = load_model('original_model.h5')

# Evaluate the original model
original_predictions = np.argmax(original_model.predict(X_test), axis=1)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")
print("Classification Report for Original Model:")
print(classification_report(y_test, original_predictions))

# Load the pruned model. prune_scope registers the pruning wrapper layers so
# the file loads whether or not the wrappers were stripped before saving;
# strip_pruning then leaves only the plain Keras layers.
with sparsity.prune_scope():
    pruned_model = sparsity.strip_pruning(load_model('pruned_model.h5'))

# Evaluate the pruned model
pruned_predictions = np.argmax(pruned_model.predict(X_test), axis=1)
pruned_accuracy = accuracy_score(y_test, pruned_predictions)
print(f"\nPruned Model Accuracy: {pruned_accuracy:.4f}")
print("Classification Report for Pruned Model:")
print(classification_report(y_test, pruned_predictions))

# Measure the sparsity of the pruned model directly from its weights: the
# fraction of exactly-zero entries across all kernels (tensors with more than
# one dimension; biases are excluded).
kernels = [
    weights
    for layer in pruned_model.layers
    for weights in layer.get_weights()
    if weights.ndim > 1
]
zero_entries = sum(int(np.sum(kernel == 0)) for kernel in kernels)
total_entries = sum(kernel.size for kernel in kernels)
final_sparsity = zero_entries / total_entries if total_entries else 0.0
print(f"\nFinal Sparsity of Pruned Model: {final_sparsity:.4f}")


##3.	 Deep Compression: Leveraging quantization, pruning, and sparsity.

In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_digits
from sklearn.neural_network import MLPClassifier
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow_model_optimization.sparsity import keras as sparsity

# This cell only imported Keras submodules; the TFLite converter used for the
# quantization stage lives under the top-level tensorflow package.
import tensorflow as tf

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data
X_train = X_train.reshape(X_train.shape[0], 8, 8, 1).astype('float32') / 16.0
X_test = X_test.reshape(X_test.shape[0], 8, 8, 1).astype('float32') / 16.0

# Define and train a simple CNN model using Keras
model = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(8, 8, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the original model
model.fit(X_train, y_train, epochs=10, validation_split=0.2)

# Evaluate the original model
original_predictions = np.argmax(model.predict(X_test), axis=1)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")
print("Classification Report for Original Model:")
print(classification_report(y_test, original_predictions))

# Stage 1 -- pruning / sparsity.
# prune_low_magnitude only adds pruning; it does not quantize anything.
quantize_model = sparsity.prune_low_magnitude(model)

# Compile the pruned model
quantize_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fine-tune with pruning enabled. UpdatePruningStep is mandatory: it advances
# the pruning schedule on every batch.
quantize_model.fit(
    X_train, y_train,
    epochs=10,
    validation_split=0.2,
    callbacks=[sparsity.UpdatePruningStep()],
)

# Remove the pruning wrappers so that only plain (now sparse) layers remain.
quantize_model = sparsity.strip_pruning(quantize_model)

# Stage 2 -- quantization of the pruned network via the TFLite converter.
converter = tf.lite.TFLiteConverter.from_keras_model(quantize_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_tflite_model = converter.convert()

# Evaluate the pruned + quantized model by running the TFLite interpreter
# over the test set one sample at a time.
interpreter = tf.lite.Interpreter(model_content=quantized_tflite_model)
interpreter.allocate_tensors()
input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']
quantized_predictions = []
for sample in X_test:
    interpreter.set_tensor(input_index, sample[np.newaxis, ...])
    interpreter.invoke()
    quantized_predictions.append(int(np.argmax(interpreter.get_tensor(output_index)[0])))
quantized_predictions = np.array(quantized_predictions)

quantized_accuracy = accuracy_score(y_test, quantized_predictions)
print(f"\nQuantized Model Accuracy: {quantized_accuracy:.4f}")
print("Classification Report for Quantized Model:")
print(classification_report(y_test, quantized_predictions))

# Save the pruned Keras model, plus the pruned + quantized TFLite flatbuffer.
quantize_model.save('quantized_model.h5')
with open('deep_compressed_model.tflite', 'wb') as f:
    f.write(quantized_tflite_model)


#ML Deployment on Constrained Systems II:
##1.	TinyML and TensorFlow Lite;

In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow import lite

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier (original model, kept as the accuracy baseline)
original_model = RandomForestClassifier(n_estimators=100, random_state=42)
original_model.fit(X_train, y_train)

# Evaluate original model
original_predictions = original_model.predict(X_test)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# TFLiteConverter only converts TensorFlow models (it has no scikit-learn
# constructor), so a small Keras classifier is trained on the same data and
# that network is what gets deployed.
num_classes = len(np.unique(y))
deploy_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
deploy_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
deploy_model.fit(X_train, y_train, epochs=30, verbose=0)

# Convert the model to TensorFlow Lite format
converter = lite.TFLiteConverter.from_keras_model(deploy_model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model to a file
with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

# Load the TensorFlow Lite model and allocate tensors.
interpreter = lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

# Run inference on a single example
input_tensor_index = interpreter.get_input_details()[0]['index']
output_tensor_index = interpreter.get_output_details()[0]['index']
input_example = X_test[0].reshape(1, -1).astype('float32')
interpreter.set_tensor(input_tensor_index, input_example)
interpreter.invoke()

# Get the output (a copy of the output tensor) and print the result
inference_result = interpreter.get_tensor(output_tensor_index)[0]
predicted_class = np.argmax(inference_result)
print(f"\nInference Result: {inference_result}")
print(f"Predicted Class: {predicted_class}")


##2.	Energy Constraints – Adapting Algorithms for Constrained Devices;

In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow import lite

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier (original model, kept as the accuracy baseline)
original_model = RandomForestClassifier(n_estimators=100, random_state=42)
original_model.fit(X_train, y_train)

# Evaluate original model
original_predictions = original_model.predict(X_test)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# TFLiteConverter only converts TensorFlow models (it has no scikit-learn
# constructor), so a small Keras classifier is trained on the same data and
# that network is what gets deployed.
num_classes = len(np.unique(y))
deploy_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
deploy_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
deploy_model.fit(X_train, y_train, epochs=30, verbose=0)

# Convert the model to TensorFlow Lite format with quantization
converter = lite.TFLiteConverter.from_keras_model(deploy_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save the TensorFlow Lite model to a file
with open('energy_constrained_model.tflite', 'wb') as f:
    f.write(tflite_model)

# Load the TensorFlow Lite model and allocate tensors.
interpreter = lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

# Run inference on a single example
input_tensor_index = interpreter.get_input_details()[0]['index']
output_tensor_index = interpreter.get_output_details()[0]['index']
input_example = X_test[0].reshape(1, -1).astype('float32')
interpreter.set_tensor(input_tensor_index, input_example)
interpreter.invoke()

# Get the output (a copy of the output tensor) and print the result
inference_result = interpreter.get_tensor(output_tensor_index)[0]
predicted_class = np.argmax(inference_result)
print(f"\nInference Result: {inference_result}")
print(f"Predicted Class: {predicted_class}")


##3.	Assessing the tradeoffs - Accuracy of prediction, Model Size, Throughput, Response Time, Energy Consumption





In [None]:
# Install necessary packages
# pip install scikit-learn tensorflow

# Import libraries
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow import lite
import time

# Load the digits dataset
digits = load_digits()
X, y = digits.data, digits.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier (original model, kept as the accuracy baseline)
original_model = RandomForestClassifier(n_estimators=100, random_state=42)
original_model.fit(X_train, y_train)

# Evaluate original model
original_predictions = original_model.predict(X_test)
original_accuracy = accuracy_score(y_test, original_predictions)
print(f"Original Model Accuracy: {original_accuracy:.4f}")

# TFLiteConverter only converts TensorFlow models (it has no scikit-learn
# constructor), so a small Keras classifier is trained on the same data and
# that network is what gets deployed and measured.
num_classes = len(np.unique(y))
deploy_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
deploy_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
deploy_model.fit(X_train, y_train, epochs=30, verbose=0)

# Convert the model to TensorFlow Lite format with quantization
converter = lite.TFLiteConverter.from_keras_model(deploy_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Save the TensorFlow Lite model to a file
with open('tradeoff_model.tflite', 'wb') as f:
    f.write(tflite_model)

# Load the TensorFlow Lite model and allocate tensors.
interpreter = lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

# Tensor indices and the probe input do not change between runs, so they are
# looked up once, outside the timed loop.
input_tensor_index = interpreter.get_input_details()[0]['index']
output_tensor_index = interpreter.get_output_details()[0]['index']
input_example = X_test[0].reshape(1, -1).astype('float32')

# Accuracy of the deployed (quantized TFLite) model on the test set
tflite_predictions = []
for sample in X_test:
    interpreter.set_tensor(input_tensor_index, sample.reshape(1, -1).astype('float32'))
    interpreter.invoke()
    tflite_predictions.append(int(np.argmax(interpreter.get_tensor(output_tensor_index)[0])))
tflite_accuracy = accuracy_score(y_test, tflite_predictions)
print(f"TFLite Model Accuracy: {tflite_accuracy:.4f}")

# Measure inference time
num_samples = 100
inference_times = []

for _ in range(num_samples):
    interpreter.set_tensor(input_tensor_index, input_example)

    start_time = time.perf_counter()
    interpreter.invoke()
    inference_time = time.perf_counter() - start_time
    inference_times.append(inference_time)

# Calculate average inference time
average_inference_time = np.mean(inference_times)
print(f"\nAverage Inference Time: {average_inference_time:.4f} seconds")

# Assess tradeoffs
ASSUMED_POWER_WATTS = 1.0  # placeholder power draw; replace with a measured value
model_size = len(tflite_model) / (1024 * 1024)  # in megabytes
# One prediction takes average_inference_time seconds, so sequential
# throughput is its reciprocal (not num_samples / average time).
throughput = 1.0 / average_inference_time if average_inference_time > 0 else float('inf')
response_time = average_inference_time  # seconds
energy_consumption = ASSUMED_POWER_WATTS * average_inference_time  # joules per inference

print(f"\nTradeoff Assessment:")
print(f"Model Size: {model_size:.4f} MB")
print(f"Throughput: {throughput:.4f} predictions/second")
print(f"Response Time: {response_time:.4f} seconds")
print(f"Energy Consumption: {energy_consumption:.4f} J (assuming a constant {ASSUMED_POWER_WATTS} W power draw)")
