# M1	Introduction and Context
1	ML and DL:
1.	Performance:


a.	Metrics: Time Complexity of Algorithms and Running Time; Memory, Response Time


In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import tensorflow as tf

# Time a placeholder workload for a dataset of the requested size
def measure_time_complexity(data_size):
    """Return the seconds spent inside the (currently empty) timed section.

    A synthetic classification dataset with ``data_size`` samples and 20
    features is generated before the clock starts. The dataset is not used
    yet: the timed section is only a placeholder for a real algorithm.
    """
    features, labels = datasets.make_classification(
        n_samples=data_size, n_features=20, random_state=42
    )
    began = time.time()
    # Your algorithm with time complexity measurement here
    return time.time() - began

# Time one complete SVC fit / predict / score cycle
def measure_running_time(X_train, y_train, X_test, y_test):
    """Fit a default ``SVC``, score it on the test split, and time the whole run.

    Returns:
        A tuple ``(elapsed_time, accuracy)``. ``elapsed_time`` is wall-clock
        seconds covering model construction, fitting, prediction and scoring.
    """
    began = time.time()
    classifier = SVC()
    classifier.fit(X_train, y_train)
    score = accuracy_score(y_test, classifier.predict(X_test))
    duration = time.time() - began
    return duration, score

# Script entry point
def main():
    """Run both timing demos on 1000-sample synthetic data and print the results."""
    sample_count = 1000

    # Placeholder timing demo
    placeholder_seconds = measure_time_complexity(sample_count)
    print(f"Time complexity for {sample_count} data points: {placeholder_seconds} seconds")

    # Data for the SVC benchmark (fixed seeds keep the split reproducible)
    features, labels = datasets.make_classification(n_samples=1000, n_features=20, random_state=42)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # SVC fit/predict timing demo
    svc_seconds, svc_accuracy = measure_running_time(train_X, train_y, test_X, test_y)
    print(f"Running time: {svc_seconds} seconds")
    print(f"Accuracy: {svc_accuracy}")

if __name__ == "__main__":
    main()

b.	Scaling and Tuning of Performance


In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Grid-search SVC hyperparameters and time the search
def perform_grid_search(X_train_scaled, y_train):
    """Run a 3-fold grid search over ``C`` and ``gamma`` for an ``SVC``.

    Returns:
        A tuple ``(best_params, elapsed_time)``: the best parameter dict
        reported by ``GridSearchCV`` and the wall-clock seconds spent in
        ``fit`` (construction of the search object is not timed).
    """
    search = GridSearchCV(
        SVC(),
        {'C': [0.1, 1, 10, 100], 'gamma': [0.01, 0.1, 1, 10]},
        cv=3,
    )
    began = time.time()
    search.fit(X_train_scaled, y_train)
    duration = time.time() - began
    return search.best_params_, duration

# Standardise both splits using statistics from the training split
def scale_features(X_train, X_test):
    """Return ``(X_train_scaled, X_test_scaled)`` from a ``StandardScaler``.

    The scaler is fitted on ``X_train`` only and then applied to both splits,
    so no test-set statistics influence the transformation.
    """
    standardizer = StandardScaler()
    standardizer.fit(X_train)
    return standardizer.transform(X_train), standardizer.transform(X_test)

# Build, compile and fit a one-hidden-layer binary classifier
def train_neural_network(X_train_scaled, y_train):
    """Train a 64-unit ReLU network with a sigmoid output for 10 epochs.

    Returns:
        A tuple ``(model, elapsed_time)``. ``elapsed_time`` is the wall-clock
        seconds spent in ``model.fit`` only; building and compiling the model
        is not timed.
    """
    network = Sequential([
        Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    began = time.time()
    network.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)
    duration = time.time() - began
    return network, duration

# Main function
def main():
    """Scale synthetic data, grid-search an SVC, then train and score a neural network.

    Prints the best SVC hyperparameters, the grid-search time, the network
    training time and the network's accuracy on the held-out split.
    """
    # Generate sample data
    X, y = datasets.make_classification(n_samples=1000, n_features=20, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale features
    X_train_scaled, X_test_scaled = scale_features(X_train, X_test)

    # Perform hyperparameter tuning
    best_params, grid_search_time = perform_grid_search(X_train_scaled, y_train)
    print(f"Best hyperparameters: {best_params}")
    print(f"Grid search time: {grid_search_time} seconds")

    # Train a neural network
    model, training_time = train_neural_network(X_train_scaled, y_train)
    print(f"Neural network training time: {training_time} seconds")

    # Evaluate the model on the already-scaled test split.  The scaler object
    # lives only inside scale_features(), so it must not be referenced here.
    predictions = model.predict(X_test_scaled)
    # Threshold the sigmoid outputs and flatten (n, 1) -> (n,) to match y_test
    predictions = (predictions > 0.5).astype(int).flatten()
    accuracy = accuracy_score(y_test, predictions)
    print(f"Neural network accuracy: {accuracy}")

if __name__ == "__main__":
    main()


2.	Environments:


a.	Training vs. Deployment


In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import joblib

# Build and fit the requested kind of classifier
def train_classifier(X_train, y_train, model_type='random_forest'):
    """Fit and return a classifier chosen by ``model_type``.

    Args:
        X_train: training features; ``X_train.shape[1]`` sizes the network input.
        y_train: training labels.
        model_type: ``'random_forest'`` or ``'neural_network'``.

    Raises:
        ValueError: if ``model_type`` is neither of the supported names.
    """
    if model_type not in ('random_forest', 'neural_network'):
        raise ValueError("Invalid model type")

    if model_type == 'random_forest':
        model = RandomForestClassifier(n_estimators=100, random_state=42)
    else:
        model = Sequential()
        model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Both model kinds are fitted with their default fit() settings
    model.fit(X_train, y_train)
    return model

# Produce hard class predictions from a trained model
def deploy_model(model, X):
    """Return class predictions for ``X`` from a supported trained model.

    A ``RandomForestClassifier`` predicts directly; a Keras ``Sequential``
    model has its outputs thresholded at 0.5 and flattened to 1-D.

    Raises:
        ValueError: if ``model`` is neither of the supported types.
    """
    if isinstance(model, RandomForestClassifier):
        return model.predict(X)
    if isinstance(model, Sequential):
        probabilities = model.predict(X)
        return (probabilities > 0.5).astype(int).flatten()
    raise ValueError("Invalid model type")

# Script entry point
def main():
    """Train a random forest, round-trip it through joblib, and report test accuracy."""
    features, labels = datasets.make_classification(n_samples=1000, n_features=20, random_state=42)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Training side: fit the model and persist it to disk
    forest = train_classifier(train_X, train_y, model_type='random_forest')
    joblib.dump(forest, 'random_forest_model.joblib')

    # Deployment side (would normally be a different script or environment):
    # reload the persisted model and predict
    restored = joblib.load('random_forest_model.joblib')
    deployed_predictions = deploy_model(restored, test_X)

    score = accuracy_score(test_y, deployed_predictions)
    print(f"Random Forest Model Accuracy: {score}")

if __name__ == "__main__":
    main()


b.	Range of Systems:
Distributed and Cloud, Embedded and Mobile.



In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# scikit-learn removed the bundled ``sklearn.externals.joblib`` in 0.23;
# the standalone ``joblib`` package provides the same dump/load API.
import joblib
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build and fit the requested kind of classifier
def train_classifier(X_train, y_train, model_type='random_forest'):
    """Fit and return a random forest or a small neural network.

    Args:
        X_train: training features; ``X_train.shape[1]`` sizes the network input.
        y_train: training labels.
        model_type: ``'random_forest'`` or ``'neural_network'``.

    Raises:
        ValueError: if ``model_type`` is not one of the two supported names.
    """
    if model_type == 'random_forest':
        classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    elif model_type == 'neural_network':
        classifier = Sequential([
            Dense(64, input_dim=X_train.shape[1], activation='relu'),
            Dense(1, activation='sigmoid'),
        ])
        classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    else:
        raise ValueError("Invalid model type")

    classifier.fit(X_train, y_train)
    return classifier

# Persist a trained model using the serializer that matches its type
def save_model(model, filename):
    """Write ``model`` to ``filename``.

    Random forests are written with ``joblib.dump``; Keras ``Sequential``
    models use their own ``save`` method.

    Raises:
        ValueError: if ``model`` is neither of the supported types.
    """
    if isinstance(model, RandomForestClassifier):
        joblib.dump(model, filename)
        return
    if isinstance(model, Sequential):
        model.save(filename)
        return
    raise ValueError("Invalid model type")

# Training half of the demo (stands in for the distributed/cloud environment)
def train_cloud_model():
    """Train a random forest on synthetic data and save it as ``cloud_model.joblib``."""
    features, labels = datasets.make_classification(n_samples=1000, n_features=20, random_state=42)

    # Only the training portion of the split is needed here
    split = train_test_split(features, labels, test_size=0.2, random_state=42)
    train_features, train_labels = split[0], split[2]

    forest = train_classifier(train_features, train_labels, model_type='random_forest')
    save_model(forest, 'cloud_model.joblib')

# Inference half of the demo (stands in for the mobile/embedded device)
def deploy_mobile_model(X_test):
    """Load ``cloud_model.joblib`` and return its predictions for ``X_test``."""
    device_model = joblib.load('cloud_model.joblib')
    return device_model.predict(X_test)

# Script entry point
def main():
    """Train and save the model, then predict on a random 10x20 batch."""
    # Distributed/Cloud side: train and persist the model
    train_cloud_model()

    # Mobile/Embedded side: in a real deployment the batch would come from the
    # device; random values are used here purely for illustration.
    mobile_batch = np.random.rand(10, 20)
    mobile_predictions = deploy_mobile_model(mobile_batch)

    print("Predictions on Mobile/Embedded System:", mobile_predictions)

if __name__ == "__main__":
    main()


2	Parallel and Distributed Algorithms:


1.	Systems and Performance;


In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Grid-search a random forest with all available cores
def perform_parallel_processing(X_train, y_train):
    """Run a 3-fold grid search over forest size and depth with ``n_jobs=-1``.

    Returns:
        A tuple ``(best_params, elapsed_time)``: the best parameter dict and
        the wall-clock seconds spent inside ``GridSearchCV.fit``.
    """
    candidates = {'n_estimators': [10, 50, 100, 200], 'max_depth': [None, 10, 20, 30]}
    search = GridSearchCV(RandomForestClassifier(), candidates, cv=3, n_jobs=-1)

    began = time.time()
    search.fit(X_train, y_train)
    duration = time.time() - began
    return search.best_params_, duration

# Train a small binary classifier under a tf.distribute strategy
def perform_distributed_training(X_train, y_train, strategy):
    """Build the model inside ``strategy.scope()`` and fit it for 3 epochs.

    Args:
        X_train: training features; ``X_train.shape[1]`` sizes the input layer.
        y_train: training labels.
        strategy: object providing a ``scope()`` context manager.

    Returns:
        A tuple ``(model, elapsed_time)``; only the ``fit`` call is timed.
    """
    with strategy.scope():
        network = Sequential([
            Dense(64, input_dim=X_train.shape[1], activation='relu'),
            Dense(1, activation='sigmoid'),
        ])
        network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    began = time.time()
    network.fit(X_train, y_train, epochs=3, batch_size=32, verbose=0)
    duration = time.time() - began
    return network, duration

# Main function
def main():
    """Compare a parallel scikit-learn grid search with MirroredStrategy training.

    Prints the best grid-search parameters and time, the number of replicas,
    the distributed training time and the network's test accuracy.
    """
    # Generate sample data
    X, y = datasets.make_classification(n_samples=1000, n_features=20, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Perform parallel processing with scikit-learn
    parallel_params, parallel_time = perform_parallel_processing(X_train, y_train)
    print(f"Best hyperparameters from parallel processing: {parallel_params}")
    print(f"Parallel processing time: {parallel_time} seconds")

    # Perform distributed training with TensorFlow
    strategy = tf.distribute.MirroredStrategy()
    print(f'Number of devices: {strategy.num_replicas_in_sync}')

    # Pass the NumPy arrays straight through.  experimental_distribute_dataset()
    # expects a tf.data.Dataset and returns a single distributed dataset, so it
    # can neither take an (X, y) tuple nor be unpacked into two arrays.  A model
    # created under strategy.scope() has its fit() input distributed by Keras.
    model, distributed_time = perform_distributed_training(X_train, y_train, strategy)
    print(f"Distributed training time: {distributed_time} seconds")

    # Evaluate the model
    predictions = model.predict(X_test)
    predictions = (predictions > 0.5).astype(int).flatten()
    accuracy = accuracy_score(y_test, predictions)
    print(f"Neural network accuracy: {accuracy}")

if __name__ == "__main__":
    main()


2.	Speedup – Approaches and Issues;


1. Parallelization with scikit-learn:

In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from joblib import parallel_backend

# Fit a random forest under joblib's threading backend
def train_classifier(X_train, y_train, n_jobs=1):
    """Fit a 100-tree random forest inside a ``parallel_backend('threading')`` block.

    Args:
        X_train: training features.
        y_train: training labels.
        n_jobs: worker count handed to the joblib backend context.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    with parallel_backend('threading', n_jobs=n_jobs):
        forest.fit(X_train, y_train)
    return forest

# Script entry point
def main():
    """Time a 2-worker random forest fit on 10000 synthetic samples and score it."""
    features, labels = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Timed section: training only
    began = time.time()
    forest = train_classifier(train_X, train_y, n_jobs=2)
    training_seconds = time.time() - began

    score = accuracy_score(test_y, forest.predict(test_X))

    print(f"Random Forest Training Time: {training_seconds} seconds")
    print(f"Random Forest Accuracy: {score}")

if __name__ == "__main__":
    main()


2. GPU Acceleration with TensorFlow:


In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Function to train a neural network
def train_neural_network(X_train, y_train, use_gpu=True):
    """Train a one-hidden-layer binary classifier for 10 epochs.

    Args:
        X_train: training features; ``X_train.shape[1]`` sizes the input layer.
        y_train: training labels.
        use_gpu: when true, request on-demand memory growth on every visible
            GPU.  The flag only controls that setting; it does not by itself
            pin the computation to a particular device.

    Returns:
        A tuple ``(model, elapsed_time)``; only the ``fit`` call is timed.
    """
    if use_gpu:
        # Apply the setting to every GPU rather than only the first one, so
        # all devices end up with the same memory-growth configuration.
        for gpu in tf.config.list_physical_devices('GPU'):
            try:
                tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as err:
                # Raised when TensorFlow has already initialised its devices
                # (e.g. an earlier cell ran a model); training can still go on.
                print(f"Could not enable GPU memory growth: {err}")

    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    start_time = time.time()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
    elapsed_time = time.time() - start_time
    return model, elapsed_time

# Main function
def main():
    """Train the network with GPU settings enabled and print time and accuracy."""
    # This cell's import list omits accuracy_score; import it locally so the
    # cell runs on its own instead of failing with a NameError.
    from sklearn.metrics import accuracy_score

    # Generate sample data
    X, y = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a neural network with GPU acceleration.  elapsed_time covers the
    # whole call (device setup, model build and fit); training_time is the
    # fit-only figure returned by the helper.
    start_time = time.time()
    model, training_time = train_neural_network(X_train, y_train, use_gpu=True)
    elapsed_time = time.time() - start_time

    # Evaluate the model
    predictions = (model.predict(X_test) > 0.5).astype(int).flatten()
    accuracy = accuracy_score(y_test, predictions)

    print(f"Neural Network Training Time: {elapsed_time} seconds")
    print(f"Neural Network Accuracy: {accuracy}")

if __name__ == "__main__":
    main()


3.	Data Parallelism vs. Task Parallelism vs. Request Parallelism.


Data Parallelism with scikit-learn:

In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from joblib import Parallel, delayed

# Fit one 50-tree random forest on a single data shard
def train_classifier_subset(X_subset, y_subset):
    """Return a ``RandomForestClassifier`` (50 trees, seed 42) fitted on the shard."""
    shard_model = RandomForestClassifier(n_estimators=50, random_state=42)
    shard_model.fit(X_subset, y_subset)
    return shard_model

# Function to perform data parallelism
def data_parallelism(X_train, y_train, n_jobs=1):
    """Train one random forest per data shard, in parallel.

    The training data is cut into ``n_jobs`` contiguous shards whose sizes
    differ by at most one sample.  (Stepping through the data in fixed strides
    of ``num_samples // n_jobs`` would create an extra, tiny shard whenever the
    sample count is not divisible by ``n_jobs``.)

    Args:
        X_train: training features, sliceable along the first axis.
        y_train: training labels aligned with ``X_train``.
        n_jobs: number of shards and joblib workers; must be at least 1.

    Returns:
        List of fitted models, one per shard, in shard order.

    Raises:
        ValueError: if ``n_jobs`` is smaller than 1.
    """
    if n_jobs < 1:
        raise ValueError("n_jobs must be a positive integer")

    num_samples = X_train.shape[0]
    # Never create more shards than samples, which would leave empty shards
    num_shards = max(1, min(n_jobs, num_samples))

    # Split the data into subsets for parallel processing
    X_shards = np.array_split(X_train, num_shards)
    y_shards = np.array_split(y_train, num_shards)

    # Train the classifier on each subset in parallel
    models = Parallel(n_jobs=n_jobs)(
        delayed(train_classifier_subset)(X_part, y_part)
        for X_part, y_part in zip(X_shards, y_shards)
    )

    # The caller is responsible for combining the per-shard models' predictions
    return models

# Script entry point
def main():
    """Train two shard models in parallel and score their averaged vote."""
    features, labels = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Timed section: sharded training only
    began = time.time()
    shard_models = data_parallelism(train_X, train_y, n_jobs=2)
    training_seconds = time.time() - began

    # Average the per-model class predictions, then threshold at 0.5
    per_model_predictions = [shard_model.predict(test_X) for shard_model in shard_models]
    averaged_vote = np.mean(per_model_predictions, axis=0)
    ensemble_predictions = (averaged_vote > 0.5).astype(int)

    score = accuracy_score(test_y, ensemble_predictions)
    print(f"Ensemble Model Training Time: {training_seconds} seconds")
    print(f"Ensemble Model Accuracy: {score}")

if __name__ == "__main__":
    main()


Task Parallelism with TensorFlow:

In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Train one small binary classifier on a single data shard
def train_neural_network_task(X_subset, y_subset):
    """Build, compile and fit (3 epochs) a 64-unit network on the given shard."""
    network = Sequential([
        Dense(64, input_dim=X_subset.shape[1], activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    network.fit(X_subset, y_subset, epochs=3, batch_size=32, verbose=0)
    return network

# Function to perform task parallelism
def task_parallelism(X_train, y_train, num_tasks=2):
    """Train one neural network per data shard, one task per shard.

    The data is cut into ``num_tasks`` contiguous shards whose sizes differ by
    at most one sample, so no extra undersized shard appears when the sample
    count is not divisible by ``num_tasks``.  The tasks are executed one after
    another in this process; no worker pool is involved.

    Args:
        X_train: training features, sliceable along the first axis.
        y_train: training labels aligned with ``X_train``.
        num_tasks: number of shards / independent training tasks (>= 1).

    Returns:
        List of trained models, one per shard, in shard order.

    Raises:
        ValueError: if ``num_tasks`` is smaller than 1.
    """
    if num_tasks < 1:
        raise ValueError("num_tasks must be a positive integer")

    num_samples = X_train.shape[0]
    # Never create more shards than samples, which would leave empty shards
    num_shards = max(1, min(num_tasks, num_samples))

    # Split the data into subsets, one per task
    X_shards = np.array_split(X_train, num_shards)
    y_shards = np.array_split(y_train, num_shards)

    # Train the neural network on each subset as a separate task
    models = [
        train_neural_network_task(X_part, y_part)
        for X_part, y_part in zip(X_shards, y_shards)
    ]

    # Combine the models if needed

    return models

# Script entry point
def main():
    """Time the training of two per-shard networks on 10000 synthetic samples."""
    features, labels = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Timed section: per-shard training
    began = time.time()
    models = task_parallelism(train_X, train_y, num_tasks=2)
    training_seconds = time.time() - began

    # Aggregate predictions or perform further tasks if needed

    print(f"Task Parallelism Training Time: {training_seconds} seconds")

if __name__ == "__main__":
    main()


Request parallelism means serving many independent inference requests at the same time. It is not demonstrated in the examples above because it is mainly relevant in deployment scenarios, such as serving models behind a web service.

4.	Scale-out Clusters – Cost of communication and impact on Speedup



In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# scikit-learn removed the bundled ``sklearn.externals.joblib`` in 0.23;
# the standalone ``joblib`` package provides the same Parallel/delayed API.
import joblib

# Fit one 50-tree random forest on a single data shard
def train_classifier_subset(X_subset, y_subset):
    """Return a ``RandomForestClassifier`` (50 trees, seed 42) fitted on the shard."""
    shard_model = RandomForestClassifier(n_estimators=50, random_state=42)
    shard_model.fit(X_subset, y_subset)
    return shard_model

# Function to perform data parallelism
def data_parallelism(X_train, y_train, n_jobs=1):
    """Train one random forest per data shard, in parallel.

    The training data is cut into ``n_jobs`` contiguous shards whose sizes
    differ by at most one sample.  (Stepping through the data in fixed strides
    of ``num_samples // n_jobs`` would create an extra, tiny shard whenever the
    sample count is not divisible by ``n_jobs``.)

    Args:
        X_train: training features, sliceable along the first axis.
        y_train: training labels aligned with ``X_train``.
        n_jobs: number of shards and joblib workers; must be at least 1.

    Returns:
        List of fitted models, one per shard, in shard order.

    Raises:
        ValueError: if ``n_jobs`` is smaller than 1.
    """
    if n_jobs < 1:
        raise ValueError("n_jobs must be a positive integer")

    num_samples = X_train.shape[0]
    # Never create more shards than samples, which would leave empty shards
    num_shards = max(1, min(n_jobs, num_samples))

    # Split the data into subsets for parallel processing
    X_shards = np.array_split(X_train, num_shards)
    y_shards = np.array_split(y_train, num_shards)

    # Train the classifier on each subset in parallel
    models = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(train_classifier_subset)(X_part, y_part)
        for X_part, y_part in zip(X_shards, y_shards)
    )

    # The caller is responsible for combining the per-shard models' predictions
    return models

# Train a small binary classifier under a tf.distribute strategy
def perform_distributed_training(X_train, y_train, strategy):
    """Build the model inside ``strategy.scope()`` and fit it for 3 epochs.

    Args:
        X_train: training features; ``X_train.shape[1]`` sizes the input layer.
        y_train: training labels.
        strategy: object providing a ``scope()`` context manager.

    Returns:
        A tuple ``(model, elapsed_time)``; only the ``fit`` call is timed.
    """
    with strategy.scope():
        network = Sequential([
            Dense(64, input_dim=X_train.shape[1], activation='relu'),
            Dense(1, activation='sigmoid'),
        ])
        network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    began = time.time()
    network.fit(X_train, y_train, epochs=3, batch_size=32, verbose=0)
    duration = time.time() - began
    return network, duration

# Main function
def main():
    """Compare sharded random-forest training with MirroredStrategy network training.

    Prints training time and test accuracy for the two-shard forest ensemble,
    then the replica count, training time and accuracy of the network trained
    under ``tf.distribute.MirroredStrategy``.
    """
    # Generate sample data
    X, y = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Perform data parallelism with scikit-learn
    start_time = time.time()
    models = data_parallelism(X_train, y_train, n_jobs=2)
    elapsed_time_data_parallelism = time.time() - start_time

    # Aggregate predictions for ensemble models: average the class votes,
    # then threshold at 0.5
    predictions_data_parallelism = np.mean([model.predict(X_test) for model in models], axis=0)
    predictions_data_parallelism = (predictions_data_parallelism > 0.5).astype(int)

    # Evaluate the ensemble model
    accuracy_data_parallelism = accuracy_score(y_test, predictions_data_parallelism)
    print(f"Ensemble Model (Data Parallelism) Training Time: {elapsed_time_data_parallelism} seconds")
    print(f"Ensemble Model (Data Parallelism) Accuracy: {accuracy_data_parallelism}")

    # Perform distributed training with TensorFlow
    strategy = tf.distribute.MirroredStrategy()
    print(f'Number of devices: {strategy.num_replicas_in_sync}')

    # Pass the NumPy arrays straight through.  experimental_distribute_dataset()
    # expects a tf.data.Dataset and returns a single distributed dataset, so it
    # can neither take an (X, y) tuple nor be unpacked into two arrays.  A model
    # created under strategy.scope() has its fit() input distributed by Keras.
    model, elapsed_time_distributed_training = perform_distributed_training(X_train, y_train, strategy)

    # Evaluate the model
    predictions_distributed_training = (model.predict(X_test) > 0.5).astype(int).flatten()
    accuracy_distributed_training = accuracy_score(y_test, predictions_distributed_training)
    print(f"Neural Network Training Time (Distributed): {elapsed_time_distributed_training} seconds")
    print(f"Neural Network Accuracy (Distributed): {accuracy_distributed_training}")

if __name__ == "__main__":
    main()


3	Modern Systems:
1.	Parallel Execution on Multicore processors and GPGPUs


Parallel Execution on Multicore Processors:

In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from joblib import Parallel, delayed

# Fit one 50-tree random forest on a single data shard
def train_classifier_subset(X_subset, y_subset):
    """Return a ``RandomForestClassifier`` (50 trees, seed 42) fitted on the shard."""
    shard_model = RandomForestClassifier(n_estimators=50, random_state=42)
    shard_model.fit(X_subset, y_subset)
    return shard_model

# Function to perform parallel execution on multicore processors
def parallel_execution(X_train, y_train, n_jobs=1):
    """Train one random forest per data shard using ``n_jobs`` joblib workers.

    The training data is cut into ``n_jobs`` contiguous shards whose sizes
    differ by at most one sample.  (Stepping through the data in fixed strides
    of ``num_samples // n_jobs`` would create an extra, tiny shard whenever the
    sample count is not divisible by ``n_jobs``.)

    Args:
        X_train: training features, sliceable along the first axis.
        y_train: training labels aligned with ``X_train``.
        n_jobs: number of shards and joblib workers; must be at least 1.

    Returns:
        List of fitted models, one per shard, in shard order.

    Raises:
        ValueError: if ``n_jobs`` is smaller than 1.
    """
    if n_jobs < 1:
        raise ValueError("n_jobs must be a positive integer")

    num_samples = X_train.shape[0]
    # Never create more shards than samples, which would leave empty shards
    num_shards = max(1, min(n_jobs, num_samples))

    # Split the data into subsets for parallel processing
    X_shards = np.array_split(X_train, num_shards)
    y_shards = np.array_split(y_train, num_shards)

    # Train the classifier on each subset in parallel
    models = Parallel(n_jobs=n_jobs)(
        delayed(train_classifier_subset)(X_part, y_part)
        for X_part, y_part in zip(X_shards, y_shards)
    )

    # The caller is responsible for combining the per-shard models' predictions
    return models

# Script entry point
def main():
    """Train two shard models on separate workers and score their averaged vote."""
    features, labels = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Timed section: sharded training only
    began = time.time()
    shard_models = parallel_execution(train_X, train_y, n_jobs=2)
    training_seconds = time.time() - began

    # Average the per-model class predictions, then threshold at 0.5
    per_model_predictions = [shard_model.predict(test_X) for shard_model in shard_models]
    averaged_vote = np.mean(per_model_predictions, axis=0)
    ensemble_predictions = (averaged_vote > 0.5).astype(int)

    score = accuracy_score(test_y, ensemble_predictions)
    print(f"Ensemble Model Training Time: {training_seconds} seconds")
    print(f"Ensemble Model Accuracy: {score}")

if __name__ == "__main__":
    main()


GPGPU Acceleration with TensorFlow:

In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Function to train a neural network
def train_neural_network(X_train, y_train, use_gpu=True):
    """Train a one-hidden-layer binary classifier for 10 epochs.

    Args:
        X_train: training features; ``X_train.shape[1]`` sizes the input layer.
        y_train: training labels.
        use_gpu: when true, request on-demand memory growth on every visible
            GPU.  The flag only controls that setting; it does not by itself
            pin the computation to a particular device.

    Returns:
        A tuple ``(model, elapsed_time)``; only the ``fit`` call is timed.
    """
    if use_gpu:
        # Apply the setting to every GPU rather than only the first one, so
        # all devices end up with the same memory-growth configuration.
        for gpu in tf.config.list_physical_devices('GPU'):
            try:
                tf.config.experimental.set_memory_growth(gpu, True)
            except RuntimeError as err:
                # Raised when TensorFlow has already initialised its devices
                # (e.g. an earlier cell ran a model); training can still go on.
                print(f"Could not enable GPU memory growth: {err}")

    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    start_time = time.time()
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)
    elapsed_time = time.time() - start_time
    return model, elapsed_time

# Main function
def main():
    """Train the network with GPGPU settings enabled and print time and accuracy."""
    # This cell's import list omits accuracy_score; import it locally so the
    # cell runs on its own instead of failing with a NameError.
    from sklearn.metrics import accuracy_score

    # Generate sample data
    X, y = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train a neural network with GPGPU acceleration.  elapsed_time covers the
    # whole call (device setup, model build and fit); training_time is the
    # fit-only figure returned by the helper.
    start_time = time.time()
    model, training_time = train_neural_network(X_train, y_train, use_gpu=True)
    elapsed_time = time.time() - start_time

    # Evaluate the model
    predictions = (model.predict(X_test) > 0.5).astype(int).flatten()
    accuracy = accuracy_score(y_test, predictions)

    print(f"Neural Network Training Time: {elapsed_time} seconds")
    print(f"Neural Network Accuracy: {accuracy}")

if __name__ == "__main__":
    main()


2.	Distributed Execution on Clusters:
(CPU and GPU clusters) – Data Distribution Strategies


Distributed Execution on CPU Cluster:

In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from joblib import Parallel, delayed
import multiprocessing

# Fit one 50-tree random forest on a single data shard
def train_classifier_subset(X_subset, y_subset):
    """Return a ``RandomForestClassifier`` (50 trees, seed 42) fitted on the shard."""
    shard_model = RandomForestClassifier(n_estimators=50, random_state=42)
    shard_model.fit(X_subset, y_subset)
    return shard_model

# Function to perform data parallelism
def data_parallelism(X_train, y_train, n_jobs=1):
    """Train one random forest per data shard, in parallel.

    The training data is cut into ``n_jobs`` contiguous shards whose sizes
    differ by at most one sample.  This matters when ``n_jobs`` is the machine's
    core count: stepping through the data in fixed strides of
    ``num_samples // n_jobs`` would leave a remainder shard of only a few
    samples, and the model trained on it would get a full vote in the ensemble.

    Args:
        X_train: training features, sliceable along the first axis.
        y_train: training labels aligned with ``X_train``.
        n_jobs: number of shards and joblib workers; must be at least 1.

    Returns:
        List of fitted models, one per shard, in shard order.

    Raises:
        ValueError: if ``n_jobs`` is smaller than 1.
    """
    if n_jobs < 1:
        raise ValueError("n_jobs must be a positive integer")

    num_samples = X_train.shape[0]
    # Never create more shards than samples, which would leave empty shards
    num_shards = max(1, min(n_jobs, num_samples))

    # Split the data into subsets for parallel processing
    X_shards = np.array_split(X_train, num_shards)
    y_shards = np.array_split(y_train, num_shards)

    # Train the classifier on each subset in parallel
    models = Parallel(n_jobs=n_jobs)(
        delayed(train_classifier_subset)(X_part, y_part)
        for X_part, y_part in zip(X_shards, y_shards)
    )

    # The caller is responsible for combining the per-shard models' predictions
    return models

# Script entry point
def main():
    """Train one shard model per CPU core and score their averaged vote."""
    features, labels = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    train_X, test_X, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Use as many workers as multiprocessing reports CPU cores
    core_count = multiprocessing.cpu_count()

    # Timed section: sharded training only
    began = time.time()
    shard_models = data_parallelism(train_X, train_y, n_jobs=core_count)
    training_seconds = time.time() - began

    # Average the per-model class predictions, then threshold at 0.5
    per_model_predictions = [shard_model.predict(test_X) for shard_model in shard_models]
    averaged_vote = np.mean(per_model_predictions, axis=0)
    ensemble_predictions = (averaged_vote > 0.5).astype(int)

    score = accuracy_score(test_y, ensemble_predictions)
    print(f"Ensemble Model (Data Parallelism) Training Time: {training_seconds} seconds")
    print(f"Ensemble Model (Data Parallelism) Accuracy: {score}")

if __name__ == "__main__":
    main()


Distributed Execution on GPU Cluster with TensorFlow:

In [None]:
import time
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Train a small binary classifier under a tf.distribute strategy
def train_neural_network(X_train, y_train, strategy):
    """Build the model inside ``strategy.scope()`` and fit it for 3 epochs.

    Args:
        X_train: training features; ``X_train.shape[1]`` sizes the input layer.
        y_train: training labels.
        strategy: object providing a ``scope()`` context manager.

    Returns:
        A tuple ``(model, elapsed_time)``; only the ``fit`` call is timed.
    """
    with strategy.scope():
        network = Sequential([
            Dense(64, input_dim=X_train.shape[1], activation='relu'),
            Dense(1, activation='sigmoid'),
        ])
        network.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    began = time.time()
    network.fit(X_train, y_train, epochs=3, batch_size=32, verbose=0)
    duration = time.time() - began
    return network, duration

# Main function
def main():
    """Start a worker server, train under MirroredStrategy, and report accuracy.

    Prints the replica count (on the chief task only), the distributed
    training time and the network's accuracy on the held-out split.
    """
    # This cell's import list omits accuracy_score; import it locally so the
    # cell runs on its own instead of failing with a NameError.
    from sklearn.metrics import accuracy_score

    # Generate sample data
    X, y = datasets.make_classification(n_samples=10000, n_features=20, random_state=42)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the GPU cluster configuration
    # Note: Adapt this based on the specific GPU cluster setup
    cluster_spec = tf.train.ClusterSpec({
        'worker': ['worker1:2222', 'worker2:2222']  # Specify worker nodes and ports
    })

    # Create a TensorFlow server for this task.  The task index is kept in a
    # local variable because the server object does not expose it back as an
    # attribute.
    task_index = 0
    server = tf.distribute.Server(cluster_spec, job_name='worker', task_index=task_index)

    # Check if the current process is the chief (master) process
    is_chief = (task_index == 0)

    # Create a MirroredStrategy for distributed training
    # NOTE(review): MirroredStrategy replicates across the devices of one
    # machine; for training that spans the workers listed above, TensorFlow
    # documents MultiWorkerMirroredStrategy -- confirm the intended setup.
    strategy = tf.distribute.MirroredStrategy()

    # Perform distributed training on the GPU cluster
    if is_chief:
        print(f'Number of devices: {strategy.num_replicas_in_sync}')

    # Pass the NumPy arrays straight through.  experimental_distribute_dataset()
    # expects a tf.data.Dataset and returns a single distributed dataset, so it
    # can neither take an (X, y) tuple nor be unpacked into two arrays.  A model
    # created under strategy.scope() has its fit() input distributed by Keras.
    model, elapsed_time_distributed_training = train_neural_network(X_train, y_train, strategy)

    # Evaluate the model
    predictions_distributed_training = (model.predict(X_test) > 0.5).astype(int).flatten()
    accuracy_distributed_training = accuracy_score(y_test, predictions_distributed_training)
    print(f"Neural Network Training Time (Distributed): {elapsed_time_distributed_training} seconds")
    print(f"Neural Network Accuracy (Distributed): {accuracy_distributed_training}")

if __name__ == "__main__":
    main()


# M2 Parallel / Distributed ML algorithms - Overview and Techniques
Parallel / Distributed ML algorithms - Overview and Techniques:
1.	CNN


In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn import datasets
import tensorflow as tf
from tensorflow.keras import layers, models

# Load a sample dataset (CIFAR-10).
# The loader lives in tf.keras.datasets; the `datasets` name imported in this
# cell is sklearn.datasets, which has no `cifar10` attribute.
(X, y), _ = tf.keras.datasets.cifar10.load_data()
# Scale pixel values from [0, 255] to [0, 1]
X = X.astype(np.float32) / 255.0
# Labels arrive with shape (n, 1); flatten them and encode as consecutive integers
y = LabelEncoder().fit_transform(y.flatten())

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the CNN model using TensorFlow:
# three convolution stages followed by a small dense classifier head
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))  # one output per CIFAR-10 class

# Compile the model (integer labels -> sparse categorical cross-entropy)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

# Evaluate the model: the predicted class is the index of the largest softmax output
y_pred = np.argmax(model.predict(X_test), axis=1)
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")


2.	Gradient Descent and Stochastic Gradient Descent


In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
import tensorflow as tf
from tensorflow.keras import layers, models

# Generate a synthetic dataset
X, y = make_regression(n_samples=1000, n_features=1, noise=30, random_state=42)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (fit on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Stochastic gradient descent with a constant learning rate (scikit-learn).
# SGDRegressor is a stochastic solver, so this is SGD rather than full-batch
# gradient descent; the full-batch variant is the TensorFlow model below.
# random_state pins the sample shuffling so results are reproducible.
gd_model = SGDRegressor(learning_rate='constant', eta0=0.01, max_iter=100, random_state=42)
gd_model.fit(X_train_scaled, y_train)

# Stochastic gradient descent with an inverse-scaling (decaying) learning rate
sgd_model = SGDRegressor(learning_rate='invscaling', max_iter=100, random_state=42)
sgd_model.fit(X_train_scaled, y_train)

# Full-batch gradient descent using TensorFlow: a single linear unit trained
# with plain SGD where every step sees the whole training set.
model = models.Sequential()
model.add(layers.Dense(1, input_dim=1, activation=None, kernel_initializer='zeros', bias_initializer='zeros'))
# One update per epoch means only 100 updates in total, so a larger step size
# than the per-sample solvers is used to reach the optimum.
optimizer = tf.optimizers.SGD(learning_rate=0.1)
model.compile(optimizer=optimizer, loss='mean_squared_error')
# batch_size = full training set -> each epoch is exactly one gradient-descent step
model.fit(X_train_scaled, y_train, epochs=100, batch_size=len(X_train_scaled), verbose=0)

# Evaluate models
gd_predictions = gd_model.predict(X_test_scaled)
sgd_predictions = sgd_model.predict(X_test_scaled)
tf_predictions = model.predict(X_test_scaled).flatten()

# Print the coefficients and performance metrics
print("SGD (constant learning rate) Coefficients:", gd_model.coef_, gd_model.intercept_)
print("SGD (inverse-scaling learning rate) Coefficients:", sgd_model.coef_, sgd_model.intercept_)
print("TensorFlow Gradient Descent Coefficients:", model.layers[0].get_weights())

# Compare predictions
print("Performance Metrics:")
print("SGD (constant learning rate) MSE:", np.mean((y_test - gd_predictions) ** 2))
print("SGD (inverse-scaling learning rate) MSE:", np.mean((y_test - sgd_predictions) ** 2))
print("TensorFlow Gradient Descent MSE:", np.mean((y_test - tf_predictions) ** 2))


3.	SVM


In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import layers, models

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (fit on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Regularisation strength shared by both SVM variants
svm_c = 1.0

# SVM using scikit-learn
svm_model = SVC(kernel='linear', C=svm_c)
svm_model.fit(X_train_scaled, y_train)
svm_predictions = svm_model.predict(X_test_scaled)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM (scikit-learn) Accuracy:", svm_accuracy)

# SVM using TensorFlow: a linear multi-class SVM, i.e. one linear score per
# class trained with a hinge (max-margin) loss plus an L2 penalty on the
# weights. A softmax layer with cross-entropy would be logistic regression.
n_classes = len(iris.target_names)
# Minimising 0.5*||w||^2 + C * sum(hinge) is equivalent to minimising
# mean(hinge) + ||w||^2 / (2*C*n), which is the form Keras optimises.
l2_strength = 1.0 / (2.0 * svm_c * len(X_train_scaled))
model = models.Sequential()
model.add(layers.Dense(
    n_classes,
    input_dim=4,
    activation=None,  # raw class scores; the hinge loss works on margins
    kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
))
model.compile(optimizer='adam', loss='categorical_hinge', metrics=['accuracy'])
# The categorical hinge loss expects one-hot encoded targets
y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=n_classes)
model.fit(X_train_scaled, y_train_onehot, epochs=50, verbose=0)

# Evaluate TensorFlow SVM: the predicted class is the one with the highest score
tf_predictions = np.argmax(model.predict(X_test_scaled), axis=1)
tf_accuracy = accuracy_score(y_test, tf_predictions)
print("SVM (TensorFlow) Accuracy:", tf_accuracy)


4.	k-Means


In [None]:
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import layers, models

# Generate synthetic data
X, y = make_blobs(n_samples=300, centers=3, cluster_std=0.70, random_state=0)

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

n_clusters = 3

# k-Means using scikit-learn (n_init pinned so results do not depend on the
# library version's default)
kmeans_sklearn = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
kmeans_sklearn.fit(X_scaled)
labels_sklearn = kmeans_sklearn.predict(X_scaled)

# k-Means using TensorFlow: Lloyd's algorithm written with tensor ops.
# (A Dense layer fitted with a KL-divergence loss does not learn centroids,
# and its 3-unit output cannot be compared against 2-D targets.)
points_tf = tf.constant(X_scaled, dtype=tf.float32)

# Initialise the centroids with distinct, randomly chosen data points
rng = np.random.default_rng(0)
init_idx = rng.choice(X_scaled.shape[0], size=n_clusters, replace=False)
centroids_tf = tf.gather(points_tf, init_idx)

max_iterations = 100
tolerance = 1e-6
for _ in range(max_iterations):
    # Assignment step: distance of every point to every centroid -> (n, k)
    step_distances = tf.norm(points_tf[:, None, :] - centroids_tf[None, :, :], axis=-1)
    assignments = tf.argmin(step_distances, axis=-1)

    # Update step: mean of the points assigned to each cluster
    membership = tf.one_hot(assignments, depth=n_clusters, dtype=points_tf.dtype)  # (n, k)
    counts = tf.reduce_sum(membership, axis=0)                                     # (k,)
    sums = tf.matmul(membership, points_tf, transpose_a=True)                      # (k, d)
    means = sums / tf.maximum(counts, 1.0)[:, None]
    # An empty cluster keeps its previous centroid instead of collapsing to 0
    updated_centroids = tf.where(counts[:, None] > 0, means, centroids_tf)

    shift = tf.reduce_max(tf.abs(updated_centroids - centroids_tf))
    centroids_tf = updated_centroids
    if float(shift) < tolerance:
        break  # converged: centroids stopped moving

# Get cluster assignments from the final TensorFlow centroids
distances_tf = tf.norm(points_tf[:, None, :] - centroids_tf[None, :, :], axis=-1)
labels_tf = tf.argmin(distances_tf, axis=-1).numpy()

# Print cluster assignments.
# Cluster indices are arbitrary, so the two label vectors may describe the
# same partition under a different numbering.
print("Cluster Assignments (scikit-learn):", labels_sklearn)
print("Cluster Assignments (TensorFlow):", labels_tf)


5.	kNN


In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import layers, models

# Load the Iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features (fit on the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Number of neighbours shared by both kNN variants
n_neighbors = 3

# kNN using scikit-learn
knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
knn_model.fit(X_train_scaled, y_train)
knn_predictions = knn_model.predict(X_test_scaled)
knn_accuracy = accuracy_score(y_test, knn_predictions)
print("kNN (scikit-learn) Accuracy:", knn_accuracy)

# kNN using TensorFlow: brute-force nearest-neighbour search with tensor ops.
# kNN has no training phase, so nothing is fitted here; a trained softmax
# layer would be logistic regression, not kNN.
n_classes = len(iris.target_names)
train_tf = tf.constant(X_train_scaled, dtype=tf.float32)
test_tf = tf.constant(X_test_scaled, dtype=tf.float32)
train_labels_tf = tf.constant(y_train, dtype=tf.int32)

# Euclidean distance between every test point and every training point -> (n_test, n_train)
distances_tf = tf.norm(test_tf[:, None, :] - train_tf[None, :, :], axis=-1)
# top_k returns the largest values, so negate the distances to get the nearest points
_, neighbor_idx = tf.math.top_k(-distances_tf, k=n_neighbors)
neighbor_labels = tf.gather(train_labels_tf, neighbor_idx)  # (n_test, k)
# Majority vote: count the neighbours per class and take the most frequent one
votes = tf.reduce_sum(tf.one_hot(neighbor_labels, depth=n_classes), axis=1)

# Evaluate TensorFlow kNN
tf_predictions = tf.argmax(votes, axis=-1).numpy()
tf_accuracy = accuracy_score(y_test, tf_predictions)
print("kNN (TensorFlow) Accuracy:", tf_accuracy)


6.	Decision Trees/Random Forests


In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import layers, models

# Iris measurements (4 features) and their species labels (3 classes)
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- scikit-learn: single decision tree ---
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("Decision Tree (scikit-learn) Accuracy:", dt_accuracy)

# --- scikit-learn: random forest of 100 trees ---
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest (scikit-learn) Accuracy:", rf_accuracy)

# --- TensorFlow counterpart reported under the "Decision Tree" label ---
# NOTE(review): this is a single softmax layer (a linear classifier), not a
# tree model; it serves only as a neural baseline for the comparison.
model_dt = models.Sequential([
    layers.InputLayer(input_shape=(4,)),
    layers.Dense(3, activation='softmax'),
])
model_dt.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_dt.fit(X_train, y_train, epochs=50, verbose=0)

# Class prediction = index of the highest softmax probability
tf_dt_probabilities = model_dt.predict(X_test)
tf_dt_predictions = tf_dt_probabilities.argmax(axis=1)
tf_dt_accuracy = accuracy_score(y_test, tf_dt_predictions)
print("Decision Tree (TensorFlow) Accuracy:", tf_dt_accuracy)

# --- TensorFlow counterpart reported under the "Random Forest" label ---
# NOTE(review): this is a one-hidden-layer network with dropout, not an
# ensemble of trees; it serves only as a neural baseline for the comparison.
model_rf = models.Sequential([
    layers.InputLayer(input_shape=(4,)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax'),
])
model_rf.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model_rf.fit(X_train, y_train, epochs=50, verbose=0)

# Class prediction = index of the highest softmax probability
tf_rf_probabilities = model_rf.predict(X_test)
tf_rf_predictions = tf_rf_probabilities.argmax(axis=1)
tf_rf_accuracy = accuracy_score(y_test, tf_rf_predictions)
print("Random Forest (TensorFlow) Accuracy:", tf_rf_accuracy)
