# ML Libraries Performance and GPU Utilization Check

This script checks the installation and performance of various machine learning libraries. It assesses both CPU and GPU performance (if available) for each library.

## Import Required Libraries

We begin by importing necessary libraries and modules.

In [4]:
import sys
import time
import torch
import tensorflow as tf
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
import warnings

# Suppressing warnings, especially for TensorFlow
warnings.filterwarnings('ignore')

Thu Dec  7 08:43:26 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.29.06              Driver Version: 546.17       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4080        On  | 00000000:01:00.0  On |                  N/A |
| 30%   40C    P0              41W / 288W |   3292MiB / 16376MiB |      2%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Performance Measurement Functions

For each library, we define a function that measures its performance on the CPU and, where the library supports it, on the GPU (scikit-learn is measured on the CPU only).


## GPU Availability Check

We also check whether a GPU is available to each library.

In [18]:
# Nvidia Check
!nvidia-smi

# Performance Measurement for PyTorch
def measure_performance_torch(device, size=1000):
    """Time the creation and multiplication of two random square matrices in PyTorch.

    Args:
        device: Device to run on, e.g. ``"cpu"`` or ``"cuda"`` (a string or a
            ``torch.device``).
        size: Number of rows and columns of each matrix.

    Returns:
        Elapsed time in seconds as a float.
    """
    target = torch.device(device)
    on_cuda = target.type == "cuda"

    # CUDA kernels are launched asynchronously: without a synchronize the
    # timer would only capture the launch overhead, not the actual compute.
    if on_cuda:
        torch.cuda.synchronize(target)  # drain previously queued work first

    start_time = time.perf_counter()
    a = torch.rand(size, size, device=device)
    b = torch.rand(size, size, device=device)
    torch.matmul(a, b)
    if on_cuda:
        torch.cuda.synchronize(target)  # wait until the matmul has finished
    return time.perf_counter() - start_time

# Performance Measurement for TensorFlow
def measure_performance_tensorflow(device_name, size=1000):
    """Time the creation and multiplication of two random square matrices in TensorFlow.

    Args:
        device_name: TensorFlow device string, e.g. ``"/cpu:0"`` or ``"/gpu:0"``.
        size: Number of rows and columns of each matrix.

    Returns:
        Elapsed time in seconds as a float.
    """
    with tf.device(device_name):
        start_time = time.perf_counter()
        a = tf.random.normal([size, size])
        b = tf.random.normal([size, size])
        product = tf.matmul(a, b)
        # Pull the result back to the host so the timer cannot stop before the
        # device has actually finished computing it (GPU work may still be in
        # flight when the op call returns).
        product.numpy()
        return time.perf_counter() - start_time

# Performance Measurement for Scikit-learn
def measure_performance_sklearn(size=1000):
    """Time fitting a RandomForestClassifier on a synthetic classification set.

    The dataset (``size`` samples, 20 features, 2 classes, fixed seed 42) is
    generated before the timer starts, so only ``fit`` is measured.

    Args:
        size: Number of samples in the generated dataset.

    Returns:
        Elapsed fitting time in seconds as a float.
    """
    X, y = make_classification(n_samples=size, n_features=20, n_classes=2, random_state=42)
    clf = RandomForestClassifier()
    # perf_counter is monotonic and high-resolution; time.time() can jump when
    # the system clock is adjusted and may be too coarse for short intervals.
    start_time = time.perf_counter()
    clf.fit(X, y)
    return time.perf_counter() - start_time

# Check GPU support for libraries
def check_gpu_support(library):
    """Report whether the given library can see a GPU.

    Args:
        library: Library name; ``'torch'`` and ``'tensorflow'`` are probed,
            any other value is reported as having no GPU support.

    Returns:
        A ``(available, name)`` tuple. ``name`` is the CUDA device name of
        device 0 for PyTorch when a GPU is available, otherwise ``None``.
    """
    if library == 'torch':
        if torch.cuda.is_available():
            return True, torch.cuda.get_device_name(0)
        return False, None

    if library == 'tensorflow':
        # A non-empty list of physical GPU devices means TensorFlow sees a GPU.
        has_gpu = bool(tf.config.list_physical_devices('GPU'))
        return has_gpu, None

    return False, None

# Main function to measure and display performance
def main():
    """Benchmark each library on CPU (and GPU where available) and print a table.

    For every library in the test list, GPU support is checked once, then the
    matching measurement function is run ``num_tests`` times. Each run is
    recorded as a dict with the timings converted to milliseconds, and the
    collected rows are passed to ``print_table``.
    """
    libraries_to_test = ["torch", "tensorflow", "scikit-learn"]  # Libraries to test
    num_tests = 10  # Number of tests per library
    results = []  # Storing results

    for lib in libraries_to_test:
        gpu_available, gpu_name = check_gpu_support(lib)
        for i in range(num_tests):
            if lib == 'torch':
                cpu_time = measure_performance_torch("cpu")
                gpu_time = measure_performance_torch("cuda") if gpu_available else None
            elif lib == 'tensorflow':
                cpu_time = measure_performance_tensorflow("/cpu:0")
                gpu_time = measure_performance_tensorflow("/gpu:0") if gpu_available else None
            elif lib == 'scikit-learn':
                cpu_time = measure_performance_sklearn()
                gpu_time = None

            # Store the results
            results.append({
                "Library": lib,
                "Test": i + 1,
                "CPU Time (ms)": cpu_time * 1000,  # Convert to milliseconds
                # Compare against None explicitly: a measured time of 0.0 is
                # falsy but is still a real measurement, not a missing one.
                "GPU Time (ms)": gpu_time * 1000 if gpu_time is not None else "N/A",
                "GPU Available": gpu_available,
                "GPU Name": gpu_name
            })

    # Print results in a tabular format
    print_table(results)

# Function to print results in a table format
def print_table(data):
    """Print benchmark results as an aligned ASCII table.

    Args:
        data: Iterable of row dicts with the keys ``"Library"``, ``"Test"``,
            ``"CPU Time (ms)"`` (a number) and ``"GPU Time (ms)"`` (a number,
            or ``"N/A"``/``None`` when no GPU measurement exists).

    The Speedup column shows ``CPU / GPU`` as ``"<ratio>x"`` when the GPU is at
    least as fast as the CPU, the extra time the GPU needed as
    ``"<percent>% slower"`` when it is not, and ``"N/A"`` when either timing
    is missing or not positive. An empty ``data`` prints only the header.
    """
    headers = ["Library", "Test", "CPU Time (ms)", "GPU Time (ms)", "Speedup"]

    # Render every cell to text first so column widths can be derived from
    # exactly what will be printed.
    table_rows = []
    for row in data:
        cpu_time = row["CPU Time (ms)"]
        gpu_time = row["GPU Time (ms)"]
        has_gpu_time = gpu_time is not None and gpu_time != "N/A"

        speedup_text = "N/A"
        if has_gpu_time and cpu_time > 0 and float(gpu_time) > 0:
            gpu_ms = float(gpu_time)
            speedup = cpu_time / gpu_ms
            if speedup >= 1:
                speedup_text = f"{speedup:.2f}x"
            else:
                # GPU took longer than the CPU: report how much extra time.
                speedup_text = f"{100 * (gpu_ms / cpu_time - 1):.2f}% slower"

        table_rows.append([
            row["Library"],
            str(row["Test"]),
            f"{cpu_time:.6f}",
            f"{float(gpu_time):.6f}" if has_gpu_time else "N/A",
            speedup_text,
        ])

    # Each column is as wide as its widest entry, header included, so the
    # header and separator lines always match the data rows.
    widths = [max(len(cell) for cell in column) for column in zip(headers, *table_rows)]

    def format_line(cells):
        padded = (cell.ljust(width) for cell, width in zip(cells, widths))
        return "| " + " | ".join(padded) + " |"

    separator = "+-" + "-+-".join("-" * width for width in widths) + "-+"

    # Print the table
    print(separator)
    print(format_line(headers))
    print(separator)
    for cells in table_rows:
        print(format_line(cells))
    print(separator)

# Run the benchmarks when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()


Thu Dec  7 09:06:09 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 545.29.06              Driver Version: 546.17       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 4080        On  | 00000000:01:00.0  On |                  N/A |
| 30%   39C    P0              40W / 288W |  15963MiB / 16376MiB |      2%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

### Loading all essential libraries, including cuDF to speed up pandas

In [None]:
# CUDF - GPU Dataframe
# cudf is a GPU DataFrame library for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.    
  
%load_ext cudf.pandas

# Importing libraries
#=============================
import cudf
import pandas as pd
import numpy as np  
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import folium
#=============================


### Get data 