Converting Time Series Data to Images

In [2]:
# Import required libraries
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import math

In [3]:
# Loading Data
# The workbook location can be overridden with the PUMP_DATA_PATH environment
# variable so the notebook also runs on machines other than the author's.
# When the variable is unset, the original absolute path is used unchanged.
DATA_PATH = os.environ.get(
    "PUMP_DATA_PATH",
    "C:/Users/srian/Documents/GitHub/Generative-AI-project/Centrifugal_pumps_measurements.xlsx",
)
data = pd.read_excel(DATA_PATH)

data

Unnamed: 0,Machine_ID,value_ISO,value_DEMO,value_ACC,value_P2P,valueTEMP,minute,second,year,month,day,hour,Pump_Status
0,1,0.349609,0.000343,0.012512,0.042480,24.25,33,41,2022,12,7,12,Healthy
1,1,0.349609,0.000343,0.012512,0.042480,24.25,33,46,2022,12,7,12,Healthy
2,1,0.349609,0.000343,0.012512,0.042480,24.25,33,51,2022,12,7,12,Healthy
3,1,0.349609,0.000343,0.012512,0.042480,24.25,33,56,2022,12,7,12,Healthy
4,1,0.349609,0.000343,0.012512,0.042480,24.25,34,1,2022,12,7,12,Healthy
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5109,2,0.023193,0.000277,0.000637,0.006409,23.00,11,47,2022,12,14,15,Not Healthy
5110,2,0.023193,0.000277,0.000637,0.006409,23.00,11,52,2022,12,14,15,Not Healthy
5111,2,0.023193,0.000277,0.000637,0.006409,23.00,11,57,2022,12,14,15,Not Healthy
5112,2,0.023193,0.000277,0.000637,0.006409,23.00,12,2,2022,12,14,15,Not Healthy


In [6]:
def time_series_to_images(data, output_dir, grid_size=None):
    """
    Render every row of ``data`` as one PNG containing a grid of per-feature plots.

    Every column that is not in the metadata list below (``Pump_Status``,
    ``Machine_ID`` and the timestamp parts) is treated as a feature and gets
    its own subplot, titled with the column name. One image is written per
    row as ``sample_<index>_label_<Pump_Status>.png`` inside ``output_dir``.

    Args:
    - data: Pandas DataFrame; one image is produced per row. Must contain a
      ``Pump_Status`` column, which is used in the file name.
    - output_dir: Directory to save images; created if it does not exist.
    - grid_size: Grid dimensions for feature plots (rows, cols). If None, a
      near-square grid large enough for all features is calculated.

    Raises:
    - ValueError: if ``data`` has no feature columns, or if ``grid_size`` has
      fewer cells than there are feature columns.

    Notes:
    - A scalar cell value is drawn with a point marker, because a one-point
      line without a marker renders as an empty subplot. Sequence-valued
      cells are plotted as plain lines.
    - NOTE(review): any column not listed as metadata is plotted, including
      e.g. an ``Unnamed: 0`` index column read from a spreadsheet -- confirm
      whether such columns should be excluded.
    """
    metadata_columns = ('Pump_Status', 'Machine_ID', 'minute', 'second', 'year', 'month', 'day', 'hour')
    feature_columns = [col for col in data.columns if col not in metadata_columns]
    num_features = len(feature_columns)

    # Validate before touching the filesystem or creating figures.
    if num_features == 0:
        raise ValueError("data contains no feature columns to plot")

    # Dynamically determine grid size if not provided
    if grid_size is None:
        grid_rows = math.ceil(math.sqrt(num_features))
        grid_cols = math.ceil(num_features / grid_rows)
        grid_size = (grid_rows, grid_cols)

    n_rows, n_cols = grid_size[0], grid_size[1]
    if n_rows * n_cols < num_features:
        raise ValueError(
            f"grid_size {n_rows}x{n_cols} has fewer cells than the "
            f"{num_features} feature columns"
        )

    os.makedirs(output_dir, exist_ok=True)

    for idx, row in data.iterrows():
        # squeeze=False guarantees a 2-D array of Axes even for a 1x1 grid,
        # where plt.subplots would otherwise return a single Axes object.
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 10), squeeze=False)
        try:
            axes = axes.flatten()  # Flatten axes array for easy indexing
            for ax, feature in zip(axes, feature_columns):
                value = row[feature]
                # A lone point needs a marker to be visible; marker=None
                # keeps matplotlib's default style for sequences.
                marker = 'o' if np.ndim(value) == 0 else None
                ax.plot(value, marker=marker)
                ax.set_title(feature)
                ax.axis('off')  # Turn off axes for cleaner visuals

            # Turn off unused axes
            for ax in axes[num_features:]:
                ax.axis('off')

            fig.tight_layout()
            fig.savefig(os.path.join(output_dir, f"sample_{idx}_label_{row['Pump_Status']}.png"))
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)

In [None]:
# Write one image per row of the loaded measurements into a local folder.
output_dir = "time_series_images"
time_series_to_images(data=data, output_dir=output_dir)