Converting Time Series Data to Images

In [2]:
# Importing necessary imports
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import math

In [3]:
# Loading Data
# The workbook location can be overridden with the PUMP_DATA_PATH environment
# variable so the notebook runs on other machines; when the variable is not
# set, the original local path is used unchanged.
DATA_PATH = os.environ.get(
    "PUMP_DATA_PATH",
    "C:/Users/srian/Documents/GitHub/Generative-AI-project/Centrifugal_pumps_measurements.xlsx",
)
data = pd.read_excel(DATA_PATH)

data

Unnamed: 0,Machine_ID,value_ISO,value_DEMO,value_ACC,value_P2P,valueTEMP,minute,second,year,month,day,hour,Pump_Status
0,1,0.349609,0.000343,0.012512,0.042480,24.25,33,41,2022,12,7,12,Healthy
1,1,0.349609,0.000343,0.012512,0.042480,24.25,33,46,2022,12,7,12,Healthy
2,1,0.349609,0.000343,0.012512,0.042480,24.25,33,51,2022,12,7,12,Healthy
3,1,0.349609,0.000343,0.012512,0.042480,24.25,33,56,2022,12,7,12,Healthy
4,1,0.349609,0.000343,0.012512,0.042480,24.25,34,1,2022,12,7,12,Healthy
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5109,2,0.023193,0.000277,0.000637,0.006409,23.00,11,47,2022,12,14,15,Not Healthy
5110,2,0.023193,0.000277,0.000637,0.006409,23.00,11,52,2022,12,14,15,Not Healthy
5111,2,0.023193,0.000277,0.000637,0.006409,23.00,11,57,2022,12,14,15,Not Healthy
5112,2,0.023193,0.000277,0.000637,0.006409,23.00,12,2,2022,12,14,15,Not Healthy


In [None]:
# def time_series_to_images(data, output_dir, grid_size=(2, 3)):
#     os.makedirs(output_dir, exist_ok=True)
#     feature_columns = [col for col in data.columns if col not in ['Pump_Status', 'Machine_ID']]

#     for idx, row in data.iterrows():
#         num_features = len(feature_columns)
#         grid_rows = math.ceil(math.sqrt(num_features))
#         grid_cols = math.ceil(num_features / grid_rows)
        
#         fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(10, 10))
#         axes = axes.flatten()
        
#         for i, feature in enumerate(feature_columns):
#             axes[i].plot(row[feature])
#             axes[i].set_title(feature)
#             axes[i].axis('off')
        
#         for j in range(len(feature_columns), len(axes)):
#             fig.delaxes(axes[j])
        
#         label = 1 if row['Pump_Status'] == 'Healthy' else 0
#         plt.tight_layout()
#         plt.savefig(f"{output_dir}/sample_{idx}_label_{label}.png")
#         plt.close(fig)

# # Ensure Pump_Status is encoded numerically
# data['Pump_Status'] = data['Pump_Status'].apply(lambda x: 1 if x == 'Healthy' else 0)

# # Example Usage
# output_dir = "time_series_images"
# time_series_to_images(data, output_dir, grid_size=(2, 3))  # Adjust grid_size if needed

In [4]:
# Show the distinct raw Pump_Status labels present in the loaded data.
print(data.loc[:, 'Pump_Status'].unique())


['Healthy' 'Not Healthy']


In [5]:
# Encode Pump_Status as 1 ('healthy', ignoring case and surrounding whitespace)
# or 0 (anything else, including missing values, which str() turns into 'nan').
# The dtype guard makes the cell safe to re-run: once the column is numeric,
# applying the string comparison again would turn every label into 0.
if not pd.api.types.is_numeric_dtype(data['Pump_Status']):
    data['Pump_Status'] = data['Pump_Status'].apply(
        lambda x: 1 if str(x).strip().lower() == 'healthy' else 0
    )


In [6]:
# Count missing Pump_Status values.
# NOTE: this runs after the encoding cell, which maps every value (missing ones
# included) to 0 or 1, so the count reported here is 0 regardless of the raw data.
print(data['Pump_Status'].isna().sum())


0


In [7]:
# Keep only the rows that carry a Pump_Status value, then display the frame.
data = data[data['Pump_Status'].notna()]
data

Unnamed: 0,Machine_ID,value_ISO,value_DEMO,value_ACC,value_P2P,valueTEMP,minute,second,year,month,day,hour,Pump_Status
0,1,0.349609,0.000343,0.012512,0.042480,24.25,33,41,2022,12,7,12,1
1,1,0.349609,0.000343,0.012512,0.042480,24.25,33,46,2022,12,7,12,1
2,1,0.349609,0.000343,0.012512,0.042480,24.25,33,51,2022,12,7,12,1
3,1,0.349609,0.000343,0.012512,0.042480,24.25,33,56,2022,12,7,12,1
4,1,0.349609,0.000343,0.012512,0.042480,24.25,34,1,2022,12,7,12,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5109,2,0.023193,0.000277,0.000637,0.006409,23.00,11,47,2022,12,14,15,0
5110,2,0.023193,0.000277,0.000637,0.006409,23.00,11,52,2022,12,14,15,0
5111,2,0.023193,0.000277,0.000637,0.006409,23.00,11,57,2022,12,14,15,0
5112,2,0.023193,0.000277,0.000637,0.006409,23.00,12,2,2022,12,14,15,0


In [8]:
import os
import matplotlib.pyplot as plt
import math

def _status_to_label(status):
    """Map a Pump_Status value (raw text or already-encoded number) to a 0/1 label."""
    if isinstance(status, str):
        # Raw text status: normalise the same way the notebook's encoding cell does.
        return 1 if status.strip().lower() == 'healthy' else 0
    # Already-encoded status: 1 means healthy, every other value maps to 0.
    return 1 if status == 1 else 0


def time_series_to_images(data, output_dir, grid_size=(2, 3)):
    """
    Render each row of ``data`` as a grid of per-feature plots and save it as a PNG.

    Every column except 'Pump_Status' and 'Machine_ID' is treated as a feature and
    gets its own subplot. One file per row is written to
    ``{output_dir}/sample_{idx}_label_{label}.png`` where ``idx`` is the row's index
    label and ``label`` is 1 for a healthy pump and 0 otherwise.

    Args:
    - data: Pandas DataFrame, one sample per row. Must contain 'Pump_Status',
      either as raw text ('Healthy' / 'Not Healthy') or already encoded as 1 / 0.
    - output_dir: Directory to save images (created if it does not exist).
    - grid_size: Currently not used for the layout; the subplot grid is derived
      from the number of feature columns. Kept so existing calls keep working.

    Returns:
    - None. Images are written to disk as a side effect.

    Raises:
    - ValueError: if ``data`` has rows but no feature columns to plot.

    A failure while saving one image is reported with ``print`` and the loop
    continues with the next row.
    """
    os.makedirs(output_dir, exist_ok=True)
    feature_columns = [col for col in data.columns if col not in ('Pump_Status', 'Machine_ID')]

    if len(data.index) == 0:
        # Nothing to render; the output directory has still been created.
        return
    if not feature_columns:
        raise ValueError("data has no feature columns to plot")

    # The layout depends only on the number of features, so compute it once.
    num_features = len(feature_columns)
    grid_rows = math.ceil(math.sqrt(num_features))
    grid_cols = math.ceil(num_features / grid_rows)

    for idx, row in data.iterrows():
        # squeeze=False always yields a 2-D array of axes, so flatten() also
        # works when there is only a single feature (1x1 grid).
        fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(10, 10), squeeze=False)
        try:
            axes = axes.flatten()

            # NOTE(review): row[feature] is a single value per row; a lone point
            # drawn with the default line style (no marker) may not be visible.
            # Confirm whether a window of rows was intended per image.
            for ax, feature in zip(axes, feature_columns):
                ax.plot(row[feature])
                ax.set_title(feature)
                ax.axis('off')

            # Remove the unused cells of the grid.
            for spare in axes[num_features:]:
                fig.delaxes(spare)

            try:
                # Pump_Status may already be encoded as 1/0 by an earlier cell, so
                # comparing against the text 'Healthy' alone would label every
                # image 0.
                label = _status_to_label(row['Pump_Status'])
                fig.tight_layout()
                fig.savefig(f"{output_dir}/sample_{idx}_label_{label}.png")
            except Exception as e:
                print(f"Error saving file for index {idx}: {e}")
        finally:
            # Always release the figure, even when plotting or saving failed.
            plt.close(fig)

In [None]:
# Render the frame to images: one PNG per row is written into `output_dir`.
output_dir = "time_series_images"
time_series_to_images(data=data, output_dir=output_dir, grid_size=(2, 3))