In [None]:
import platform
from hdbcli import dbapi
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import os
import time
import matplotlib.pyplot as plt
import import_ipynb

def format_column_values(column):
    """Return the items of *column* as one single-quoted, comma-separated string.

    Each item is rendered with its default string formatting, wrapped in
    single quotes, and the pieces are joined with ", ". Quotes inside an
    item are not escaped. An empty iterable yields an empty string.

    Args:
        column: Any iterable of values (for example a list or pandas Series).

    Returns:
        A string such as "'a', 'b', 'c'".
    """
    wrap_in_quotes = "'{}'".format
    return ', '.join(map(wrap_in_quotes, column))

def calculate_minutes_between(datetime1, datetime2):
    """Return the signed number of minutes from *datetime1* to *datetime2*.

    The result is ``datetime2 - datetime1`` expressed in minutes and rounded
    to two decimal places. It is positive when *datetime2* is later than
    *datetime1* and negative when it is earlier (no absolute value is taken).

    Args:
        datetime1: Start instant, a ``numpy.datetime64``.
        datetime2: End instant, a ``numpy.datetime64``.

    Returns:
        The difference in minutes, rounded to 2 decimals.

    Raises:
        ValueError: If either argument is not a ``numpy.datetime64``.
    """
    # Reject anything that is not already a numpy datetime scalar.
    for candidate in (datetime1, datetime2):
        if not isinstance(candidate, np.datetime64):
            raise ValueError("Both inputs must be numpy.datetime64 objects.")

    # Dividing a timedelta64 by a one-minute timedelta64 gives a plain float
    # count of minutes.
    one_minute = np.timedelta64(1, 'm')
    elapsed_minutes = (datetime2 - datetime1) / one_minute
    return round(elapsed_minutes, 2)

def plot_pipeline_same_pu(df, folder, name):
    """Plot pipeline-run counts and durations and save the chart as a PNG.

    TRANSACTIONS, POSITIONS and CREDITS are drawn as annotated lines on the
    left y-axis; the run duration (STOPTIME - STARTTIME, in minutes) is drawn
    as annotated bars on a secondary right y-axis. Runs are labelled on the
    x-axis with the last five characters of PIPELINERUNSEQ.

    The caller's DataFrame is left untouched: the label truncation and the
    derived DURATION column are applied to an internal copy only.

    Args:
        df: DataFrame providing the columns PIPELINERUNSEQ, STARTTIME,
            STOPTIME, TRANSACTIONS, POSITIONS and CREDITS.
        folder: Directory the image is written into. This function does not
            create it.
        name: Output file name without the ".png" extension.

    Returns:
        None. The figure is written to ``{folder}/{name}.png`` at 300 dpi and
        is left open (this function never closes it).
    """
    # Work on a copy so the shortened run labels and the helper DURATION
    # column never leak back into the caller's data.
    df = df.copy()

    # Shorten the run sequence to its last 5 characters for readable ticks.
    df['PIPELINERUNSEQ'] = df['PIPELINERUNSEQ'].astype(str).str[-5:]

    # Run duration in minutes.
    df['DURATION'] = (
        pd.to_datetime(df['STOPTIME']) - pd.to_datetime(df['STARTTIME'])
    ).dt.total_seconds() / 60

    # Widen the figure with the number of runs (0.8 in per run, at least 10)
    # so annotations stay legible; the height is fixed.
    fig_width = max(10, len(df) * 0.8)
    fig_height = 6
    fig, ax1 = plt.subplots(figsize=(fig_width, fig_height))

    # (column, legend label) pairs drawn as lines on the primary axis.
    count_series = (
        ('TRANSACTIONS', 'Transactions'),
        ('POSITIONS', 'Positions'),
        ('CREDITS', 'Credits'),
    )
    for column, label in count_series:
        ax1.plot(df['PIPELINERUNSEQ'], df[column], label=label, marker='o')

    # Print each value slightly above its marker.
    for _, row in df.iterrows():
        for column, _label in count_series:
            ax1.annotate(
                f"{row[column]}",
                (row['PIPELINERUNSEQ'], row[column]),
                textcoords="offset points",
                xytext=(0, 10),
                ha='center',
            )

    ax1.set_xlabel('Pipeline Run Seq (Last 5 digits)')
    ax1.set_ylabel('Values')
    ax1.set_title('Pipeline Data')
    # Anchor at the axes' top-right corner so the legend sits outside the
    # plotting area.
    ax1.legend(loc='lower left', bbox_to_anchor=(1, 1))

    # Durations use their own scale on a secondary y-axis sharing the x-axis.
    ax2 = ax1.twinx()
    ax2.bar(df['PIPELINERUNSEQ'], df['DURATION'], alpha=0.8, color='gray', label='Duration (min)')

    # Label every bar with its duration, one decimal place, at the bar top.
    for _, row in df.iterrows():
        ax2.annotate(
            f"{row['DURATION']:.1f}",
            (row['PIPELINERUNSEQ'], row['DURATION']),
            textcoords="offset points",
            xytext=(0, 0),
            ha='center',
        )

    ax2.set_ylabel('Duration (minutes)')
    ax2.legend(loc='upper left', bbox_to_anchor=(1, 1))

    # Save the plot to disk (it is not shown explicitly here).
    plt.tight_layout()
    plt.savefig(f'{folder}/{name}.png', dpi=300)