In [None]:
from google.colab import files
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
from scipy.stats import entropy
import ruptures as rpt
import seaborn as sns  
from sklearn.linear_model import LinearRegression

In [None]:

# Open Colab's upload dialog; the result maps each uploaded filename to its content.
uploaded = files.upload()

# Echo every uploaded file with its size so the user can confirm the upload.
for fn, payload in uploaded.items():
  print(f'User uploaded file "{fn}" with length {len(payload)} bytes')

In [None]:
# `uploaded` is keyed by filename; only the first uploaded file is analysed.
file_name = list(uploaded.keys())[0]

# Load the whitespace-delimited file into a DataFrame.
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`,
# which is deprecated since pandas 2.2 and emits a FutureWarning.
df = pd.read_csv(file_name, sep=r'\s+')
df.head()


In [None]:
# === STL Decomposition ===
# Split the target signal into trend, seasonal and residual components and
# store each one as a new column of `df`.
print("Starting STL decomposition...")

target_column = 'Mem_used'  # column that is decomposed
# Samples per seasonal cycle.
# NOTE(review): presumably one day of 1-minute samples — confirm against the sampling rate.
seasonal_period = 1440

# STL is fitted on the non-missing values only.
clean_series = df[target_column].dropna()
stl = STL(clean_series, period=seasonal_period)
stl_result = stl.fit()

# Assignment aligns on the index, so rows dropped above end up as NaN
# in the three component columns.
for component_name, component_values in (
    ('trend', stl_result.trend),
    ('seasonal', stl_result.seasonal),
    ('residual', stl_result.resid),
):
    df[component_name] = component_values

print("STL decomposition completed.")

In [None]:
def remove_warmup(df_original, cutoff_time_seconds=600):
    """Split a DataFrame into its warm-up phase and the remaining data.

    Args:
        df_original: DataFrame with an 'Elapsed_time' column.
        cutoff_time_seconds: Rows with 'Elapsed_time' strictly below this
            value are treated as warm-up.

    Returns:
        A ``(df_main, df_warmup)`` tuple. ``df_main`` holds the rows at or
        after the cutoff with a fresh 0-based index; ``df_warmup`` holds the
        rows before the cutoff and keeps their original index. Both are
        copies, so the input frame is left untouched.
    """
    print(f"Original dataset size: {len(df_original)} rows")

    elapsed = df_original['Elapsed_time']
    # Two explicit comparisons (rather than negating one mask) so that rows
    # with a missing 'Elapsed_time' land in neither part.
    warmup_part = df_original.loc[elapsed < cutoff_time_seconds].copy()
    main_part = (
        df_original.loc[elapsed >= cutoff_time_seconds]
        .copy()
        .reset_index(drop=True)
    )

    print(f"Removed warm-up data: {len(warmup_part)} rows")
    print(f"New main dataset size: {len(main_part)} rows")

    return main_part, warmup_part

# Rebind `df` to the post-warm-up rows and keep the warm-up rows aside.
# Because `df` itself is overwritten, re-running this cell on the already
# trimmed frame yields an empty warm-up frame.
df, df_warmup_isolated = remove_warmup(df, cutoff_time_seconds=600)

print(
    "\n--- Warm-up removal process completed. ---",
    "The DataFrame 'df' now contains only data from 600 seconds onwards.",
    sep="\n",
)


In [None]:
# === TREND VISUALIZATION ===
# Plot the STL trend component against elapsed time.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(df['Elapsed_time'], df['trend'], label='Trend', color='orange')
ax.set_title('Used Memory Trend')
ax.set_xlabel('Time (s)')
ax.set_ylabel('Memory (trend)')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Label "aging" periods from the local slope of the STL trend: a straight
# line is fitted to each sliding window, and every sample of a window whose
# slope exceeds the threshold is labelled 1.
window_size = 720        # samples per regression window
step_size = 360          # stride between window starts (half a window, so windows overlap)
slope_threshold = 0.5    # minimum slope, in trend units per sample, to flag a window
labels = np.zeros(len(df))
trend = df['trend'].values

# The regressor (sample positions inside a window) is identical for every
# window, so it is built once outside the loop.
x_window = np.arange(window_size).reshape(-1, 1)

# `+ 1` makes the range include the last full window, which starts at
# len(trend) - window_size; without it that window is never evaluated when
# the remaining length is an exact multiple of step_size.
for i in range(0, len(trend) - window_size + 1, step_size):
    y_window = trend[i:i+window_size].reshape(-1, 1)
    model = LinearRegression().fit(x_window, y_window)
    slope = model.coef_[0][0]

    # Overlapping windows can only turn a label on, never back off.
    if slope > slope_threshold:
        labels[i:i+window_size] = 1

# Add aging label column to DataFrame
df['Aging_Label'] = labels

In [None]:
# Inspect the DataFrame after the 'Aging_Label' column has been added.
df

In [None]:
# Overlay the raw signal, its STL trend and the samples labelled as aging.
time_axis = df['Elapsed_time']
raw_signal = df[target_column]
aging_mask = df['Aging_Label'] == 1

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(time_axis, raw_signal, label=f'Original Signal of {target_column}')
ax.plot(time_axis, df['trend'], label='Trend', color='orange')
# Shade from the signal up to its global maximum wherever aging was detected.
ax.fill_between(
    time_axis,
    raw_signal,
    raw_signal.max(),
    where=aging_mask,
    color='red',
    alpha=0.3,
    label='Detected Aging',
)
ax.set_title('Original Time Series with Aging Detection')
ax.set_xlabel('Elapsed Time (s)')
ax.set_ylabel(f'Memory {target_column}')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Inspect the labelled DataFrame once more before the warm-up rows are re-attached.
df

In [None]:
# Give the warm-up rows the same column layout as the labelled main frame;
# columns the warm-up frame lacks are created filled with pd.NA.
final_columns = df.columns
df_warmup_adjusted = df_warmup_isolated.reindex(
    columns=final_columns,
    fill_value=pd.NA,
)
# Warm-up samples are never considered aging.
df_warmup_adjusted['Aging_Label'] = 0

# Put warm-up rows back in front of the main rows and order everything by time.
# Re-running this cell would prepend the warm-up rows a second time.
df = (
    pd.concat([df_warmup_adjusted, df], ignore_index=True)
    .sort_values(by='Elapsed_time')
)

print("Final unified, clean, and sorted DataFrame:")
display(df.head(10))

unique_labels = df['Aging_Label'].unique()
print("\nUnique values of 'Aging_Label' in the final DataFrame:")
print(unique_labels)


In [None]:
# Same overlay as before, now on the full frame that includes the warm-up rows.
elapsed_s = df['Elapsed_time']
memory_series = df[target_column]
is_aging = df['Aging_Label'] == 1

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(elapsed_s, memory_series, label=f'Original Signal of {target_column}')
ax.plot(elapsed_s, df['trend'], label='Trend', color='orange')
# Shade from the signal up to its global maximum wherever aging was detected.
ax.fill_between(
    elapsed_s,
    memory_series,
    memory_series.max(),
    where=is_aging,
    color='red',
    alpha=0.3,
    label='Detected Aging',
)
ax.set_title('Original Time Series with Aging Detection')
ax.set_xlabel('Elapsed Time (s)')
ax.set_ylabel(f'Memory {target_column}')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Pairwise correlation of the numeric columns only.
corr_table = df.corr(numeric_only=True)
print("Correlation Table:", corr_table, sep="\n")

In [None]:
# Render the correlation table as an annotated heatmap (two decimals per cell).
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_table, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Share of samples per 'Aging_Label' value (fractions instead of raw counts).
label_distribution = df['Aging_Label'].value_counts(normalize=True)
print(label_distribution)

In [None]:
# Strip the last extension (if any) from the uploaded filename and use '.csv' instead.
# When the upload already ends in '.csv' the output name equals the input name.
base_filename, *_ = file_name.rsplit('.', 1)
csv_filename = base_filename + '.csv'

# Write the labelled DataFrame without the index column.
df.to_csv(csv_filename, index=False)

# Trigger a browser download of the written file from the Colab runtime.
files.download(csv_filename)