In [4]:
import pandas as pd
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

data = {
    'key': ['A', 'B', 'C', 'D', 'E', 'A', 'B', 'C', 'D', 'E', 'A', 'B', 'C', 'D', 'E'],
    'tag': ['tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag1', 'tag2', 'tag3', 'tag4', 'tag5', 'tag1', 'tag2', 'tag3', 'tag4', 'tag5'],
    'class': ['class1', 'class2', 'class3', 'class4', 'class5', 'class1', 'class2', 'class3', 'class4', 'class5', 'class1', 'class2', 'class3', 'class4', 'class5'],
    'status': ['active', 'inactive', 'active', 'inactive', 'active', 'active', 'inactive', 'active', 'inactive', 'active', 'active', 'inactive', 'active', 'inactive', 'active'],
    'rendomdates': ["2024-10-02", "2024-10-07", "2024-10-12", "2024-10-17", "2024-10-22", "2024-10-27", "2024-11-01", "2024-11-06", "2024-11-11", "2024-11-16", "2024-11-21", "2024-11-26", "2024-11-21", "2024-11-26", "2024-11-30"],
    'size': [1, 2, 3, 4, 5, 4, 3, 5, 5, 2, 6, 2, 1, 3, 5]
}

# Generate random start and end times
start_times = []
end_times = []
for date in data['rendomdates']:
    start_time = datetime.strptime(date, '%Y-%m-%d')
    end_time = start_time + timedelta(hours=random.randint(1, 12), minutes=random.randint(0, 59), seconds=random.randint(0, 59))
    start_times.append(start_time)
    end_times.append(end_time)

data['start_time'] = start_times
data['end_time'] = end_times

df = pd.DataFrame(data)

df['time_difference'] = (df['end_time'] - df['start_time']).dt.total_seconds() / 60

grouped = df.groupby(['key', 'class', 'status', 'rendomdates'])

def format_time(seconds):
    try:
        minutes = seconds / 60
        return round(minutes, 2)
    except:
        return 0

def percentile_90(data):
    return data.quantile(0.9)

time_diff_stats = grouped['time_difference'].agg(['min', 'max', 'mean', 'median', 'std', 'size', percentile_90])
time_diff_stats['min'] = time_diff_stats['min'].apply(format_time)
time_diff_stats['max'] = time_diff_stats['max'].apply(format_time)
time_diff_stats['mean'] = time_diff_stats['mean'].apply(format_time)
time_diff_stats['median'] = time_diff_stats['median'].apply(format_time)
time_diff_stats['std'] = time_diff_stats['std'].apply(format_time)

print(time_diff_stats.head())

dates = time_diff_stats.index.get_level_values('rendomdates').unique()
mean_values = time_diff_stats['mean']
sizes = time_diff_stats['size']
print("Number of dates:", len(dates))
print("Number of mean values:", len(mean_values))
print("Dates:", dates)
print("Mean values:", mean_values)

# Convert dates to datetime objects
dates = pd.to_datetime(dates)

oct_dates = dates[dates.month == 10]
oct_mean_values = mean_values.loc[oct_dates]
oct_sizes = sizes.loc[oct_dates]

# Plot
fig, ax1 = plt.subplots(figsize=(10, 6))

ax1.plot(oct_dates, oct_mean_values, marker='o', linestyle='-', color='b', label='Mean Time Difference')
ax1.set_xlabel('Date')
ax1.set_ylabel('Mean Time Difference (minutes)', color='b')
ax1.tick_params(axis='y', labelcolor='b')
ax1.set_xticks(oct_dates)
ax1.set_xticklabels(oct_dates.strftime('%Y-%m-%d'), rotation=45)

# Create a second y-axis
ax2 = ax1.twinx()
ax2.plot(oct_dates, oct_sizes, marker='s', linestyle='--', color='r', label='Size')
ax2.set_ylabel('Size', color='r')
ax2.tick_params(axis='y', labelcolor='r')

fig.tight_layout()
plt.title('Mean Time Difference and Size Over Dates in October')
plt.grid(True)
plt.show()


                                   min    max   mean  median  std  size  \
key class  status   rendomdates                                           
A   class1 active   2024-10-02   12.64  12.64  12.64   12.64  NaN     1   
                    2024-10-27    7.22   7.22   7.22    7.22  NaN     1   
                    2024-11-21    8.31   8.31   8.31    8.31  NaN     1   
B   class2 inactive 2024-10-07   11.62  11.62  11.62   11.62  NaN     1   
                    2024-11-01    5.38   5.38   5.38    5.38  NaN     1   

                                 percentile_90  
key class  status   rendomdates                 
A   class1 active   2024-10-02      758.500000  
                    2024-10-27      433.300000  
                    2024-11-21      498.883333  
B   class2 inactive 2024-10-07      697.466667  
                    2024-11-01      322.950000  
Number of dates: 13
Number of mean values: 15
Dates: Index(['2024-10-02', '2024-10-27', '2024-11-21', '2024-10-07', '2024-11-01',
 

KeyError: "DatetimeIndex(['2024-10-02', '2024-10-27', '2024-10-07', '2024-10-12',\n               '2024-10-17', '2024-10-22'],\n              dtype='datetime64[ns]', name='rendomdates', freq=None) not in index"