In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt

from datetime import datetime, timedelta
from tqdm import tqdm

In [None]:
# Read the per-auction timestamp records from the generated JSON file into
# `timestamps` (a mapping iterated with .items() in the next cell).
with open('../generated/timestamps.json') as timestamps_file:
    timestamps = json.load(timestamps_file)

In [None]:
# Flatten the {auction_id: {...}} mapping into one row per auction. The raw
# timestamp strings are kept as-is here and parsed once, column-wise, below
# (previously every value was parsed with strptime and then parsed again by
# pd.to_datetime).
data = [
    (
        auction_id,
        auction['first_appearance'],
        auction['last_appearance'],
        auction['item_id'],
    )
    for auction_id, auction in tqdm(timestamps.items())
]

df = pd.DataFrame(data, columns=['auction_id', 'first_appearance', 'last_appearance', 'item_id'])

# Parse both timestamp columns with the explicit '%Y-%m-%d %H:%M:%S' layout so
# a string in any other layout raises instead of being silently guessed at.
for column in ('first_appearance', 'last_appearance'):
    df[column] = pd.to_datetime(df[column], format='%Y-%m-%d %H:%M:%S')

# Time between first and last sighting of each auction, in hours, computed for
# the whole column at once. Added last so the column order stays
# auction_id, first_appearance, last_appearance, item_id, hours_on_sale.
df['hours_on_sale'] = (df['last_appearance'] - df['first_appearance']).dt.total_seconds() / 3600

df.head()

In [None]:
# Persist the per-auction durations under generated/ (row index is not written)
df.to_csv(path_or_buf='../generated/auction_durations.csv', index=False)

In [None]:
# Load from csv. CSV stores timestamps as plain text, so ask pandas to parse
# the two timestamp columns back into datetimes; otherwise the reloaded frame
# would carry strings where the frame that was saved carried datetimes.
df = pd.read_csv(
    '../generated/auction_durations.csv',
    parse_dates=['first_appearance', 'last_appearance'],
)
print(df.shape)
df.head()

In [None]:
def calculate_weekly_average(df):
    """Add a ``prev_week_avg`` column with each item's recent mean ``hours_on_sale``.

    For every row, the mean is taken over rows that share its ``item_id`` and
    whose ``first_appearance`` lies between 9 and 2 days (both ends inclusive)
    before that row's own ``first_appearance``. Missing ``hours_on_sale``
    values are skipped, and rows without any usable history get NaN, so the
    new column is always float64.

    The frame is modified in place: ``first_appearance`` and
    ``last_appearance`` are converted with ``pd.to_datetime`` and
    ``prev_week_avg`` is added (or overwritten).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``item_id``, ``first_appearance``,
        ``last_appearance`` and ``hours_on_sale``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # Convert the timestamp columns to datetime if they are not already
    df['first_appearance'] = pd.to_datetime(df['first_appearance'])
    df['last_appearance'] = pd.to_datetime(df['last_appearance'])

    # Window bounds relative to each row's first_appearance (2-9 days before)
    near_offset = pd.Timedelta(days=2)
    far_offset = pd.Timedelta(days=9)

    # Results are stored by row position, not by index label, so frames with
    # duplicate index labels (e.g. after a concat) are handled correctly.
    averages = np.full(len(df), np.nan)

    # Positional working copy. Rows without an item or a timestamp can neither
    # match nor be matched by anything, so they simply keep NaN.
    work = df[['item_id', 'first_appearance', 'hours_on_sale']].reset_index(drop=True)
    work = work.dropna(subset=['item_id', 'first_appearance'])
    # groupby keeps the row order within each group, so sorting once up front
    # hands every group over already ordered by time.
    work = work.sort_values('first_appearance', kind='stable')

    for _, item_rows in work.groupby('item_id', sort=False):
        times = pd.DatetimeIndex(item_rows['first_appearance'])
        hours = item_rows['hours_on_sale'].to_numpy(dtype='float64')

        # In the time-sorted group the rows inside the window form the
        # half-open slice [window_start, window_stop):
        #   window_start -> first row with time >= current - 9 days
        #   window_stop  -> first row with time >  current - 2 days
        window_start = times.searchsorted(times - far_offset, side='left')
        window_stop = times.searchsorted(times - near_offset, side='right')

        # Prefix sums turn every window sum/count into a single subtraction.
        # NaN durations contribute nothing, which mirrors Series.mean().
        has_value = ~np.isnan(hours)
        sum_prefix = np.concatenate(([0.0], np.cumsum(np.where(has_value, hours, 0.0))))
        count_prefix = np.concatenate(([0], np.cumsum(has_value)))

        window_sum = sum_prefix[window_stop] - sum_prefix[window_start]
        window_count = count_prefix[window_stop] - count_prefix[window_start]

        # Only divide where the window holds at least one usable value;
        # everything else stays NaN.
        means = np.full(len(item_rows), np.nan)
        np.divide(window_sum, window_count, out=means, where=window_count > 0)

        # After reset_index the group's index values are row positions in df
        averages[item_rows.index.to_numpy()] = means

    # Add the weekly averages as a new column
    df['prev_week_avg'] = averages

    return df

# Annotate every auction with its item's 2-9 day look-back average, then preview
df_with_averages = calculate_weekly_average(df=df)
df_with_averages.head()