In [None]:
import pandas as pd

# Step 1: Data Loading and Merging
# NOTE(review): these literals use backslash separators, which only resolve as
# directory separators on Windows; they are kept unchanged here.
item_info_path = r'..\dataset\item_info.csv'
transactions_info_path = r'..\dataset\transactions_info.csv'
master_table_path = r'..\src\utils\master_table.csv'


def build_master_table(transactions_info: pd.DataFrame,
                       item_info: pd.DataFrame) -> pd.DataFrame:
    """Build the transaction-level master table with engineered features.

    Args:
        transactions_info: Transactions; the columns ``item_code``,
            ``invoice_time`` and ``item_qty`` are read.
        item_info: Item attributes; the columns ``item_code`` and
            ``item_category`` are read.

    Returns:
        A new DataFrame (the inputs are not modified) with one row per
        transaction that has a matching item, plus the columns ``date``,
        ``hour``, ``hourly_sales``, ``avg_hourly_sales``, ``category``,
        ``hour_of_day``, ``day_of_week`` and ``month``.

    Raises:
        KeyError: If one of the columns listed above is missing.
    """
    # Merge data based on 'item_code'; the inner join drops transactions
    # whose item_code has no row in item_info.
    merged_data = pd.merge(transactions_info, item_info, on='item_code', how='inner')

    # Step 2: Create Primary Keys (Item | Date | Hour)
    merged_data['invoice_time'] = pd.to_datetime(merged_data['invoice_time'])
    merged_data['date'] = merged_data['invoice_time'].dt.date
    merged_data['hour'] = merged_data['invoice_time'].dt.hour

    # Step 3: Create Target Variable
    # The calendar date must be part of the key: grouping on (item, hour)
    # alone adds up every day that shares the same hour-of-day.
    merged_data['hourly_sales'] = (
        merged_data
        .groupby(['item_code', 'date', 'hour'])['item_qty']
        .transform('sum')
    )

    # Step 4: Sales Related Features
    # Average quantity an item sells in a given hour-of-day: total quantity
    # for (item, hour) divided by the number of distinct dates on which the
    # item sold in that hour, so each (item, date, hour) key counts once no
    # matter how many transaction rows it has.
    # NOTE: the average includes the row's own date.
    per_item_hour = merged_data.groupby(['item_code', 'hour'])
    merged_data['avg_hourly_sales'] = (
        per_item_hour['item_qty'].transform('sum')
        / per_item_hour['date'].transform('nunique')
    )

    # Step 5: Item Related Features (You can add more features based on item_info)
    merged_data['category'] = merged_data['item_category']

    # Step 6: Time Related Features
    merged_data['hour_of_day'] = merged_data['hour']
    merged_data['day_of_week'] = merged_data['invoice_time'].dt.dayofweek
    merged_data['month'] = merged_data['invoice_time'].dt.month

    return merged_data


# The guard is true both when this runs as a script and inside a
# Jupyter/IPython cell, so the names below are still defined as globals there;
# it only keeps the file I/O from running when the code is imported.
if __name__ == "__main__":
    item_info = pd.read_csv(item_info_path)
    transactions_info = pd.read_csv(transactions_info_path)

    merged_data = build_master_table(transactions_info, item_info)

    # Save the master table with synthetic features
    merged_data.to_csv(master_table_path, index=False)
