In [54]:
import pandas as pd
from datetime import datetime

# Product table: one row per product (id + display name)
product_data = dict(
    product_id=[1, 2, 3],
    product_name=["LC Phone", "LC T-Shirt", "LC Keychain"],
)
product_df = pd.DataFrame(product_data)

# Sales table: one row per product with a sales period and its average daily sales
sales_data = dict(
    product_id=[1, 2, 3],
    period_start=["2019-01-25", "2018-12-01", "2019-12-01"],
    period_end=["2019-02-28", "2020-01-01", "2020-01-31"],
    average_daily_sales=[100, 10, 1],
)
sales_df = pd.DataFrame(sales_data)

# Parse both period bounds from date strings into datetime columns in one step
sales_df[["period_start", "period_end"]] = sales_df[["period_start", "period_end"]].apply(pd.to_datetime)

display(product_df, sales_df)

Unnamed: 0,product_id,product_name
0,1,LC Phone
1,2,LC T-Shirt
2,3,LC Keychain


Unnamed: 0,product_id,period_start,period_end,average_daily_sales
0,1,2019-01-25,2019-02-28,100
1,2,2018-12-01,2020-01-01,10
2,3,2019-12-01,2020-01-31,1


In [55]:
# One row per report year, with that year's first (Jan 1) and last (Dec 31) day
years = ["2018", "2019", "2020"]
year_ranges = pd.DataFrame({"report_year": years}).assign(
    year_start=lambda frame: pd.to_datetime(frame["report_year"] + "-01-01"),
    year_end=lambda frame: pd.to_datetime(frame["report_year"] + "-12-31"),
)
display(year_ranges)

Unnamed: 0,report_year,year_start,year_end
0,2018,2018-01-01,2018-12-31
1,2019,2019-01-01,2019-12-31
2,2020,2020-01-01,2020-12-31


In [56]:
# Pair every sales period with every report year (Cartesian product) so the
# overlap of each (period, year) combination can be measured in the next step.
#
# The sales frame built at the top of this notebook is named `sales_df`; the
# bare name `sales` is never defined here, so it raises NameError on a fresh
# kernel. `how='cross'` (pandas >= 1.2) produces the same rows, in the same
# order, as merging on a constant helper column, without writing a temporary
# 'key' column into either input frame.
df = pd.merge(sales_df, year_ranges, how='cross')
display(df)

Unnamed: 0,product_id,period_start,period_end,average_daily_sales,report_year,year_start,year_end
0,1,2019-01-25,2019-02-28,100,2018,2018-01-01,2018-12-31
1,1,2019-01-25,2019-02-28,100,2019,2019-01-01,2019-12-31
2,1,2019-01-25,2019-02-28,100,2020,2020-01-01,2020-12-31
3,2,2018-12-01,2020-01-01,10,2018,2018-01-01,2018-12-31
4,2,2018-12-01,2020-01-01,10,2019,2019-01-01,2019-12-31
5,2,2018-12-01,2020-01-01,10,2020,2020-01-01,2020-12-31
6,3,2019-12-01,2020-01-31,1,2018,2018-01-01,2018-12-31
7,3,2019-12-01,2020-01-31,1,2019,2019-01-01,2019-12-31
8,3,2019-12-01,2020-01-31,1,2020,2020-01-01,2020-12-31


In [57]:
# The part of a sales period that falls inside a report year starts at the later
# of the two start dates and ends at the earlier of the two end dates.
df['overlap_start'] = df.loc[:, ['period_start', 'year_start']].max(axis='columns')
df['overlap_end'] = df.loc[:, ['period_end', 'year_end']].min(axis='columns')
# Both bounds are counted, hence the +1; a result <= 0 means the period and year do not overlap.
df['overlap_days'] = df['overlap_end'].sub(df['overlap_start']).dt.days.add(1)
display(df)

Unnamed: 0,product_id,period_start,period_end,average_daily_sales,report_year,year_start,year_end,overlap_start,overlap_end,overlap_days
0,1,2019-01-25,2019-02-28,100,2018,2018-01-01,2018-12-31,2019-01-25,2018-12-31,-24
1,1,2019-01-25,2019-02-28,100,2019,2019-01-01,2019-12-31,2019-01-25,2019-02-28,35
2,1,2019-01-25,2019-02-28,100,2020,2020-01-01,2020-12-31,2020-01-01,2019-02-28,-306
3,2,2018-12-01,2020-01-01,10,2018,2018-01-01,2018-12-31,2018-12-01,2018-12-31,31
4,2,2018-12-01,2020-01-01,10,2019,2019-01-01,2019-12-31,2019-01-01,2019-12-31,365
5,2,2018-12-01,2020-01-01,10,2020,2020-01-01,2020-12-31,2020-01-01,2020-01-01,1
6,3,2019-12-01,2020-01-31,1,2018,2018-01-01,2018-12-31,2019-12-01,2018-12-31,-334
7,3,2019-12-01,2020-01-31,1,2019,2019-01-01,2019-12-31,2019-12-01,2019-12-31,31
8,3,2019-12-01,2020-01-31,1,2020,2020-01-01,2020-12-31,2020-01-01,2020-01-31,31


In [58]:
# Keep only the (period, year) pairs that actually overlap (at least one day)
df = df.loc[df['overlap_days'].gt(0)]
display(df)

Unnamed: 0,product_id,period_start,period_end,average_daily_sales,report_year,year_start,year_end,overlap_start,overlap_end,overlap_days
1,1,2019-01-25,2019-02-28,100,2019,2019-01-01,2019-12-31,2019-01-25,2019-02-28,35
3,2,2018-12-01,2020-01-01,10,2018,2018-01-01,2018-12-31,2018-12-01,2018-12-31,31
4,2,2018-12-01,2020-01-01,10,2019,2019-01-01,2019-12-31,2019-01-01,2019-12-31,365
5,2,2018-12-01,2020-01-01,10,2020,2020-01-01,2020-12-31,2020-01-01,2020-01-01,1
7,3,2019-12-01,2020-01-31,1,2019,2019-01-01,2019-12-31,2019-12-01,2019-12-31,31
8,3,2019-12-01,2020-01-31,1,2020,2020-01-01,2020-12-31,2020-01-01,2020-01-31,31


In [59]:
# Sales attributed to each report year = overlapping days * average daily sales.
#
# At this point `df` is the result of a boolean-mask selection from a larger
# frame, and writing a new column into such a selection in place can raise
# SettingWithCopyWarning. `assign` returns a new frame with the extra column,
# which avoids the warning and makes this cell safe to re-run.
df = df.assign(total_amount=df['overlap_days'] * df['average_daily_sales'])
display(df)

Unnamed: 0,product_id,period_start,period_end,average_daily_sales,report_year,year_start,year_end,overlap_start,overlap_end,overlap_days,total_amount
1,1,2019-01-25,2019-02-28,100,2019,2019-01-01,2019-12-31,2019-01-25,2019-02-28,35,3500
3,2,2018-12-01,2020-01-01,10,2018,2018-01-01,2018-12-31,2018-12-01,2018-12-31,31,310
4,2,2018-12-01,2020-01-01,10,2019,2019-01-01,2019-12-31,2019-01-01,2019-12-31,365,3650
5,2,2018-12-01,2020-01-01,10,2020,2020-01-01,2020-12-31,2020-01-01,2020-01-01,1,10
7,3,2019-12-01,2020-01-31,1,2019,2019-01-01,2019-12-31,2019-12-01,2019-12-31,31,31
8,3,2019-12-01,2020-01-31,1,2020,2020-01-01,2020-12-31,2020-01-01,2020-01-31,31,31


In [60]:
# Collapse to one row per (product_id, report_year), summing total_amount;
# as_index=False keeps the grouping keys as regular columns.
df = df.groupby(by=['product_id', 'report_year'], as_index=False)['total_amount'].sum()
display(df)

Unnamed: 0,product_id,report_year,total_amount
0,1,2019,3500
1,2,2018,310
2,2,2019,3650
3,2,2020,10
4,3,2019,31
5,3,2020,31


In [61]:
# Attach product_name to each (product_id, report_year) total.
#
# The product frame built at the top of this notebook is named `product_df`;
# the bare name `product` is never defined here, so it raises NameError on a
# fresh kernel. The join type is spelled out (inner is pandas' default) so it
# is clear that totals without a matching product row are dropped.
df = pd.merge(df, product_df, on='product_id', how='inner')
display(df)

Unnamed: 0,product_id,report_year,total_amount,product_name
0,1,2019,3500,LC Phone
1,2,2018,310,LC T-Shirt
2,2,2019,3650,LC T-Shirt
3,2,2020,10,LC T-Shirt
4,3,2019,31,LC Keychain
5,3,2020,31,LC Keychain


In [62]:
# Put the report columns in their final order
df = df.loc[:, ['product_id', 'product_name', 'report_year', 'total_amount']]
display(df)

Unnamed: 0,product_id,product_name,report_year,total_amount
0,1,LC Phone,2019,3500
1,2,LC T-Shirt,2018,310
2,2,LC T-Shirt,2019,3650
3,2,LC T-Shirt,2020,10
4,3,LC Keychain,2019,31
5,3,LC Keychain,2020,31


In [63]:
# Order the report by product, then year, and renumber the rows from 0
df = df.sort_values(['product_id', 'report_year'], ignore_index=True)
display(df)

Unnamed: 0,product_id,product_name,report_year,total_amount
0,1,LC Phone,2019,3500
1,2,LC T-Shirt,2018,310
2,2,LC T-Shirt,2019,3650
3,2,LC T-Shirt,2020,10
4,3,LC Keychain,2019,31
5,3,LC Keychain,2020,31


https://leetcode.com/problems/total-sales-amount-by-year/description/?lang=pythondata

In [65]:
def total_sales_amount_by_year(
    product: pd.DataFrame,
    sales: pd.DataFrame,
    years=("2018", "2019", "2020"),
) -> pd.DataFrame:
    """Report the total sales amount of each product for each calendar year.

    This is the step-by-step pipeline from the cells above, gathered into one
    reusable function. Neither input frame is modified.

    Parameters
    ----------
    product : DataFrame with columns ``product_id`` and ``product_name``.
    sales : DataFrame with columns ``product_id``, ``period_start``,
        ``period_end`` (datetimes, or anything ``pd.to_datetime`` can parse)
        and ``average_daily_sales``. Both period bounds count as sale days.
    years : iterable of four-digit year strings to report on. Sale days that
        fall outside these years are not counted.

    Returns
    -------
    DataFrame with columns ``product_id``, ``product_name``, ``report_year``
    (one of the strings in ``years``) and ``total_amount``, sorted by
    ``product_id`` then ``report_year``, with a fresh 0..n-1 index.

    Example
    -------
    ``total_sales_amount_by_year(product_df, sales_df)`` with the sample
    frames defined at the top of this notebook.
    """
    report_years = list(years)
    year_ranges = pd.DataFrame({
        'report_year': report_years,
        'year_start': pd.to_datetime([f"{year}-01-01" for year in report_years]),
        'year_end': pd.to_datetime([f"{year}-12-31" for year in report_years]),
    })

    # Parse the period bounds on a new frame so the caller's `sales` is left as-is.
    periods = sales.assign(
        period_start=pd.to_datetime(sales['period_start']),
        period_end=pd.to_datetime(sales['period_end']),
    )

    # Cartesian product: every sales period paired with every report year.
    df = periods.merge(year_ranges, how='cross')

    # The overlap runs from the later start date to the earlier end date;
    # +1 because both bounds are counted. Non-positive means no overlap.
    overlap_start = df[['period_start', 'year_start']].max(axis=1)
    overlap_end = df[['period_end', 'year_end']].min(axis=1)
    df = df.assign(overlap_days=(overlap_end - overlap_start).dt.days + 1)
    df = df[df['overlap_days'] > 0]

    df = df.assign(total_amount=df['overlap_days'] * df['average_daily_sales'])
    totals = df.groupby(['product_id', 'report_year'], as_index=False)['total_amount'].sum()

    report = totals.merge(product, on='product_id')
    report = report[['product_id', 'product_name', 'report_year', 'total_amount']]
    return report.sort_values(by=['product_id', 'report_year']).reset_index(drop=True)

Unnamed: 0,product_id,product_name,report_year,total_amount
0,1,LC Phone,2019,3500
1,2,LC T-Shirt,2018,310
2,2,LC T-Shirt,2019,3650
3,2,LC T-Shirt,2020,10
4,3,LC Keychain,2019,31
5,3,LC Keychain,2020,31
