In [54]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [55]:
# Workbook, sheet, and the subset of columns this analysis reads.
excel_file = 'Invoice.xlsx'
sheet_name = '1'
columns = ['Invoice Date', 'Vendor ID', 'Invoice Amount']

# Read the selected columns and discard every row that has a missing value.
df = pd.read_excel(
    excel_file,
    sheet_name=sheet_name,
    usecols=columns,
).dropna()

print(df)

    Invoice Date  Vendor ID  Invoice Amount
0     2022-06-15      65670         1732.67
1     2022-08-14      65670         2045.03
2     2022-10-13      65670         1398.03
3     2022-12-12      65670         1316.33
4     2023-02-10      65670         1755.29
..           ...        ...             ...
858   2023-11-12      65769         1308.83
859   2023-12-12      65769         1469.97
860   2024-01-11      65769         1956.70
861   2024-02-10      65769         2221.93
862   2024-03-11      65769         1410.39

[863 rows x 3 columns]


In [56]:
# Preprocess dates

def date_to_num(date_obj):
    """Convert a date to a 1-based day count where 2000-01-01 is day 1.

    Parameters
    ----------
    date_obj : pandas.Timestamp, numpy.datetime64, datetime.date/datetime, or str
        Anything ``pd.Timestamp`` can parse. Normalising through
        ``pd.Timestamp`` matters because only a pandas ``Timedelta`` exposes
        ``.days``; subtracting raw ``np.datetime64`` values yields a
        ``np.timedelta64``, which has no such attribute.

    Returns
    -------
    int
        Number of whole days since 2000-01-01, plus one.
    """
    base_date = pd.Timestamp('2000-01-01')
    delta_days = (pd.Timestamp(date_obj) - base_date).days
    return delta_days + 1

def date_from_num(days_count):
    """Convert a 1-based day count (2000-01-01 is day 1) to a 'DD-Mon-YYYY' string.

    Parameters
    ----------
    days_count : int
        Day number, as produced by ``date_to_num``.

    Returns
    -------
    str
        The date formatted with ``'%d-%b-%Y'``.
    """
    epoch = np.datetime64('2000-01-01')
    offset = np.timedelta64(days_count - 1, 'D')
    # .astype(object) turns the day-resolution datetime64 into a
    # datetime.date, which is what provides strftime.
    return (epoch + offset).astype(object).strftime('%d-%b-%Y')

In [57]:
# Replace each invoice date with its day count from date_to_num.
# The guard keeps this cell safe to re-run: after the first run the column
# already has an integer dtype, and passing those integers through
# date_to_num again would fail.
if not pd.api.types.is_integer_dtype(df['Invoice Date']):
    df['Invoice Date'] = df['Invoice Date'].apply(date_to_num)

print(df)

     Invoice Date  Vendor ID  Invoice Amount
0            8202      65670         1732.67
1            8262      65670         2045.03
2            8322      65670         1398.03
3            8382      65670         1316.33
4            8442      65670         1755.29
..            ...        ...             ...
858          8717      65769         1308.83
859          8747      65769         1469.97
860          8777      65769         1956.70
861          8807      65769         2221.93
862          8837      65769         1410.39

[863 rows x 3 columns]


In [58]:
# Show the first five rows of df.
print(df.iloc[:5])

   Invoice Date  Vendor ID  Invoice Amount
0          8202      65670         1732.67
1          8262      65670         2045.03
2          8322      65670         1398.03
3          8382      65670         1316.33
4          8442      65670         1755.29


In [68]:
# Find potential vendor IDs

# Look-back distances, in days, from a day in the window to an earlier invoice.
LOOKBACK_DAYS = (15, 30, 45, 60)

# Window under consideration: every day of April 2024, both ends inclusive.
start_date = np.datetime64('2024-04-01')
end_date = np.datetime64('2024-04-30')

# Express the window in the same encoding as df['Invoice Date'],
# where 2000-01-01 is day 1.
epoch = np.datetime64('2000-01-01')
first_day = int((start_date - epoch).astype(int)) + 1
last_day = int((end_date - epoch).astype(int)) + 1

# Every invoice day number that lies exactly one look-back distance before
# some day in the window.
candidate_invoice_days = sorted(
    {
        day - lookback
        for day in range(first_day, last_day + 1)
        for lookback in LOOKBACK_DAYS
    }
)

# A single vectorised membership test replaces one dataframe scan per
# (day, look-back) pair; the resulting set of vendors is the same.
is_candidate = df['Invoice Date'].isin(candidate_invoice_days)
potential_vendors = set(df.loc[is_candidate, 'Vendor ID'])

In [69]:
# Display the set of vendor IDs collected by the search above.
print(potential_vendors)

{65671, 65673, 65674, 65675, 65677, 65679, 65681, 65683, 65684, 65685, 65689, 65691, 65693, 65694, 65695, 65696, 65699, 65703, 65705, 65707, 65710, 65712, 65714, 65716, 65718, 65720, 65722, 65723, 65724, 65726, 65727, 65728, 65731, 65733, 65735, 65736, 65737, 65739, 65740, 65743, 65744, 65747, 65750, 65752, 65760, 65761, 65764, 65765, 65766, 65767, 65768, 65769}


6