In [1]:
import pandas as pd
import numpy as np
import openpyxl as el

In [2]:
input_file = './2024Aug1-2024Aug11CustomTransaction.csv'
output_file = './Template.xlsx'

In [3]:
data = pd.read_csv(input_file, header=7)

In [4]:
def convert_to_numeric(data, column_name):
    if data[column_name].dtype != np.float64 and data[column_name].dtype != np.int64:
        for i in range(len(data)):
            data[column_name][i] = float(data[column_name][i].replace(',', ''))
        data[column_name] = pd.to_numeric(data[column_name])
    return data

In [5]:
data = convert_to_numeric(data, 'quantity')
data = convert_to_numeric(data, 'product sales')
data = convert_to_numeric(data, 'product sales tax')
data = convert_to_numeric(data, 'total')

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  data[column_name][i] = float(data[column_name][i].replace(',', ''))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-co

In [6]:
data.head()

Unnamed: 0,date/time,settlement id,type,order id,sku,description,quantity,marketplace,fulfilment,order city,...,gift wrap credits,giftwrap credits tax,promotional rebates,promotional rebates tax,marketplace withheld tax,selling fees,fba fees,other transaction fees,other,total
0,31 Jul 2024 23:33:56 UTC,22631592742,Order,205-2290971-4778706,MY-0IHR-ICWL,"Dell D3100 Docking Station 3, 0 Ultra HD Tripl...",1.0,amazon.co.uk,Amazon,KENILWORTH,...,0,0,0.0,0.0,0.0,-14.32,-4.68,0.0,0,58.99
1,31 Jul 2024 23:35:51 UTC,22631592742,Order,202-7636569-4587522,ZD-Z7S8-4ZA1,"Dell D6000 Usb-C Triple Docking Station, Up To...",1.0,amazon.co.uk,Amazon,GLASGOW,...,0,0,0.0,0.0,0.0,-15.42,-4.68,0.0,0,63.89
2,31 Jul 2024 23:47:21 UTC,22631592742,Refund,205-2244996-9845929,7U-X2MJ-KHWY,"DELL U2515H ADZG 25-Inch LCD Monitor, 350 cd/m...",1.0,amazon.co.uk,Amazon,WORCESTER,...,0,0,0.0,0.0,0.0,6.24,0.0,0.0,0,-108.75
3,1 Aug 2024 00:04:32 UTC,22631592742,Order,026-5773758-5798728,BL-VW07-9TIM,"Lenovo Essential Compact Wireless Mouse, Black...",1.0,amazon.co.uk,Amazon,Bideford,...,0,0,0.0,0.0,0.0,-2.75,-3.66,0.0,0,8.58
4,1 Aug 2024 00:40:55 UTC,22631592742,Order,203-9616116-7301942,MS116STKCL,Dell MS116 - Mouse - Optical - 2 Buttons - Wir...,9.0,amazon.co.uk,Amazon,BRISTOL,...,0,0,0.0,0.0,0.0,-18.14,-31.86,0.0,0,48.91


In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 436 entries, 0 to 435
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   date/time                 436 non-null    object 
 1   settlement id             436 non-null    int64  
 2   type                      436 non-null    object 
 3   order id                  395 non-null    object 
 4   sku                       397 non-null    object 
 5   description               435 non-null    object 
 6   quantity                  398 non-null    float64
 7   marketplace               423 non-null    object 
 8   fulfilment                387 non-null    object 
 9   order city                387 non-null    object 
 10  order state               108 non-null    object 
 11  order postal              383 non-null    object 
 12  tax collection model      5 non-null      object 
 13  product sales             436 non-null    float64
 14  product sa

In [8]:
adjustment = data.loc[data['type'] == 'Adjustment']['total'].astype(np.float64).sum()

In [9]:
fba_inventory_fee = data.loc[data['type'] == 'FBA Inventory Fee']['total'].astype(np.float64).sum() * -1

In [10]:
service_fee = data.loc[data['type'] == 'Service Fee']['total'].astype(np.float64).sum() * -1

In [11]:
product_sales_tax = data.loc[data['type'] == 'Order']['product sales tax'].astype(np.float64).sum()

In [12]:
paid_transaction_gross = data.loc[data['type'] == 'Order']['product sales'].astype(np.float64).sum() + data.loc[data['type'] == 'Order']['product sales tax'].astype(np.float64).sum()

In [13]:
sales_after_amazon_sales_fees = data.loc[data['type'] == 'Order']['total'].astype(np.float64).sum()

In [14]:
amazon_fees_on_orders = paid_transaction_gross - sales_after_amazon_sales_fees

In [15]:
returns = data.loc[data['type'] == 'Refund']['total'].astype(np.float64).sum() * -1

In [16]:
reclaimed_vat = data.loc[data['type'] == 'Refund']['product sales tax'].astype(np.float64).sum() * -1

In [17]:
retrocharge = data.loc[data['type'] == 'Retrocharge']['total'].astype(np.float64).sum()

In [18]:
order_sku_quantity_tax_total = data.loc[data['type'] == 'Order'][['sku', 'quantity', 'product sales tax', 'total']]

In [19]:
new_order_sku_quantity_tax_total = order_sku_quantity_tax_total.groupby(['sku'], as_index=False).sum()

In [20]:
refund_sku_quantity = data.loc[data['type'] == 'Refund'][['sku', 'quantity']]

In [21]:
new_refund_sku_quantity = refund_sku_quantity.groupby(['sku'], as_index=False).sum()

In [22]:
order_refund_sku = np.concatenate((data.loc[data['type'] == 'Order']['sku'].unique(), data.loc[data['type'] == 'Refund']['sku'].unique()))

In [27]:
order_refund_sku = pd.Series(order_refund_sku).dropna().to_numpy()

In [28]:
order_refund_sku = np.unique(order_refund_sku)

In [29]:
wb = el.load_workbook(output_file)

In [30]:
ws1 = wb['Sheet1']

In [31]:
ws1['B16'].value = adjustment

In [32]:
ws1['B9'].value = fba_inventory_fee

In [33]:
ws1['B8'].value = service_fee

In [34]:
ws1['B5'].value = product_sales_tax

In [35]:
ws1['B3'].value = paid_transaction_gross

In [36]:
ws1['B4'].value = sales_after_amazon_sales_fees

In [37]:
ws1['B6'].value = amazon_fees_on_orders

In [38]:
ws1['B13'].value = returns

In [39]:
ws1['B14'].value = reclaimed_vat

In [40]:
ws1['B21'].value = retrocharge

In [41]:
wb.save(output_file)

In [42]:
i = 27

while ws1.cell(row=i, column=1).value != None:
    if ws1.cell(row=i, column=1).value in new_order_sku_quantity_tax_total['sku'].unique():
        ws1.cell(row=i, column=5).value = new_order_sku_quantity_tax_total.loc[new_order_sku_quantity_tax_total['sku'] == ws1.cell(row=i, column=1).value]['quantity'].astype(np.float64).item()
    i = i + 1

In [43]:
wb.save(output_file)

In [44]:
i = 27

while ws1.cell(row=i, column=1).value != None:
    if ws1.cell(row=i, column=1).value in new_refund_sku_quantity['sku'].unique():
        ws1.cell(row=i, column=14).value = new_refund_sku_quantity.loc[new_refund_sku_quantity['sku'] == ws1.cell(row=i, column=1).value]['quantity'].astype(np.float64).item()
    i = i + 1

In [45]:
wb.save(output_file)

In [46]:
i = 27

while ws1.cell(row=i, column=1).value != None:
    if ws1.cell(row=i, column=1).value in new_refund_sku_quantity['sku'].unique():
        ws1.cell(row=i, column=15).value = ws1.cell(row=i, column=4).value * ws1.cell(row=i, column=14).value
    i = i + 1

In [47]:
wb.save(output_file)

In [48]:
i = 27

while ws1.cell(row=i, column=1).value != None:
    if ws1.cell(row=i, column=1).value in new_order_sku_quantity_tax_total['sku'].unique():
        ws1.cell(row=i, column=6).value = new_order_sku_quantity_tax_total.loc[new_order_sku_quantity_tax_total['sku'] == ws1.cell(row=i, column=1).value]['product sales tax'].astype(np.float64).item()
    i = i + 1

In [49]:
wb.save(output_file)

In [50]:
i = 27

while ws1.cell(row=i, column=1).value != None:
    if ws1.cell(row=i, column=1).value in new_order_sku_quantity_tax_total['sku'].unique():
        ws1.cell(row=i, column=7).value = new_order_sku_quantity_tax_total.loc[new_order_sku_quantity_tax_total['sku'] == ws1.cell(row=i, column=1).value]['total'].astype(np.float64).item()
    i = i + 1

In [51]:
wb.save(output_file)

In [52]:
report_file_sku = []

i = 27

while ws1.cell(row=i, column=1).value != None:
    report_file_sku.append(ws1.cell(row=i, column=1).value)
    i = i + 1

In [53]:
ws2 = wb['Sheet2']

In [54]:
for val in order_refund_sku:
    if val not in report_file_sku:
        i = 1
        while ws2.cell(row=i, column=1).value != None: 
            i = i + 1
        ws2.cell(row=i, column=1).value = val

In [55]:
wb.save(output_file)