In [None]:
%pip install polars

In [None]:
import polars as pl


In [None]:
# Load the raw order export into a polars DataFrame.
orders_csv_path = 'all_orders_original.csv'
df = pl.read_csv(orders_csv_path)

In [None]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

In [None]:
# Drop columns the analysis does not need: the password, the second billing
# address line and the free-text order note.
# polars' DataFrame.drop takes only column names (there is no pandas-style `axis`).
df = df.drop(['Password', 'Billing Address 2', 'Order note'])

In [None]:
# Inspect the last 40 rows whose Billing Country is Croatia ("HR").
# polars selects rows with `filter` and an expression; boolean-mask indexing
# (`df[mask]`) is a pandas idiom.
df.filter(pl.col('Billing Country') == "HR").tail(40)

In [None]:
# Boolean mask selecting the orders whose totals are to be converted from HRK:
# billed to Croatia ("HR") and dated on or before 2022-08-12.
billed_to_croatia = df['Billing Country'] == "HR"
dated_up_to_cutoff = df['Date'] <= "2022-08-12"
condition = billed_to_croatia & dated_up_to_cutoff

# Divisor applied to HRK amounts to obtain EUR.
exchange_rate = 7.5345

In [None]:
# Build a "Total (EUR)" column:
#   * orders billed to Croatia ("HR") dated on or before 2022-08-12 are converted
#     from HRK by dividing by `exchange_rate`;
#   * every other order keeps its Total unchanged, so the column is populated for
#     all rows (later cells read "Total (EUR)" for non-HR orders as well).
# polars has no `.loc` / in-place column assignment; derived columns are added
# with `with_columns` and a when/then/otherwise expression.
# NOTE(review): if `Date` is stored as text this is a lexicographic comparison;
# values carrying a time component on 2022-08-12 would sort after "2022-08-12"
# and be excluded -- confirm the column format.
needs_hrk_conversion = (pl.col('Billing Country') == "HR") & (pl.col('Date') <= "2022-08-12")
df = df.with_columns(
    pl.when(needs_hrk_conversion)
    .then(pl.col('Total') / exchange_rate)
    .otherwise(pl.col('Total'))
    .alias('Total (EUR)')
)

In [None]:
# Re-inspect the last 40 Croatian ("HR") rows to check the conversion result.
# Row selection in polars goes through `filter`, not boolean-mask indexing.
df.filter(pl.col('Billing Country') == 'HR').tail(40)

In [None]:
# Drop the original Total column now that "Total (EUR)" replaces it.
# polars' DataFrame.drop takes only column names (no `axis` argument).
df = df.drop('Total')

In [None]:
# TODO: Decide whether the other monetary columns of the HRK orders should be converted to EUR as well.

In [None]:
# Replace missing Order Shipping values with 0.
# In polars, missing values are nulls, filled with `fill_null`; columns are
# replaced through `with_columns` because DataFrames do not support
# `df[col] = ...` assignment.
df = df.with_columns(pl.col('Order Shipping').fill_null(0))

In [None]:
# Flag orders that qualify for free shipping:
#   * billed to Croatia ("HR") with Total (EUR) >= 52, or
#   * billed anywhere else with Total (EUR) >= 290.
qualifies_in_croatia = (pl.col('Billing Country') == "HR") & (pl.col('Total (EUR)') >= 52)
qualifies_abroad = (pl.col('Billing Country') != "HR") & (pl.col('Total (EUR)') >= 290)
df = df.with_columns((qualifies_in_croatia | qualifies_abroad).alias('Free Shipping'))

# Compare the flag with what was actually charged: True where a qualifying order
# paid no shipping or a non-qualifying order paid some. The shipping amount is
# reduced to "was it free?" first, because comparing a boolean flag directly
# with a monetary amount is not meaningful.
df['Free Shipping'] == (df['Order Shipping'] == 0)

In [None]:
# Normalise spelling variants in the Payment Method column to one label each.
# Values not listed here are left unchanged.
payment_method_labels = {
    'Direct bank transfer': 'Bank Transfer',
    'Direct Bank Transfer': 'Bank Transfer',
    'Credit card': 'Card',
    'Credit Card': 'Card',
    'Paypal': 'PayPal',
}
# polars DataFrames do not support `df[col] = ...`; replace the column in one
# pass with `with_columns` and `Expr.replace`.
df = df.with_columns(pl.col('Payment Method').replace(payment_method_labels))

In [None]:
# Encode the Stripe Charge Captured column as integers: 'Yes' -> 1, 'No' -> 0.
# Any other value (including missing) becomes null. The result is cast to Int32
# so the column is numeric.
stripe_captured = pl.col('Stripe Charge Captured')
df = df.with_columns(
    pl.when(stripe_captured == 'Yes').then(1)
    .when(stripe_captured == 'No').then(0)
    .otherwise(None)
    .cast(pl.Int32)
    .alias('Stripe Charge Captured')
)
df.head()


In [None]:
# Merge the two Shipping Method Title columns: keep the first column's value and
# fall back to 'Shipping Method Title.1' where the first is missing, then drop
# the now-redundant second column.
# NOTE(review): the '.1' suffix is how pandas renames duplicate CSV headers;
# polars' reader may name a duplicated header differently -- confirm the actual
# column name in `df.columns`.
df = df.with_columns(
    pl.col('Shipping Method Title').fill_null(pl.col('Shipping Method Title.1'))
).drop('Shipping Method Title.1')


In [None]:
# Pie chart of revenue per shipping method.
# Sum "Total (EUR)" per Shipping Method Title (the original Total column has been
# dropped by this point), order the slices from largest to smallest, then hand
# the small aggregated frame to pandas for plotting.
shipping_methods = (
    df.group_by('Shipping Method Title')
    .agg(pl.col('Total (EUR)').sum())
    .sort('Total (EUR)', descending=True)
    .to_pandas()
    .set_index('Shipping Method Title')  # index values become the slice labels
)
shipping_methods.plot.pie(y='Total (EUR)', figsize=(10, 10))

In [None]:
# Pie chart of revenue per payment method.
# Sum "Total (EUR)" per Payment Method (the original Total column has been
# dropped by this point), order the slices from largest to smallest, then hand
# the small aggregated frame to pandas for plotting.
payment_methods = (
    df.group_by('Payment Method')
    .agg(pl.col('Total (EUR)').sum())
    .sort('Total (EUR)', descending=True)
    .to_pandas()
    .set_index('Payment Method')  # index values become the slice labels
)
payment_methods.plot.pie(y='Total (EUR)', figsize=(10, 10))

In [None]:
# Keep only the orders whose Status is "wc-completed" in a new DataFrame.
completed_orders = df.filter(pl.col('Status') == "wc-completed")
# Save that DataFrame to a CSV file (polars writes with `write_csv`;
# `to_csv` is the pandas method).
completed_orders.write_csv('completed_orders.csv')

In [None]:
# TODO: Compare the orders in the remaining statuses with the "wc-completed" orders.
