In [None]:
import pandas as pd

In [None]:
# Part 1: Explore the Data
# Load the CSV into the frame that every later cell reads from.
df = pd.read_csv('data.csv')

# Show which columns are available
column_names = df.columns
print(column_names)

# Show the output of DataFrame.describe() for the frame
summary_stats = df.describe()
print(summary_stats)


In [None]:
# Answer the questions
# Three most frequent values in the 'category' column
category_counts = df['category'].value_counts()
top_categories = category_counts.head(3)
print(top_categories)

# Most frequent 'subcategory' among the rows of the most frequent category
leading_category = top_categories.index[0]
in_leading_category = df['category'] == leading_category
top_subcategory = df.loc[in_leading_category, 'subcategory'].value_counts().idxmax()
print(top_subcategory)

# Five client ids that appear on the most rows
top_clients = df['client_id'].value_counts().head(5)
print(top_clients)

# The same ids as a plain Python list, in value_counts order
client_ids = top_clients.index.tolist()
print(client_ids)

# Sum of 'qty' over the rows belonging to the first id in that list
first_client_rows = df.loc[df['client_id'] == client_ids[0]]
total_units = first_client_rows['qty'].sum()
print(total_units)

In [None]:
# Part 2: Transform the Data
# Shipping is charged per unit of weight: rows whose weight is above the
# threshold use the lower rate, every other row uses the higher one.
HEAVY_WEIGHT_THRESHOLD = 50
HEAVY_SHIPPING_RATE = 7
STANDARD_SHIPPING_RATE = 10
# NOTE(review): 1.0925 presumably represents a 9.25% sales tax -- confirm.
PRICE_MULTIPLIER = 1.0925

# Line subtotal before shipping and the price multiplier
df['subtotal'] = df['unit_price'] * df['qty']

# Vectorised equivalent of a per-row "x * 7 if x > 50 else x * 10":
# keep the standard-rate price where weight <= threshold, otherwise take the
# heavy-rate price (NaN weights stay NaN either way).
line_weight = df['weight']
df['shipping_price'] = (line_weight * STANDARD_SHIPPING_RATE).where(
    line_weight <= HEAVY_WEIGHT_THRESHOLD,
    line_weight * HEAVY_SHIPPING_RATE,
)

# The parentheses matter: `*` binds tighter than `+`, so without them only
# shipping_price would be scaled and subtotal would be added unscaled.
df['total_price'] = (df['subtotal'] + df['shipping_price']) * PRICE_MULTIPLIER

# Cost of the line: unit cost times quantity, plus the shipping price
df['line_cost'] = df['unit_cost'] * df['qty'] + df['shipping_price']

# Profit is what was charged minus what the line cost
df['profit'] = df['total_price'] - df['line_cost']

In [None]:
# Part 3: Confirm Your Work
# Print the summed total_price of each listed order for a manual check.
order_ids = [2742071, 2173913, 6128929]
for order_id in order_ids:
    matches_order = df['order_id'] == order_id
    total_price = df.loc[matches_order, 'total_price'].sum()
    print(f"Order ID {order_id} had a total price of ${total_price:.2f}")

In [None]:
# Part 4: Summarize and Analyze
# Output column labels, and the df columns summed to fill them (after the id).
summary_columns = ['Client ID', 'Total Units', 'Total Shipping Price', 'Total Revenue', 'Total Profit']
summed_columns = ['qty', 'shipping_price', 'total_price', 'profit']

# One row per client id: the id followed by that client's column sums.
client_data = []
for client_id in client_ids:
    client_rows = df.loc[df['client_id'] == client_id]
    client_data.append([client_id] + [client_rows[column].sum() for column in summed_columns])

summary_df = pd.DataFrame(client_data, columns=summary_columns)

# Scale revenue and profit down by a factor of one million.
for scaled_column in ('Total Revenue', 'Total Profit'):
    summary_df[scaled_column] = summary_df[scaled_column] / 1e6

# Highest profit first
summary_df = summary_df.sort_values(by='Total Profit', ascending=False)
print(summary_df)