In [1]:
import pandas as pd

In [3]:
# Load the raw sales orders from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="Salesdata.csv")

In [6]:
# Preview at most the first 20 rows to sanity-check the loaded columns.
df.head(n=20)

Unnamed: 0,order_id,customer_id,order_date,product,quantity,price
0,1,101,2025-01-15,Mobile,2,500
1,2,102,2025-01-20,Laptop,1,1200
2,3,103,2025-02-05,Tablet,3,300
3,4,101,2025-02-08,Mobile,1,500
4,5,104,2025-02-10,Headphones,5,50


In [5]:
# Delete the unwanted "Unnamed: 6" column and drop the rows containing NULLs.
# errors='ignore' turns the drop into a no-op when the column is absent, so
# this cell can be re-run (or run on a CSV without that stray column) without
# raising KeyError.
df = df.drop(columns=['Unnamed: 6'], errors='ignore')
df = df.dropna()

In [7]:
# Convert the 'quantity' and 'price' columns to numeric in place.
# The result is assigned back to the lowercase 'price' column (not a new
# 'Price' column) because the revenue calculation reads df['price'].
# NOTE: errors='coerce' turns values that cannot be parsed into NaN; those
# NaNs are not removed by this cell.
df['quantity'] = pd.to_numeric(df['quantity'], errors='coerce')
df['price'] = pd.to_numeric(df['price'], errors='coerce')

In [8]:
# Per-row revenue = quantity * price, stored as a new 'Revenue' column.
df['Revenue'] = df['quantity'] * df['price']

# Total revenue per product, highest first, as a Series named 'total_revenue'.
revenue_per_product = (
    df.groupby('product')['Revenue']
    .sum()
    .sort_values(ascending=False)
    .rename('total_revenue')
)
print(revenue_per_product)

product
Mobile        1500
Laptop        1200
Tablet         900
Headphones     250
Name: total_revenue, dtype: int64


In [9]:
# Total spending per customer, ordered from highest to lowest.
customer_spending = (
    df.groupby('customer_id')['Revenue']
    .sum()
    .sort_values(ascending=False)
)

# Keep the five biggest spenders as a Series named 'total_spending'.
top_5_customers = customer_spending.head(5).rename('total_spending')
print(top_5_customers)


customer_id
101    1500
102    1200
103     900
104     250
Name: total_spending, dtype: int64


In [10]:
# Parse 'order_date' as datetimes (unparseable values become NaT), overwriting
# the original column.
df['order_date'] = pd.to_datetime(df['order_date'], errors='coerce')

# Number of orders per calendar month, as a Series named 'total_orders'.
orders_per_month = (
    df.groupby(df['order_date'].dt.to_period('M'))
    .size()
    .rename('total_orders')
)
print(orders_per_month)

order_date
2025-01    2
2025-02    3
Freq: M, Name: total_orders, dtype: int64


In [11]:
# Write each summary Series to its own CSV file, including the header row.
for csv_name, summary in (
    ('product_revenue.csv', revenue_per_product),
    ('top_customers.csv', top_5_customers),
    ('monthly_orders.csv', orders_per_month),
):
    summary.to_csv(csv_name, header=True)