In [1]:
import pandas as pd

# Reading the CSV file
df = pd.read_csv(r"task\\orders.csv")

# Converting 'order_date' column to datetime type
df["order_date"] = pd.to_datetime(df["order_date"])

# Task 1: Compute the total revenue generated by the online store for each month
df["month"] = df["order_date"].dt.month
monthly_revenue = df.groupby("month")["product_price"].sum()

# Task 2: Compute the total revenue generated by each product
product_revenue = df.groupby("product_name")["product_price"].sum()

# Task 3: Compute the total revenue generated by each customer
customer_revenue = df.groupby("customer_id")["product_price"].sum()

# Task 4: Identify the top 10 customers by revenue generated
top_customers = customer_revenue.nlargest(10)


### Displaying the results

In [2]:
print("Task 1: Total revenue generated by the online store for each month")
print(monthly_revenue)

Task 1: Total revenue generated by the online store for each month
month
1     1811768.38
2     2188884.72
3     2791207.83
4     3367671.02
5     3135125.13
6     2562025.61
7     2632539.56
8     2230345.42
9     2084992.09
10    3715554.83
11    3180600.68
12    4588415.41
Name: product_price, dtype: float64


In [3]:
print("\nTask 2: Total revenue generated by each product")
print(product_revenue)


Task 2: Total revenue generated by each product
product_name
20in Monitor                   451068.99
27in 4K Gaming Monitor        2429637.70
27in FHD Monitor              1125974.93
34in Ultrawide Monitor        2348718.19
AA Batteries (4-pack)           79015.68
AAA Batteries (4-pack)          61716.59
Apple Airpods Headphones      2332350.00
Bose SoundSport Headphones    1332366.75
Flatscreen TV                 1440000.00
Google Phone                  3315000.00
LG Dryer                       387600.00
LG Washing Machine             399600.00
Lightning Charging Cable       323787.10
Macbook Pro Laptop            8030800.00
ThinkPad Laptop               4127958.72
USB-C Charging Cable           261740.85
Vareebadd Phone                826000.00
Wired Headphones               226395.18
iPhone                        4789400.00
Name: product_price, dtype: float64


In [4]:
print("\nTask 3: Total revenue generated by each customer")
print(customer_revenue)


Task 3: Total revenue generated by each customer
customer_id
0           11.95
1         1700.00
2          700.00
3          150.00
4           11.99
           ...   
178432      99.99
178433     999.99
178434     150.00
178435       2.99
178436     150.00
Name: product_price, Length: 178437, dtype: float64


In [5]:
print("\nTask 4: Top 10 customers by revenue generated")
print(top_customers)


Task 4: Top 10 customers by revenue generated
customer_id
188       3400.00
88996     3400.00
112102    3400.00
46185     2699.99
58001     2699.99
109581    2699.99
128329    2699.99
139182    2699.99
57744     2400.00
97391     2400.00
Name: product_price, dtype: float64


### Testing

In [6]:
import unittest
import os
import pandas as pd
from datetime import datetime

class TestOrderDataGeneration(unittest.TestCase):

    def setUp(self):
        # Run before each test
        self.file_path =  r'test//orders.csv'  # Provide the actual absolute path
        self.generated_data = pd.read_csv(self.file_path)

    def tearDown(self):
        # Run after each test
        pass

    def test_file_exists(self):
        self.assertTrue(os.path.exists(self.file_path), "CSV file not generated")

    def test_correct_columns(self):
        expected_columns = ['order_id', 'customer_id', 'order_date', 'product_id', 'product_name', 'product_price', 'quantity']
        self.assertListEqual(list(self.generated_data.columns), expected_columns, "Incorrect columns in the CSV file")      
        
    def test_data_types(self):
        self.assertIsInstance(int(self.generated_data['order_id'][0]), int, "order_id should be of type int")
        self.assertIsInstance(int(self.generated_data['customer_id'][0]), int, "customer_id should be of type int")
    
         # Converting 'order_date' to Timestamp
        self.generated_data['order_date'] = pd.to_datetime(self.generated_data['order_date'])
        self.assertIsInstance(self.generated_data['order_date'][0], pd.Timestamp, "order_date should be of type datetime")
    
        self.assertIsInstance(int(self.generated_data['product_id'][0]), int, "product_id should be of type int")
        self.assertIsInstance(self.generated_data['product_name'][0], str, "product_name should be of type str")
        self.assertIsInstance(float(self.generated_data['product_price'][0]), float, "product_price should be of type float")
        self.assertIsInstance(int(self.generated_data['quantity'][0]), int, "quantity should be of type int")


        
    def test_date_range(self):
        start_date = datetime(2019, 1, 1)
        end_date = datetime(2020, 1, 1)

        self.generated_data['order_date'] = pd.to_datetime(self.generated_data['order_date'])  # Converting to datetime

        date_range_check = (self.generated_data['order_date'] >= start_date) & (self.generated_data['order_date'] <= end_date)
        self.assertTrue(date_range_check.all(), "order_date should be within the specified date range")


    def test_positive_quantity(self):
        self.assertTrue(all(self.generated_data['quantity'] > 0), "quantity should be greater than 0")

