In [6]:
import os
import logging
import pandas as pd
import datetime as dt
import sys


In [7]:
# ---------------------------------------------------------
# 0. LOGGING CONFIGURATION
# ---------------------------------------------------------
# This sets up the logger to write to a file AND print to the console
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("etl_process.log"), 
        logging.StreamHandler(sys.stdout)       
    ]
)

logger = logging.getLogger(__name__)

In [8]:
def run_etl_pipeline():
    
    logger.info(">>> ETL Process Started")
    sales_dir = 'C:\\Users\\karth\\Downloads\\Python Assignment\\Python Assignment\\Sales_Split'
    # ---------------------------------------------------------
    # Checking File Path
    # ---------------------------------------------------------
    try:
        if not os.path.exists(sales_dir):
            logger.error(f">>> Critical Error: Directory '{sales_dir}' not found.")
            return
        file_list = [f for f in os.listdir(sales_dir) if f.endswith('.csv')]
        file_count = len(file_list)
        logger.info(f">>> Scanning directory Ended.")
        logger.info(f">>> Found {file_count} sales files to process.")
        if file_count == 0:
            logger.warning(">>> No CSV files found in the directory. Stopping process.")
            return
    except Exception as e:
        logger.error(f">>> Error accessing directory '{sales_dir}': {e}")
        return
    # ---------------------------------------------------------
    # Loading and Merging the datasets
    # ---------------------------------------------------------
    try:
        data_frames = []
        
        logger.info(">>> Reading files ...")
        for filename in file_list:
            file_path = os.path.join(sales_dir, filename)
            try:
                # Read individual file
                df = pd.read_csv(file_path)
                data_frames.append(df)
            except Exception as e:
                logger.error(f">>> Failed to read file {filename}: {e}")
                continue
        if not data_frames:
            logger.error(">>> No valid dataframes loaded.")
            return
        all_sales_data = pd.concat(data_frames, ignore_index=True)
        total_rows = len(all_sales_data)
        logger.info(f">>> Successfully merged {len(data_frames)} files. Total sales records: {total_rows}")
        
    
        if total_rows > 0:
            logger.info(">>> Validation Passed: Sales data is not empty. Proceeding to Joins.")
        try:
                store_master = pd.read_csv('../store_master.csv')
                product_master = pd.read_csv('../product_master.csv')
                logger.info(f'>>> Total Rows in Store_Master Dataset {len(store_master)}')
                logger.info(f'>>> Total Rows in Product_Master Dataset {len(product_master)}')
        except FileNotFoundError as e:
                logger.error(f">>> Critical Error: Master file missing - {e}")
                return
        
        
        # -----------------------------------------------------
            # Left Joing the three datasets Extracted.
            # -----------------------------------------------------
        merged_df = pd.merge(all_sales_data, store_master, on='store_id', how='left')
        logger.info('>>> Joined Sales Data with Store Dataset')
        merged_df = pd.merge(merged_df, product_master, on='sku', how='left')
        logger.info(">>> Joined Sales Data with Product Dataset.")
        
        # -----------------------------------------------------
            # 5.Filtering for the store with active status column
            # -----------------------------------------------------
        if 'status' in merged_df.columns:
            active_sales_df = merged_df[merged_df['status'] == 'Active'].copy()
            logger.info(">>> Filtered for Active stores.")
        else:
            logger.warning(">>> Warning: 'status' column not found. Skipping filter.")
            active_sales_df = merged_df
        # -----------------------------------------------------
            # 6. Aggreating the Data with  sum() function
            # -----------------------------------------------------
        logger.info(">>> Aggregating data...")
        agg_list=['store_id', 'electronics_type', 'classification']
        name_qunty=['sales_qty', 'sales_value']
        aggregated_df = active_sales_df.groupby(
                agg_list
            )[name_qunty].sum().reset_index()
        
        # -----------------------------------------------------
            # Saving the Aggregated data
            # -----------------------------------------------------
        output_file = 'aggregated_sales.csv'
        aggregated_df.to_csv(output_file, index=False)
        logger.info(f">>> SUCCESS. Final output saved as '{output_file}'.")
        
    except Exception as e:
        logger.error(f">>> Error loading data: {e}")
        return
    
    
    

In [9]:
if __name__ == "__main__":
    run_etl_pipeline()

2025-12-23 00:42:42,163 - INFO - >>> ETL Process Started
2025-12-23 00:42:42,165 - INFO - >>> Scanning directory Ended.
2025-12-23 00:42:42,166 - INFO - >>> Found 20 sales files to process.
2025-12-23 00:42:42,167 - INFO - >>> Reading files ...
2025-12-23 00:42:56,335 - INFO - >>> Successfully merged 20 files. Total sales records: 20000000
2025-12-23 00:42:56,337 - INFO - >>> Validation Passed: Sales data is not empty. Proceeding to Joins.
2025-12-23 00:42:56,444 - INFO - >>> Total Rows in Store_Master Dataset 20000
2025-12-23 00:42:56,445 - INFO - >>> Total Rows in Product_Master Dataset 100000
2025-12-23 00:43:04,084 - INFO - >>> Joined Sales Data with Store Dataset
2025-12-23 00:43:21,647 - INFO - >>> Joined Sales Data with Product Dataset.
2025-12-23 00:43:34,709 - INFO - >>> Filtered for Active stores.
2025-12-23 00:43:34,895 - INFO - >>> Aggregating data...
2025-12-23 00:43:49,543 - INFO - >>> SUCCESS. Final output saved as 'aggregated_sales.csv'.


In [10]:
sales_dir = 'C:\\Users\\karth\\Downloads\\Python Assignment\\Python Assignment\\Sales_Split'

In [11]:
file_list = [f for f in os.listdir(sales_dir) if f.endswith('.csv')]

In [12]:
file_list

['sales_part_1.csv',
 'sales_part_10.csv',
 'sales_part_11.csv',
 'sales_part_12.csv',
 'sales_part_13.csv',
 'sales_part_14.csv',
 'sales_part_15.csv',
 'sales_part_16.csv',
 'sales_part_17.csv',
 'sales_part_18.csv',
 'sales_part_19.csv',
 'sales_part_2.csv',
 'sales_part_20.csv',
 'sales_part_3.csv',
 'sales_part_4.csv',
 'sales_part_5.csv',
 'sales_part_6.csv',
 'sales_part_7.csv',
 'sales_part_8.csv',
 'sales_part_9.csv']

In [13]:
data_frames = []

In [14]:
for filename in file_list:
            file_path = os.path.join(sales_dir, filename)
            df=pd.read_csv(file_path)
            data_frames.append(df)
            

In [15]:
data_frames[0].head()

Unnamed: 0,store_id,sku,date,sales_qty,sales_value
0,S00341,SKU064049,2024-11-02,1,364.9
1,S16112,SKU016687,2024-08-19,1,110.03
2,S13232,SKU079938,2024-11-27,1,65.94
3,S16882,SKU024958,2025-03-16,1,74.46
4,S13774,SKU024371,2025-02-05,1,266.55


In [16]:
all_sales_data = pd.concat(data_frames, ignore_index=True)

In [17]:
all_sales_data.head(10)

Unnamed: 0,store_id,sku,date,sales_qty,sales_value
0,S00341,SKU064049,2024-11-02,1,364.9
1,S16112,SKU016687,2024-08-19,1,110.03
2,S13232,SKU079938,2024-11-27,1,65.94
3,S16882,SKU024958,2025-03-16,1,74.46
4,S13774,SKU024371,2025-02-05,1,266.55
5,S05891,SKU094915,2025-01-27,1,165.24
6,S03025,SKU010522,2024-09-10,2,282.45
7,S19024,SKU043792,2024-06-19,1,182.86
8,S17730,SKU055661,2024-12-09,1,351.27
9,S11860,SKU004353,2024-07-14,1,44.71


In [56]:
all_sales_data.isna().sum()

store_id       0
sku            0
date           0
sales_qty      0
sales_value    0
dtype: int64

In [18]:
store_master = pd.read_csv('../store_master.csv')
product_master = pd.read_csv('../product_master.csv')

In [19]:
store_master.head(10)

Unnamed: 0,store_id,city,state,region,status
0,S00001,Kolkata,KA,East,Active
1,S00002,Chennai,TS,West,Inactive
2,S00003,Hyderabad,TN,East,Active
3,S00004,Kolkata,DL,East,Active
4,S00005,Bangalore,TN,West,Active
5,S00006,Hyderabad,TS,North,Active
6,S00007,Hyderabad,TS,North,Active
7,S00008,Kolkata,WB,West,Active
8,S00009,Mumbai,TN,South,Active
9,S00010,Bangalore,TN,East,Active


In [20]:
product_master.head(10)

Unnamed: 0,sku,category,electronics_type,classification,mrp
0,SKU000001,ELECTRONICS,CHARGERS,Standard,453.55
1,SKU000002,ELECTRONICS,NETWORKING_DEVICES,Standard,487.13
2,SKU000003,ELECTRONICS,LAPTOPS,Economy,126.47
3,SKU000004,ELECTRONICS,SMARTWATCHES,Standard,330.04
4,SKU000005,ELECTRONICS,SMARTPHONES,Economy,407.68
5,SKU000006,ELECTRONICS,CAMERAS,Economy,168.71
6,SKU000007,ELECTRONICS,SMART_TV,Economy,435.71
7,SKU000008,ELECTRONICS,FITNESS_BANDS,Premium,187.58
8,SKU000009,ELECTRONICS,TABLETS,Economy,474.32
9,SKU000010,ELECTRONICS,MOBILES,Economy,319.49


In [21]:
merged_df = pd.merge(all_sales_data, store_master, on='store_id', how='left')

In [22]:
merged_df.head(10)

Unnamed: 0,store_id,sku,date,sales_qty,sales_value,city,state,region,status
0,S00341,SKU064049,2024-11-02,1,364.9,Bangalore,TS,West,Active
1,S16112,SKU016687,2024-08-19,1,110.03,Chennai,TN,North,Active
2,S13232,SKU079938,2024-11-27,1,65.94,Kolkata,DL,South,Active
3,S16882,SKU024958,2025-03-16,1,74.46,Mumbai,TS,South,Active
4,S13774,SKU024371,2025-02-05,1,266.55,Chennai,TN,South,Active
5,S05891,SKU094915,2025-01-27,1,165.24,Bangalore,MH,West,Inactive
6,S03025,SKU010522,2024-09-10,2,282.45,Bangalore,DL,East,Active
7,S19024,SKU043792,2024-06-19,1,182.86,Chennai,DL,West,Active
8,S17730,SKU055661,2024-12-09,1,351.27,Mumbai,TN,East,Active
9,S11860,SKU004353,2024-07-14,1,44.71,Delhi,MH,North,Active


In [23]:
merged_df = pd.merge(merged_df, product_master, on='sku', how='left')

In [24]:
merged_df.head(10)

Unnamed: 0,store_id,sku,date,sales_qty,sales_value,city,state,region,status,category,electronics_type,classification,mrp
0,S00341,SKU064049,2024-11-02,1,364.9,Bangalore,TS,West,Active,ELECTRONICS,MOBILES,Standard,483.39
1,S16112,SKU016687,2024-08-19,1,110.03,Chennai,TN,North,Active,ELECTRONICS,MODEMS,Economy,171.03
2,S13232,SKU079938,2024-11-27,1,65.94,Kolkata,DL,South,Active,ELECTRONICS,CABLES,Economy,103.32
3,S16882,SKU024958,2025-03-16,1,74.46,Mumbai,TS,South,Active,ELECTRONICS,SMARTWATCHES,Economy,110.4
4,S13774,SKU024371,2025-02-05,1,266.55,Chennai,TN,South,Active,ELECTRONICS,FITNESS_BANDS,Premium,434.85
5,S05891,SKU094915,2025-01-27,1,165.24,Bangalore,MH,West,Inactive,ELECTRONICS,ACTION_CAMERAS,Standard,260.5
6,S03025,SKU010522,2024-09-10,2,282.45,Bangalore,DL,East,Active,ELECTRONICS,TABLETS,Economy,182.83
7,S19024,SKU043792,2024-06-19,1,182.86,Chennai,DL,West,Active,ELECTRONICS,CABLES,Standard,237.06
8,S17730,SKU055661,2024-12-09,1,351.27,Mumbai,TN,East,Active,ELECTRONICS,DSLR_CAMERAS,Economy,453.68
9,S11860,SKU004353,2024-07-14,1,44.71,Delhi,MH,North,Active,ELECTRONICS,DSLR_CAMERAS,Standard,71.96


In [25]:
merged_df[merged_df['status']=='Active'].isna().sum()

store_id            0
sku                 0
date                0
sales_qty           0
sales_value         0
city                0
state               0
region              0
status              0
category            0
electronics_type    0
classification      0
mrp                 0
dtype: int64

In [26]:
active_sales_df=merged_df[merged_df['status']=='Active'].copy()

In [27]:
active_sales_df.head(10)

Unnamed: 0,store_id,sku,date,sales_qty,sales_value,city,state,region,status,category,electronics_type,classification,mrp
0,S00341,SKU064049,2024-11-02,1,364.9,Bangalore,TS,West,Active,ELECTRONICS,MOBILES,Standard,483.39
1,S16112,SKU016687,2024-08-19,1,110.03,Chennai,TN,North,Active,ELECTRONICS,MODEMS,Economy,171.03
2,S13232,SKU079938,2024-11-27,1,65.94,Kolkata,DL,South,Active,ELECTRONICS,CABLES,Economy,103.32
3,S16882,SKU024958,2025-03-16,1,74.46,Mumbai,TS,South,Active,ELECTRONICS,SMARTWATCHES,Economy,110.4
4,S13774,SKU024371,2025-02-05,1,266.55,Chennai,TN,South,Active,ELECTRONICS,FITNESS_BANDS,Premium,434.85
6,S03025,SKU010522,2024-09-10,2,282.45,Bangalore,DL,East,Active,ELECTRONICS,TABLETS,Economy,182.83
7,S19024,SKU043792,2024-06-19,1,182.86,Chennai,DL,West,Active,ELECTRONICS,CABLES,Standard,237.06
8,S17730,SKU055661,2024-12-09,1,351.27,Mumbai,TN,East,Active,ELECTRONICS,DSLR_CAMERAS,Economy,453.68
9,S11860,SKU004353,2024-07-14,1,44.71,Delhi,MH,North,Active,ELECTRONICS,DSLR_CAMERAS,Standard,71.96
10,S19645,SKU097297,2024-09-25,1,93.13,Pune,DL,South,Active,ELECTRONICS,SMART_HOME_DEVICES,Premium,145.57


In [28]:
agg_list=['store_id', 'electronics_type', 'classification']
name_qunty=['sales_qty', 'sales_value']

In [29]:
aggregated_df = active_sales_df.groupby(
                agg_list
            )[name_qunty].sum(numeric_only=True).reset_index()

In [30]:
aggregated_df.head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
0,S00001,ACTION_CAMERAS,Economy,14,2297.85
1,S00001,ACTION_CAMERAS,Luxury,5,1002.31
2,S00001,ACTION_CAMERAS,Premium,11,1887.16
3,S00001,ACTION_CAMERAS,Standard,30,5993.22
4,S00001,AUDIO_DEVICES,Economy,18,3675.57
5,S00001,AUDIO_DEVICES,Premium,9,2584.09
6,S00001,AUDIO_DEVICES,Standard,15,2815.85
7,S00001,CABLES,Economy,20,3844.09
8,S00001,CABLES,Premium,7,1177.49
9,S00001,CABLES,Standard,29,5564.92


In [31]:
aggregated_df = active_sales_df.groupby(
                agg_list
            )[name_qunty].sum(numeric_only=True).sort_values(by=name_qunty, ascending=False).reset_index()

In [32]:
aggregated_df.head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
0,S13202,STORAGE_DEVICES,Economy,60,11524.11
1,S19110,POWER_BANKS,Economy,55,9956.23
2,S14589,LAPTOPS,Economy,53,9753.5
3,S18651,MOBILES,Economy,52,10740.8
4,S00519,CABLES,Economy,52,9916.76
5,S00730,MOBILES,Economy,52,9424.47
6,S12343,WEARABLES,Economy,52,9340.88
7,S04549,FEATURE_PHONES,Economy,51,10872.04
8,S06423,HOME_THEATRE,Economy,51,10706.6
9,S12493,POWER_BANKS,Economy,51,10702.63


In [33]:
aggregated_df = active_sales_df.groupby(
                agg_list
            )[name_qunty].sum(numeric_only=True).sort_values(by=['sales_value','sales_qty'], ascending=False).reset_index()

In [34]:
aggregated_df.head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
0,S16854,CABLES,Economy,46,11799.04
1,S07737,FITNESS_BANDS,Economy,47,11558.09
2,S13202,STORAGE_DEVICES,Economy,60,11524.11
3,S19690,GAMING_ACCESSORIES,Standard,44,11433.02
4,S16082,AUDIO_DEVICES,Economy,48,11255.77
5,S01679,HEADPHONES,Economy,45,11040.68
6,S13070,CAMERAS,Economy,43,10982.58
7,S03894,FEATURE_PHONES,Economy,46,10949.79
8,S05972,CAMERAS,Economy,42,10948.26
9,S10961,TELEVISIONS,Economy,44,10887.54


In [35]:
luxry_aggregated_df=aggregated_df[aggregated_df['classification']=='Luxury'].copy()

In [36]:
luxry_aggregated_df.head(10).sort_values(by=['sales_value','sales_qty'], ascending=False)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
141113,S18332,EARPHONES,Luxury,16,4979.17
228403,S14308,DSLR_CAMERAS,Luxury,16,4497.92
295166,S14788,FITNESS_BANDS,Luxury,15,4212.05
323750,S06683,SECURITY_CAMERAS,Luxury,17,4102.93
345075,S03348,SPEAKERS,Luxury,19,4025.86
356603,S04461,TABLETS,Luxury,12,3984.41
365163,S09326,ROUTERS,Luxury,16,3954.37
368241,S04025,CHARGERS,Luxury,12,3943.81
379825,S16219,WEARABLES,Luxury,15,3904.84
391305,S10440,NETWORKING_DEVICES,Luxury,13,3866.56


In [37]:
luxry_aggregated_df.head(10).sort_values(by=name_qunty, ascending=False)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
345075,S03348,SPEAKERS,Luxury,19,4025.86
323750,S06683,SECURITY_CAMERAS,Luxury,17,4102.93
141113,S18332,EARPHONES,Luxury,16,4979.17
228403,S14308,DSLR_CAMERAS,Luxury,16,4497.92
365163,S09326,ROUTERS,Luxury,16,3954.37
295166,S14788,FITNESS_BANDS,Luxury,15,4212.05
379825,S16219,WEARABLES,Luxury,15,3904.84
391305,S10440,NETWORKING_DEVICES,Luxury,13,3866.56
356603,S04461,TABLETS,Luxury,12,3984.41
368241,S04025,CHARGERS,Luxury,12,3943.81


In [38]:
aggregated_df['electronics_type'].unique()

array(['CABLES', 'FITNESS_BANDS', 'STORAGE_DEVICES', 'GAMING_ACCESSORIES',
       'AUDIO_DEVICES', 'HEADPHONES', 'CAMERAS', 'FEATURE_PHONES',
       'TELEVISIONS', 'SMARTPHONES', 'EARPHONES', 'MOBILES',
       'HOME_THEATRE', 'POWER_BANKS', 'WEARABLES', 'SECURITY_CAMERAS',
       'SMART_TV', 'MODEMS', 'NETWORKING_DEVICES', 'ROUTERS',
       'SMART_HOME_DEVICES', 'GAMING_CONSOLES', 'DSLR_CAMERAS', 'TABLETS',
       'LAPTOPS', 'SPEAKERS', 'CHARGERS', 'SMARTWATCHES',
       'ACTION_CAMERAS', 'DESKTOPS'], dtype=object)

In [39]:
aggregated_df['classification'].unique()

array(['Economy', 'Standard', 'Premium', 'Luxury'], dtype=object)

In [40]:
prem_aggre_df=aggregated_df[aggregated_df['classification']=='Premium'].copy()

In [41]:
prem_aggre_df.sort_values(by=name_qunty, ascending=False).head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
18580,S11418,CHARGERS,Premium,30,6604.32
22599,S10670,SMARTPHONES,Premium,29,6467.83
28526,S10756,GAMING_CONSOLES,Premium,29,6298.14
80256,S08881,CABLES,Premium,29,5483.51
34050,S04759,TELEVISIONS,Premium,28,6167.77
34859,S10448,TELEVISIONS,Premium,28,6150.38
61031,S10958,GAMING_ACCESSORIES,Premium,28,5711.36
254764,S14082,EARPHONES,Premium,28,4378.29
13043,S02092,FITNESS_BANDS,Premium,27,6854.23
34289,S05123,DESKTOPS,Premium,27,6163.21


In [42]:
prem_aggre_df.sort_values(by=['sales_value','sales_qty'], ascending=False).head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
7368,S17803,DESKTOPS,Premium,24,7248.16
13043,S02092,FITNESS_BANDS,Premium,27,6854.23
13693,S03694,CAMERAS,Premium,26,6820.46
18580,S11418,CHARGERS,Premium,30,6604.32
22106,S13575,GAMING_ACCESSORIES,Premium,22,6482.46
22599,S10670,SMARTPHONES,Premium,29,6467.83
27135,S01185,SPEAKERS,Premium,24,6336.17
28526,S10756,GAMING_CONSOLES,Premium,29,6298.14
31398,S12076,LAPTOPS,Premium,25,6228.63
33836,S11561,NETWORKING_DEVICES,Premium,23,6172.36


In [43]:
stand_agg_df=aggregated_df[aggregated_df['classification']=='Standard'].copy()

In [44]:
stand_agg_df.sort_values(by=name_qunty, ascending=False).head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
341,S14955,SMART_TV,Standard,48,9087.89
17727,S19990,EARPHONES,Standard,47,6639.23
554,S14000,CABLES,Standard,46,8816.26
3,S19690,GAMING_ACCESSORIES,Standard,44,11433.02
458,S09449,SMART_TV,Standard,44,8926.3
938,S18826,TELEVISIONS,Standard,44,8505.56
1754,S14739,SMART_TV,Standard,44,8129.92
124,S17368,SECURITY_CAMERAS,Standard,43,9633.03
284,S18040,NETWORKING_DEVICES,Standard,43,9193.24
604,S10507,CABLES,Standard,43,8770.33


In [45]:
stand_agg_df.sort_values(by=['sales_value','sales_qty'], ascending=False).head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
3,S19690,GAMING_ACCESSORIES,Standard,44,11433.02
44,S18525,SECURITY_CAMERAS,Standard,41,10129.5
75,S10058,HEADPHONES,Standard,40,9898.0
82,S00377,SMART_TV,Standard,37,9846.17
100,S16902,MOBILES,Standard,34,9768.95
111,S07432,SMART_TV,Standard,40,9726.82
124,S17368,SECURITY_CAMERAS,Standard,43,9633.03
161,S06023,CABLES,Standard,36,9520.05
201,S16142,CABLES,Standard,40,9393.01
265,S10643,MOBILES,Standard,38,9243.89


In [46]:
eco_agg_df=aggregated_df[aggregated_df['classification']=='Economy'].copy()

In [47]:
eco_agg_df.sort_values(by=name_qunty, ascending=False).head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
2,S13202,STORAGE_DEVICES,Economy,60,11524.11
63,S19110,POWER_BANKS,Economy,55,9956.23
104,S14589,LAPTOPS,Economy,53,9753.5
15,S18651,MOBILES,Economy,52,10740.8
70,S00519,CABLES,Economy,52,9916.76
189,S00730,MOBILES,Economy,52,9424.47
225,S12343,WEARABLES,Economy,52,9340.88
11,S04549,FEATURE_PHONES,Economy,51,10872.04
16,S06423,HOME_THEATRE,Economy,51,10706.6
17,S12493,POWER_BANKS,Economy,51,10702.63


In [48]:
eco_agg_df.sort_values(by=['sales_value','sales_qty'], ascending=False).head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
0,S16854,CABLES,Economy,46,11799.04
1,S07737,FITNESS_BANDS,Economy,47,11558.09
2,S13202,STORAGE_DEVICES,Economy,60,11524.11
4,S16082,AUDIO_DEVICES,Economy,48,11255.77
5,S01679,HEADPHONES,Economy,45,11040.68
6,S13070,CAMERAS,Economy,43,10982.58
7,S03894,FEATURE_PHONES,Economy,46,10949.79
8,S05972,CAMERAS,Economy,42,10948.26
9,S10961,TELEVISIONS,Economy,44,10887.54
10,S08277,SMARTPHONES,Economy,47,10886.99


In [49]:
aggregated_df.head(10)

Unnamed: 0,store_id,electronics_type,classification,sales_qty,sales_value
0,S16854,CABLES,Economy,46,11799.04
1,S07737,FITNESS_BANDS,Economy,47,11558.09
2,S13202,STORAGE_DEVICES,Economy,60,11524.11
3,S19690,GAMING_ACCESSORIES,Standard,44,11433.02
4,S16082,AUDIO_DEVICES,Economy,48,11255.77
5,S01679,HEADPHONES,Economy,45,11040.68
6,S13070,CAMERAS,Economy,43,10982.58
7,S03894,FEATURE_PHONES,Economy,46,10949.79
8,S05972,CAMERAS,Economy,42,10948.26
9,S10961,TELEVISIONS,Economy,44,10887.54


In [50]:
merged_df['state'].unique()

array(['TS', 'TN', 'DL', 'MH', 'KA', 'WB'], dtype=object)

In [51]:
merged_df[merged_df['state']=='TS']['sales_value'].sum()

903783337.2700009

In [54]:
len(merged_df['electronics_type'].unique())

30

store_id       0
sku            0
date           0
sales_qty      0
sales_value    0
dtype: int64