In [1]:
import pandas as pd

# Read the sales records from the CSV file into the working DataFrame.
csv_path = "sales_data.csv"
df = pd.read_csv(csv_path)

In [2]:

# Preview the top of the table: a banner line followed by the first five rows.
print("\n--- FIRST 5 ROWS ---", df.head(5), sep="\n")



--- FIRST 5 ROWS ---
   Product_ID   Sale_Date Sales_Rep Region  Sales_Amount  Quantity_Sold  \
0        1052  2023-02-03       Bob  North       5053.97             18   
1        1093  2023-04-21       Bob   West       4384.02             17   
2        1015  2023-09-21     David  South       4631.23             30   
3        1072  2023-08-24       Bob  South       2167.94             39   
4        1061  2023-03-24   Charlie   East       3750.20             13   

  Product_Category  Unit_Cost  Unit_Price Customer_Type  Discount  \
0        Furniture     152.75      267.22     Returning      0.09   
1        Furniture    3816.39     4209.44     Returning      0.11   
2             Food     261.56      371.40     Returning      0.20   
3         Clothing    4330.03     4467.75           New      0.02   
4      Electronics     637.37      692.71           New      0.08   

  Payment_Method Sales_Channel Region_and_Sales_Rep  
0           Cash        Online            North-Bob  
1   

In [3]:
# Show column names, non-null counts, dtypes and memory usage.
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
print("\n--- BASIC INFO ---")
df.info()


--- BASIC INFO ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Product_ID            1000 non-null   int64  
 1   Sale_Date             1000 non-null   object 
 2   Sales_Rep             1000 non-null   object 
 3   Region                1000 non-null   object 
 4   Sales_Amount          1000 non-null   float64
 5   Quantity_Sold         1000 non-null   int64  
 6   Product_Category      1000 non-null   object 
 7   Unit_Cost             1000 non-null   float64
 8   Unit_Price            1000 non-null   float64
 9   Customer_Type         1000 non-null   object 
 10  Discount              1000 non-null   float64
 11  Payment_Method        1000 non-null   object 
 12  Sales_Channel         1000 non-null   object 
 13  Region_and_Sales_Rep  1000 non-null   object 
dtypes: float64(4), int64(2), object(8)
memory usage: 109.

In [4]:
# Report the table dimensions; df.shape is a (rows, columns) tuple.
print(f"\n--- SHAPE OF DATA (Rows, Columns) ---\n{df.shape}")


--- SHAPE OF DATA (Rows, Columns) ---
(1000, 14)


In [5]:

# Count the null entries in each column (isnull() is an alias of isna()).
missing_per_column = df.isna().sum()
print("\n--- MISSING VALUES ---")
print(missing_per_column)


--- MISSING VALUES ---
Product_ID              0
Sale_Date               0
Sales_Rep               0
Region                  0
Sales_Amount            0
Quantity_Sold           0
Product_Category        0
Unit_Cost               0
Unit_Price              0
Customer_Type           0
Discount                0
Payment_Method          0
Sales_Channel           0
Region_and_Sales_Rep    0
dtype: int64


In [6]:
# Print a banner followed by the descriptive statistics table from describe().
print("\n--- SUMMARY STATISTICS ---", df.describe(), sep="\n")



--- SUMMARY STATISTICS ---
        Product_ID  Sales_Amount  Quantity_Sold    Unit_Cost   Unit_Price  \
count  1000.000000   1000.000000    1000.000000  1000.000000  1000.000000   
mean   1050.128000   5019.265230      25.355000  2475.304550  2728.440120   
std      29.573505   2846.790126      14.159006  1417.872546  1419.399839   
min    1001.000000    100.120000       1.000000    60.280000   167.120000   
25%    1024.000000   2550.297500      13.000000  1238.380000  1509.085000   
50%    1051.000000   5019.300000      25.000000  2467.235000  2696.400000   
75%    1075.000000   7507.445000      38.000000  3702.865000  3957.970000   
max    1100.000000   9989.040000      49.000000  4995.300000  5442.150000   

         Discount  
count  1000.00000  
mean      0.15239  
std       0.08720  
min       0.00000  
25%       0.08000  
50%       0.15000  
75%       0.23000  
max       0.30000  


In [7]:
# Total Sales: the sum of the Sales_Amount column over all rows.
total_sales = df["Sales_Amount"].sum()
# Summing floats accumulates binary rounding error (e.g. ...2299999995), so
# render the amount with two decimals instead of the raw float repr.
print(f"\nTotal Sales Amount: {total_sales:.2f}")


Total Sales Amount: 5019265.2299999995


In [8]:
# Best selling product (by quantity).
# Aggregate units per product once and reuse the result, rather than running
# the same groupby/sum twice to get the label and the value separately.
units_by_product = df.groupby("Product_ID")["Quantity_Sold"].sum()
best_product = units_by_product.idxmax()  # Product_ID with the largest total
best_qty = units_by_product.max()         # that product's total units sold

print(f"\nBest Selling Product ID: {best_product}")
print(f"Quantity Sold: {best_qty}")



Best Selling Product ID: 1090
Quantity Sold: 590


In [9]:
# Best Sales Rep (highest total sales).
# Aggregate sales per rep once and reuse the result for both the name and
# the amount instead of repeating the groupby/sum.
sales_by_rep = df.groupby("Sales_Rep")["Sales_Amount"].sum()
best_sales_rep = sales_by_rep.idxmax()  # rep with the largest total
best_sales_amt = sales_by_rep.max()     # that rep's total sales amount

print(f"\nTop Sales Representative: {best_sales_rep}")
# Two-decimal formatting keeps float summation noise out of the printed amount.
print(f"Total Sales by Rep: {best_sales_amt:.2f}")

# Region with highest sales.
# Aggregate sales per region once and reuse the result for both the label
# and the amount instead of repeating the groupby/sum.
sales_by_region = df.groupby("Region")["Sales_Amount"].sum()
best_region = sales_by_region.idxmax()  # region with the largest total
best_region_amt = sales_by_region.max() # that region's total sales amount

print(f"\nBest Performing Region: {best_region}")
# Two-decimal formatting keeps float summation noise out of the printed amount.
print(f"Region Sales Amount: {best_region_amt:.2f}")


Top Sales Representative: David
Total Sales by Rep: 1141737.36

Best Performing Region: North
Region Sales Amount: 1369612.51


In [11]:
# Simple text report summarising the figures computed in the earlier cells
# (total_sales, best_product/best_qty, best_sales_rep/best_sales_amt,
# best_region/best_region_amt).
# Monetary amounts are rendered with two decimals so accumulated float
# rounding error (e.g. ...2299999995) does not leak into the report.
print("\n--- SALES REPORT ---\n")
print(f"Total Sales: {total_sales:.2f}")
print(f"Best Selling Product: {best_product} (Sold {best_qty} units)")
print(f"Top Sales Rep: {best_sales_rep} (Sales = {best_sales_amt:.2f})")
print(f"Best Region: {best_region} (Sales = {best_region_amt:.2f})")


--- SALES REPORT ---

Total Sales: 5019265.2299999995
Best Selling Product: 1090 (Sold 590 units)
Top Sales Rep: David (Sales = 1141737.36)
Best Region: North (Sales = 1369612.51)
