In [1]:
import pandas as pd

def clean_sales(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of *df* containing only rows usable for the report.

    Rows with a missing value in any column are dropped, ``Total_Sales`` is
    coerced to a numeric dtype, and rows whose ``Total_Sales`` could not be
    parsed as a number are dropped as well. The input frame is not modified.
    """
    # Take an explicit copy so the column assignment below writes to our own
    # frame rather than to (a view of) the caller's data.
    cleaned = df.dropna().copy()

    # errors="coerce" turns unparseable values into NaN instead of raising,
    # so they can be removed by the dropna() that follows.
    cleaned["Total_Sales"] = pd.to_numeric(cleaned["Total_Sales"], errors="coerce")
    return cleaned.dropna(subset=["Total_Sales"])


def summarize_sales(df: pd.DataFrame) -> dict:
    """Compute the headline sales metrics from a cleaned frame.

    Returns a dict with the keys ``total_revenue`` (sum of ``Total_Sales``),
    ``average_sales`` (mean), ``max_sales`` (largest single value) and
    ``best_product`` (the ``Product`` whose summed ``Total_Sales`` is highest).

    Raises:
        ValueError: if *df* has no rows, because no best-selling product can
            be determined from an empty frame.
    """
    if df.empty:
        raise ValueError("No valid sales rows left to summarize after cleaning")

    sales = df["Total_Sales"]
    revenue_by_product = df.groupby("Product")["Total_Sales"].sum()
    return {
        "total_revenue": sales.sum(),
        "average_sales": sales.mean(),
        "max_sales": sales.max(),
        "best_product": revenue_by_product.idxmax(),
    }


# Run the report only when executed as a script or notebook cell (where
# __name__ is "__main__"), not when the helpers above are imported.
# The statements are kept at module level, not inside a function, so that
# df and the computed metrics stay available after the cell has run.
if __name__ == "__main__":
    # Load dataset
    df = pd.read_csv("sales_data.csv")

    print("First 5 rows:")
    print(df.head(), "\n")

    # Basic information
    print("Dataset Info:")
    # DataFrame.info() prints the summary itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    df.info()
    print()

    # Handle missing values and non-numeric Total_Sales entries
    df = clean_sales(df)

    # Calculate metrics
    metrics = summarize_sales(df)
    total_revenue = metrics["total_revenue"]
    average_sales = metrics["average_sales"]
    max_sales = metrics["max_sales"]

    # Best selling product
    best_product = metrics["best_product"]

    # Display results
    print("----- Sales Analysis Report -----")
    print(f"Total Revenue: ₹{total_revenue:.2f}")
    print(f"Average Sales: ₹{average_sales:.2f}")
    print(f"Highest Single Sale: ₹{max_sales:.2f}")
    print(f"Best Selling Product: {best_product}")


First 5 rows:
         Date     Product  Quantity  Price Customer_ID Region  Total_Sales
0  2024-01-01       Phone         7  37300     CUST001   East       261100
1  2024-01-02  Headphones         4  15406     CUST002  North        61624
2  2024-01-03       Phone         2  21746     CUST003   West        43492
3  2024-01-04  Headphones         1  30895     CUST004   East        30895
4  2024-01-05      Laptop         8  39835     CUST005  North       318680 

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Date         100 non-null    object
 1   Product      100 non-null    object
 2   Quantity     100 non-null    int64 
 3   Price        100 non-null    int64 
 4   Customer_ID  100 non-null    object
 5   Region       100 non-null    object
 6   Total_Sales  100 non-null    int64 
dtypes: int64(3), object(4)
memory usage: 5.6+ K