In [1]:
# Task 5 - Data Analysis on CSV Files using Pandas

# --- Step 1: Import Libraries ---
import pandas as pd
import matplotlib.pyplot as plt

# Enable inline plotting (for Jupyter/Colab)
%matplotlib inline

# --- Step 2: Load the CSV file ---
# Decode as Latin-1 so non-UTF-8 characters in the file do not break parsing.
df = pd.read_csv(
    "C:\\Users\\91962\\Desktop\\sales_data_sample.csv",
    encoding="latin1",
)

# --- Step 3: Explore the dataset ---
# Preview the top of the table as a rendered notebook output.
print("First 5 rows of the dataset:")
display(df.head(5))

# info() prints its summary itself, so it is not wrapped in print().
print("\nDataset Info:")
df.info()

print("\nDataset Shape (rows, columns):", df.shape)

# Per-column count of missing cells.
print("\nMissing Values in Each Column:")
print(df.isna().sum())

# --- Step 4: Basic Data Cleaning (if needed) ---
# Replace missing values with 0 in numeric columns only. Filling every column
# would inject a literal 0 into text and date-like columns, where it is not a
# meaningful value (e.g. it would show up as its own group label).
numeric_columns = df.select_dtypes(include="number").columns
df = df.fillna(dict.fromkeys(numeric_columns, 0))

# --- Step 5: Grouping and Summarizing ---
# Total sales per category; skipped unless both columns are present.
if all(column in df.columns for column in ("Category", "Sales")):
    sales_by_category = df.groupby("Category")["Sales"].sum()
    print("\nTotal Sales by Category:")
    print(sales_by_category)

    # Bar chart with one bar per category, labelled through the returned Axes.
    category_ax = sales_by_category.plot.bar(color="skyblue", figsize=(8, 5))
    category_ax.set_title("Total Sales by Category")
    category_ax.set_xlabel("Category")
    category_ax.set_ylabel("Total Sales")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Monthly sales trend; skipped unless both 'Date' and 'Sales' columns exist.
if all(column in df.columns for column in ("Date", "Sales")):
    # Values that cannot be parsed as dates become NaT instead of raising.
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

    # Sum sales per calendar month (monthly periods derived from 'Date').
    monthly_sales = df["Sales"].groupby(df["Date"].dt.to_period("M")).sum()
    print("\nMonthly Sales Trend:")
    print(monthly_sales)

    # Line chart of the monthly totals, one marker per month.
    trend_ax = monthly_sales.plot.line(marker="o", figsize=(10, 5))
    trend_ax.set_title("Monthly Sales Trend")
    trend_ax.set_xlabel("Month")
    trend_ax.set_ylabel("Total Sales")
    trend_ax.grid(True)
    plt.tight_layout()
    plt.show()

# --- Step 6: Save Processed Data ---
# Write the processed table next to the notebook (relative path) as UTF-8,
# leaving out the DataFrame's row index.
df.to_csv("cleaned_sales.csv", encoding="utf-8", index=False)
print("\nCleaned dataset saved as 'cleaned_sales.csv'")

ModuleNotFoundError: No module named 'matplotlib'