## Compare Data Completeness Over Time

**Description**: Analyze how the rate of missing data in `sales_data.csv` changes over time. Records are grouped by calendar month using the `date` column, and the percentage of missing values in each column is visualized for every month.

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def compute_monthly_missing_rates(frame, date_col='date', month_col='month'):
    """Return the percentage of missing values per column for each calendar month.

    Parameters
    ----------
    frame : pandas.DataFrame
        Input data; must contain ``date_col``. It is not modified.
    date_col : str
        Column holding the record date. Values that cannot be parsed as dates
        are treated as missing.
    month_col : str
        Name given to the month index of the result. A column with this name in
        ``frame`` (e.g. a previously derived year-month column) is excluded
        from the result.

    Returns
    -------
    pandas.DataFrame
        One row per year-month period, one column per remaining data column,
        values in percent (0-100). Rows without a usable date belong to no
        month and are left out, so ``date_col`` itself is not reported here.
    """
    dates = pd.to_datetime(frame[date_col], errors='coerce')
    months = dates.dt.to_period('M').rename(month_col)
    # The date and the derived month define the grouping; within a month they
    # are never missing, so reporting them would only add misleading 0% columns.
    value_cols = [col for col in frame.columns if col not in (date_col, month_col)]
    # The mean of a boolean "is missing" mask is the missing fraction.
    return frame[value_cols].isna().groupby(months).mean() * 100


sales_data = None  # Stays None when the CSV cannot be loaded
try:
    sales_data = pd.read_csv('sales_data.csv')
except FileNotFoundError:
    # Report and fall through: the `else` branch below handles the unloaded
    # case. Calling exit() here would not stop the cell; in a notebook it asks
    # the kernel to shut down instead.
    print("Error: 'sales_data.csv' was not found. Please ensure the file is in the correct directory.")

if sales_data is not None:
    if 'date' in sales_data.columns:
        # Unparseable dates become NaT so one bad value cannot abort the analysis.
        sales_data['date'] = pd.to_datetime(sales_data['date'], errors='coerce')
        sales_data['month'] = sales_data['date'].dt.to_period('M')  # Extract year-month

        # Rows without a date cannot be assigned to a month; report them
        # explicitly instead of letting groupby drop them silently.
        undated_rows = int(sales_data['date'].isna().sum())
        if undated_rows:
            print(f"Note: {undated_rows} row(s) have a missing or unparseable date "
                  "and are excluded from the monthly breakdown.")

        missing_data_rates = compute_monthly_missing_rates(sales_data)

        if missing_data_rates.empty:
            # sns.heatmap raises on an empty table, so skip plotting.
            print("No dated rows or data columns available; nothing to plot.")
        else:
            fig, ax = plt.subplots(figsize=(12, 6))
            sns.heatmap(missing_data_rates, cmap='viridis', annot=True, fmt=".1f", ax=ax)
            ax.set_title('Missing Data Rates by Month')
            ax.set_xlabel('Columns')
            ax.set_ylabel('Month')
            fig.tight_layout()
            plt.show()
            print("\nMissing Data Rates by Month (Table):")
            print(missing_data_rates)

    else:
        print("Error: 'date' column not found in sales_data.csv. Ensure the date column is present.")
else:
    print("Could not proceed with analyzing data completeness because 'sales_data.csv' was not loaded.")

Error: 'sales_data.csv' was not found. Please ensure the file is in the correct directory.
Could not proceed with analyzing data completeness because 'sales_data.csv' was not loaded.


: 