# Data Loading and Initial Exploration

This notebook covers loading the sales data from multiple file formats (CSV first, with Excel as a fallback), performing initial data exploration (shape, info, and descriptive statistics), and documenting data quality issues (missing values per column).

In [None]:
# Import necessary libraries
import pandas as pd

# Load data
# The CSV file is the preferred source. The Excel workbook is read regardless,
# but it only becomes `sales_data` when the CSV could not be loaded.
sales_data = None
try:
    sales_csv = pd.read_csv('../data/raw/Global_Superstore2.csv', encoding='latin1')  # Adjust path and filename as needed
except Exception as e:  # best-effort load: report the failure and carry on
    print(f'Error loading CSV data: {e}')
else:
    print('CSV data loaded successfully')
    sales_data = sales_csv

try:
    sales_excel = pd.read_excel('../data/raw/Global_Superstore2.xlsx')  # Adjust path and filename as needed
except Exception as e:  # best-effort load: report the failure and carry on
    print(f'Error loading Excel data: {e}')
else:
    print('Excel data loaded successfully')
    if sales_data is None:
        sales_data = sales_excel


def explore_data(df):
    """Print an initial exploration report for *df*.

    The report contains the shape, the ``DataFrame.info()`` summary, the
    descriptive statistics for all columns, and the number of missing
    values per column.

    Args:
        df: The pandas DataFrame to explore.

    Returns:
        A pandas Series holding the count of missing values per column.
    """
    # Initial exploration
    print('Data Shape:', df.shape)
    print('Data Info:')
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    df.info()
    print('Data Description:')
    print(df.describe(include='all'))

    # Document data quality issues
    missing_values = df.isnull().sum()
    print('Missing values per column:')
    print(missing_values)
    return missing_values


if sales_data is not None:
    missing_values = explore_data(sales_data)
else:
    print('No data loaded successfully.')
