# In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import mean_absolute_error as MAE

def split_train_test(frame, train_fraction=0.75):
    """Split *frame* by row position into a leading train part and a trailing test part.

    The split is positional (``iloc``), so it is independent of the index
    labels. This matters for a shuffled frame, whose labels are no longer in
    order: a label-based ``loc`` slice would cut at wherever one particular
    label happens to sit and would include that row in both halves.

    Args:
        frame: DataFrame to split; rows are taken in their current order.
        train_fraction: Share of rows placed in the train part (0 < value < 1).

    Returns:
        A ``(train, test)`` tuple of independent copies, so columns can be
        added to either one without touching ``frame`` and without
        chained-assignment warnings.

    Raises:
        ValueError: If ``train_fraction`` is not strictly between 0 and 1.
    """
    if not 0 < train_fraction < 1:
        raise ValueError("train_fraction must be strictly between 0 and 1")
    cut = int(len(frame) * train_fraction)
    return frame.iloc[:cut].copy(), frame.iloc[cut:].copy()


def predict_group_mean(train, test, group_col, target_col):
    """Predict, for each row of *test*, the train mean of *target_col* in that row's group.

    Args:
        train: DataFrame used to compute the per-group means.
        test: DataFrame whose ``group_col`` values select the prediction.
        group_col: Name of the grouping column (present in both frames).
        target_col: Name of the numeric column averaged in ``train``.

    Returns:
        A Series aligned with ``test.index``. Rows whose group value does not
        occur in ``train`` (or is missing) get the overall train mean of
        ``target_col`` rather than an arbitrary placeholder.
    """
    group_means = train.groupby(group_col)[target_col].mean()
    overall_mean = train[target_col].mean()
    return test[group_col].map(group_means).fillna(overall_mean)


if __name__ == "__main__":
    # Loading the dataset
    data = pd.read_csv(r'C:\reading_csv\datasets\big_mart_sales.csv')

    # Data Exploration: Checking for missing values
    print(data.isna().sum())

    # Shuffling the data to ensure randomness
    data = shuffle(data, random_state=42)

    # Dividing data into training (75%) and test (25%) sets, by position
    train, test = split_train_test(data, train_fraction=0.75)

    # Simple Mean Prediction: Mean of 'Item_Outlet_Sales'
    test['simple_mean'] = train['Item_Outlet_Sales'].mean()

    # Calculate Mean Absolute Error (MAE) for Simple Mean Prediction
    simple_mean_error = MAE(test['Item_Outlet_Sales'], test['simple_mean'])
    print(f"Simple Mean Error (MAE): {simple_mean_error}")

    # Mean Sales based on Outlet_Type ('mean' as a string avoids the
    # deprecation warning recent pandas emits for aggfunc=np.mean)
    out_type = pd.pivot_table(train, values='Item_Outlet_Sales', index=['Outlet_Type'], aggfunc='mean')
    print(out_type)

    # Predict the train mean of each row's Outlet_Type for the test set
    test['Out_type_mean'] = predict_group_mean(train, test, 'Outlet_Type', 'Item_Outlet_Sales')

    # Calculate MAE for Outlet_Type based prediction
    out_type_error = MAE(test['Item_Outlet_Sales'], test['Out_type_mean'])
    print(f"Outlet Type Mean Error (MAE): {out_type_error}")

    # Comparison of Simple Mean and Outlet-Type Mean Errors
    if simple_mean_error > out_type_error:
        print("Outlet Type Mean Prediction performs better than Simple Mean Prediction.")
    else:
        print("Simple Mean Prediction performs better.")
