In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw Walmart sales CSV from the notebook's working directory.
dataset = pd.read_csv(filepath_or_buffer='Walmart_Sales.csv')
# Bare last expression: let the notebook render the frame.
dataset

In [None]:
# Preview a random selection of rows.
# A fixed random_state keeps the preview identical on every
# Restart & Run All; capping n avoids a ValueError on frames with < 10 rows.
sample = dataset.sample(n=min(10, len(dataset)), random_state=42)
sample

In [None]:
# Show the column labels; a bare final expression is displayed by the
# notebook, so no print() wrapper is needed.
dataset.columns

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line.
dataset.info()

In [None]:
# (rows, columns) of the loaded frame, shown as the cell's output.
dataset.shape

In [None]:
# Summary statistics for the numeric columns; returning the frame (instead
# of printing it) lets the notebook render it as a formatted table.
dataset.describe()

In [None]:
# Summary (count / unique / top / freq) for the object-dtype columns,
# rendered as a table rather than printed as plain text.
dataset.describe(include='object')

In [None]:
# Keep the first five rows around for a quick look at the raw layout.
head = dataset.head(n=5)
head

In [None]:
# List each column's dtype as loaded; Date has not been converted yet
# (the notebook parses it with pd.to_datetime in a later cell).
dataset.dtypes

In [None]:
# Count missing values per column (isna is the canonical name for isnull).
dataset.isna().sum()

In [None]:
# Number of fully duplicated rows; the first occurrence of each row is
# not counted as a duplicate.
dataset.duplicated(keep='first').sum()

In [None]:
# Compare the spread of weekly sales between the two Holiday_Flag groups.
fig, ax = plt.subplots(figsize=(6, 4))
sns.boxplot(data=dataset, x='Holiday_Flag', y='Weekly_Sales', ax=ax)
ax.set_title("Weekly Sales: Holiday vs Non-Holiday")
plt.show()

In [None]:
# First and third quartiles of weekly sales, fetched in a single call.
Q1, Q3 = dataset['Weekly_Sales'].quantile([0.25, 0.75])
# Interquartile range: width of the middle 50% of the distribution.
IQR = Q3 - Q1
IQR

In [None]:
# Outlier fences: 1.5 * IQR below Q1 and above Q3.
fence = 1.5 * IQR
lower_bound = Q1 - fence
upper_bound = Q3 + fence
for label, bound in (('Lower Bound:', lower_bound), ('Upper Bound:', upper_bound)):
    print(label, bound)

In [None]:
# Keep only rows whose weekly sales lie inside the fences (both ends
# inclusive). The raw `dataset` is left untouched.
within_fences = dataset['Weekly_Sales'].between(lower_bound, upper_bound)
dataset_no_outliers = dataset[within_fences]

In [None]:
# Same holiday / non-holiday comparison, drawn from the filtered frame.
fig, ax = plt.subplots(figsize=(6, 4))
sns.boxplot(data=dataset_no_outliers, x='Holiday_Flag', y='Weekly_Sales', ax=ax)
ax.set_title("Weekly Sales: Holiday vs Non-Holiday (Outliers Removed)")
plt.show()

In [None]:
# Convert the Date column to datetime, reading ambiguous values day-first.
# NOTE(review): format='mixed' infers a format per element; if the file uses
# a single layout, an explicit format string would be stricter — confirm.
parsed_dates = pd.to_datetime(
    dataset['Date'],
    dayfirst=True,
    format='mixed',
)
dataset['Date'] = parsed_dates

In [None]:
# Spot-check the first few parsed dates.
dataset.loc[:, 'Date'].head(n=5)

In [None]:
# Mean weekly sales per store, best-performing stores first.
store_sales = (
    dataset
    .groupby(by='Store')['Weekly_Sales']
    .mean()
    .sort_values(ascending=False)
)
store_sales.head(n=5)

In [None]:
# Mean weekly sales for each Holiday_Flag value.
holiday_sales = (
    dataset
    .groupby(by='Holiday_Flag')['Weekly_Sales']
    .mean()
)
holiday_sales

In [None]:
# Derive calendar features from the parsed Date column.
date_accessor = dataset['Date'].dt
dataset['Year'] = date_accessor.year
dataset['Month'] = date_accessor.month
# isocalendar() yields an unsigned nullable integer column; cast to plain int.
dataset['Week'] = date_accessor.isocalendar()['week'].astype(int)

In [None]:
# Mean weekly sales for each calendar month (all years pooled).
monthly_sales = dataset.groupby('Month')['Weekly_Sales'].mean()

# Label both axes and pin one tick per month so the figure stands alone
# and matches the labelling used by the yearly plot.
ax = monthly_sales.plot(marker='.', title='Average Monthly Sales')
ax.set_xlabel('Month')
ax.set_ylabel('Average Weekly Sales')
ax.set_xticks(monthly_sales.index.astype(int))
plt.show()

In [None]:
# Mean weekly sales for each calendar year.
yearly_sales = dataset.groupby(by='Year')['Weekly_Sales'].mean()

# Series.plot returns the Axes it drew on; configure labels through it.
ax = yearly_sales.plot(marker='o', title='Average Yearly Sales')
ax.set_xlabel('Year')
ax.set_ylabel('Average Weekly Sales')
# Force whole-year tick labels instead of fractional positions.
ax.set_xticks(yearly_sales.index.astype(int))
plt.show()



In [None]:
# Total sales per calendar year.
# NOTE(review): this rebinds `yearly_sales`, which held the per-year mean in
# the plotting cell above; re-running that plot after this cell is safe only
# because it recomputes the mean itself.
yearly_sales = (
    dataset
    .groupby(by='Year')['Weekly_Sales']
    .sum()
)
yearly_sales

In [None]:
# Headline figures for the dataset, gathered into one labelled Series.
date_column = dataset['Date']
summary = {
    'Total Stores': dataset['Store'].nunique(),
    'Start Date': date_column.min(),
    'End Date': date_column.max(),
    'Average Weekly Sales': dataset['Weekly_Sales'].mean(),
    # holiday_sales is indexed by Holiday_Flag; the lookups below assume the
    # flag is coded as integers 1 (holiday) / 0 (non-holiday) — TODO confirm.
    'Holiday Sales Avg': holiday_sales[1],
    'Non-Holiday Sales Avg': holiday_sales[0],
}

pd.Series(summary)

In [None]:
# Histogram (50 bins) of weekly sales with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=dataset, x='Weekly_Sales', bins=50, kde=True, ax=ax)
ax.set_title("Distribution of Weekly Sales")
ax.set_xlabel("Weekly Sales")
ax.set_ylabel("Frequency")
plt.show()

In [None]:
# Histogram (30 bins) of the Temperature column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(7, 4))
sns.histplot(data=dataset, x='Temperature', bins=30, kde=True, ax=ax)
ax.set_title("Temperature Distribution")
plt.show()

In [None]:
# The frame has a Store column, so each date presumably appears once per
# store; plotting the raw columns would join rows from different stores and
# make the line double back in time. Aggregate to one total per date first
# (groupby returns the dates in sorted order).
total_sales_by_date = dataset.groupby('Date')['Weekly_Sales'].sum()

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(total_sales_by_date.index, total_sales_by_date.values)
ax.set_title("Weekly Sales Trend Over Time")
ax.set_xlabel("Date")
ax.set_ylabel("Total Weekly Sales (All Stores)")
plt.show()

In [None]:
# Pairwise Pearson correlation between all numeric columns (this includes
# the derived Year / Month / Week columns added earlier).
numeric_columns = dataset.select_dtypes(include='number')
correlation = numeric_columns.corr(method='pearson')
correlation


In [None]:
# Annotated heatmap of the correlation matrix on a diverging colour map.
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(correlation, annot=True, cmap='coolwarm', ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# Weekly sales against fuel price, one point per row.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=dataset, x='Fuel_Price', y='Weekly_Sales', ax=ax)
ax.set_title("Fuel Price vs Weekly Sales")
plt.show()

In [None]:
# Weekly sales against CPI, one point per row.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=dataset, x='CPI', y='Weekly_Sales', ax=ax)
ax.set_title("CPI vs Weekly Sales")
plt.show()

In [None]:
# Weekly sales against unemployment, one point per row.
fig, ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=dataset, x='Unemployment', y='Weekly_Sales', ax=ax)
ax.set_title("Unemployment vs Weekly Sales")
plt.show()

In [None]:
# Mean weekly sales per store, sorted so the bars descend left to right.
store_sales = (
    dataset
    .groupby(by='Store')['Weekly_Sales']
    .mean()
    .sort_values(ascending=False)
)

fig, ax = plt.subplots(figsize=(12, 5))
store_sales.plot(kind='bar', ax=ax)
ax.set_title("Average Weekly Sales by Store")
ax.set_ylabel("Weekly Sales")
plt.show()


Insights

- Sales show clear seasonal patterns.

- Holiday weeks generally have higher average sales than non-holiday weeks.

- Certain stores consistently outperform others.

- Economic factors such as CPI and unemployment show only a mild correlation with weekly sales.

- Weekly sales distribution is right-skewed with high outliers.