# Retail Sales Analysis

This notebook analyzes real retail sales data from the Online Retail dataset.

In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
# The CSV is presumably Latin-1 encoded (the previous 'unicode_escape' codec
# also decoded bytes as Latin-1). Plain ISO-8859-1 is used instead because
# 'unicode_escape' additionally interprets backslash sequences, so a literal
# backslash in a text field would be rewritten or raise a decode error.
df = pd.read_csv('../data/online_retail.csv', encoding='ISO-8859-1')
# Quick sanity check of what was loaded: dimensions and the first rows.
print('Dataset shape:', df.shape)
print(df.head())

# Clean data
# Drop exact duplicate rows, then keep only rows with a positive Quantity.
# NOTE(review): rows with a non-positive UnitPrice are still kept here —
# confirm whether they should also be excluded from the sales totals.
df = df.drop_duplicates()
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[df['Quantity'] > 0].copy()
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
# Per-row sales value: quantity times unit price.
df['Total'] = df['Quantity'] * df['UnitPrice']

# Total sales by country
# Sum the per-row totals for each country, keeping 'Country' as a regular column.
country_sales = df.groupby('Country', as_index=False)['Total'].sum()
# Rank countries by total sales and keep the ten largest for the chart.
top_countries = country_sales.sort_values('Total', ascending=False).head(10)
# Sales go on the x-axis and country names on the y-axis.
sns.barplot(data=top_countries, x='Total', y='Country')
plt.title('Top 10 Countries by Sales')
plt.show()

# Monthly sales over time
# Map every invoice date to its calendar month, then sum the totals per month.
invoice_month = df['InvoiceDate'].dt.to_period('M')
monthly_totals = df['Total'].groupby(invoice_month).sum()
time_sales = monthly_totals.reset_index()
# Convert the monthly periods back to timestamps for plotting on the x-axis.
time_sales['InvoiceDate'] = time_sales['InvoiceDate'].dt.to_timestamp()
sns.lineplot(data=time_sales, x='InvoiceDate', y='Total', marker='o')
plt.title('Monthly Sales Over Time')
# Rotate the date labels on the x-axis by 45 degrees.
plt.xticks(rotation=45)
plt.show()