In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

features = pd.read_csv("features_data_set.csv")
sales = pd.read_csv("sales_data_set.csv")
stores = pd.read_csv("stores_data_set.csv")

print(features.head())
print(sales.head())
print(stores.head())

# Merge features and sales on Store and Date
features["Date"] = pd.to_datetime(features["Date"])
sales["Date"] = pd.to_datetime(sales["Date"])

df = sales.merge(features, on=["Store", "Date", "IsHoliday"], how="left")
df = df.merge(stores, on="Store", how="left")

fig = px.line(df.groupby("Date")["Weekly_Sales"].sum().reset_index(),
              x="Date", y="Weekly_Sales", title="Total Weekly Sales Over Time")
fig.show()


fig = px.box(df, x="IsHoliday", y="Weekly_Sales",
             title="Weekly Sales: Holiday vs Non-Holiday",
             color="IsHoliday")
fig.show()

fig = px.violin(df, x="Type", y="Weekly_Sales", color="Type",
                box=True, points="all",
                title="Sales Distribution by Store Type")
fig.show()

fig = px.scatter(df, x="MarkDown1", y="Weekly_Sales", color="IsHoliday",
                 hover_data=["Type", "Dept", "Temperature"],
                 title="Markdown vs Weekly Sales")
fig.show()

df["Week"] = df["Date"].dt.isocalendar().week
df["Year"] = df["Date"].dt.year

heatmap_df = df.groupby(["Store", "Week"])["Weekly_Sales"].mean().reset_index()
pivot = heatmap_df.pivot(index="Store", columns="Week", values="Weekly_Sales")

fig = px.imshow(pivot, labels=dict(x="Week", y="Store", color="Avg Weekly Sales"),
                title="Average Weekly Sales by Store and Week")
fig.show()


   Store        Date  Temperature  Fuel_Price  MarkDown1  MarkDown2  \
0      1  05/02/2010        42.31       2.572        NaN        NaN   
1      1  12/02/2010        38.51       2.548        NaN        NaN   
2      1  19/02/2010        39.93       2.514        NaN        NaN   
3      1  26/02/2010        46.63       2.561        NaN        NaN   
4      1  05/03/2010        46.50       2.625        NaN        NaN   

   MarkDown3  MarkDown4  MarkDown5         CPI  Unemployment  IsHoliday  
0        NaN        NaN        NaN  211.096358         8.106      False  
1        NaN        NaN        NaN  211.242170         8.106       True  
2        NaN        NaN        NaN  211.289143         8.106      False  
3        NaN        NaN        NaN  211.319643         8.106      False  
4        NaN        NaN        NaN  211.350143         8.106      False  
   Store  Dept        Date  Weekly_Sales  IsHoliday
0      1     1  05/02/2010      24924.50      False
1      1     1  12/02/201

ValueError: time data "19/02/2010" doesn't match format "%m/%d/%Y", at position 2. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.