In [4]:
import pandas as pd
import numpy as np


In [5]:
# Load the raw retail data from a CSV in the working directory into `df`,
# then preview the first five rows as the cell output.
df = pd.read_csv("retail_time_data.csv")
df.head()


Unnamed: 0,date,store,product,category,sales,price,discount
0,2023-01-01,Store_A,Phone_X,Electronics,120,15000,0.1
1,2023-01-02,Store_A,Rice_5kg,Grocery,300,150,0.05
2,2023-01-03,Store_B,Phone_X,Electronics,90,15000,0.15
3,2023-01-04,Store_B,Jeans_Men,Clothing,200,1200,0.2
4,2023-01-05,Store_C,Rice_5kg,Grocery,350,150,0.0


In [6]:
# Parse the `date` column into pandas datetimes so the `.dt` accessor can be
# used on it in the following cells.
df['date'] = pd.to_datetime(df['date'])


In [7]:
# Split the timestamp into separate calendar-component columns.
date_parts = df['date'].dt
iso_week = date_parts.isocalendar().week

df['year'] = date_parts.year
df['month'] = date_parts.month
# ISO-8601 week number. NOTE(review): ISO weeks near a year boundary can
# belong to the neighbouring ISO year, so `week` is not always consistent
# with the calendar `year` column above.
df['week'] = iso_week.astype(int)
df['day'] = date_parts.day
# pandas convention: Monday = 0 ... Sunday = 6.
df['weekday'] = date_parts.dayofweek


In [8]:
# 1 for Saturday/Sunday (weekday codes 5 and 6), 0 for every other day.
weekend_mask = df['weekday'].ge(5)
df['is_weekend'] = weekend_mask.astype(int)


In [9]:
def season(month):
    """Return the season label for a month number.

    Mapping:
        12, 1, 2 -> 'Winter'
        3, 4, 5  -> 'Summer'
        6, 7, 8  -> 'Monsoon'
        anything else -> 'Autumn'

    Note that the final case is a catch-all: any value that is not one of
    the nine months listed above (including values outside 1-12) is
    labelled 'Autumn'.
    """
    labelled_months = (
        ('Winter', (12, 1, 2)),
        ('Summer', (3, 4, 5)),
        ('Monsoon', (6, 7, 8)),
    )
    for label, months in labelled_months:
        if month in months:
            return label
    return 'Autumn'

# Label every row with its season by passing each month number through `season`.
df['season'] = df['month'].map(season)


In [10]:
# Flag rows dated 1-10 January as "festive" (1), everything else as 0.
# NOTE(review): the window is hard-coded here; confirm it matches the
# festive period the analysis actually cares about.
in_january = df['month'].eq(1)
within_first_ten_days = df['day'].le(10)
df['festive_flag'] = (in_january & within_first_ten_days).astype(int)


In [11]:
# 1 when any positive discount was applied to the row, otherwise 0.
df['discount_flag'] = df['discount'].gt(0).astype(int)


In [12]:
# Order rows chronologically within each product so that shifting by k rows
# looks k observations back in time for that product.
df = df.sort_values(by=['product', 'date'])

# NOTE(review): the lag is row-based, not calendar-based. It equals "k days
# ago" only if every product has exactly one row per consecutive day --
# confirm against the data (rows for different stores share one group).
sales_per_product = df.groupby('product')['sales']
for lag in (1, 7):
    df[f'lag_{lag}_day_sales'] = sales_per_product.shift(lag)


In [13]:
# Trailing moving averages of sales, computed separately for each product.
# Each window includes the current row; the first (window - 1) rows of every
# product have no complete window and are left as NaN.
sales_by_product = df.groupby('product')['sales']
for window in (3, 7):
    df[f'rolling_{window}day_sales'] = sales_by_product.transform(
        lambda sales, w=window: sales.rolling(w).mean()
    )


In [14]:
# Deviation of each row's sales from its product's 3-row rolling mean:
# positive means above the recent average, negative means below.
df['sales_trend'] = df['sales'].sub(df['rolling_3day_sales'])


In [15]:
# Replace every remaining NaN in the frame with 0. This covers the lag and
# rolling columns, which are NaN on the first rows of each product.
# NOTE(review): after this step "no history yet" is indistinguishable from a
# genuine value of 0 -- confirm that is acceptable downstream.
df = df.fillna(0)


In [16]:
# Print the column/dtype summary first, then end the cell with the preview.
# A notebook cell only renders the value of its last expression, so a
# `df.head()` placed before `df.info()` is evaluated and silently discarded.
df.info()
df.head()



<class 'pandas.core.frame.DataFrame'>
Index: 12 entries, 3 to 11
Data columns (total 21 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   date                12 non-null     datetime64[ns]
 1   store               12 non-null     object        
 2   product             12 non-null     object        
 3   category            12 non-null     object        
 4   sales               12 non-null     int64         
 5   price               12 non-null     int64         
 6   discount            12 non-null     float64       
 7   year                12 non-null     int32         
 8   month               12 non-null     int32         
 9   week                12 non-null     int64         
 10  day                 12 non-null     int32         
 11  weekday             12 non-null     int32         
 12  is_weekend          12 non-null     int64         
 13  season              12 non-null     object        
 14  f

In [17]:
# Write the engineered frame to CSV in the working directory; index=False
# leaves the row index out of the file.
df.to_csv("time_domain_engineered_retail_data.csv", index=False)
