In [1]:
# Import the required libraries
import pandas as pd
import numpy as np

### Connecting Data

In [2]:
# Load the three yearly sales extracts. Forward slashes are used in the
# relative paths because they resolve on Windows, macOS and Linux alike,
# whereas a backslash separator only works on Windows.
Sales_2020 = pd.read_csv("Data/AdventureWorks Sales Data 2020.csv")
Sales_2021 = pd.read_csv("Data/AdventureWorks Sales Data 2021.csv")
Sales_2022 = pd.read_csv("Data/AdventureWorks Sales Data 2022.csv")

# Stack the yearly frames into a single table. ignore_index=True gives the
# result a fresh 0..n-1 index instead of repeating each file's own row labels.
Sales_Data = pd.concat([Sales_2020, Sales_2021, Sales_2022], ignore_index=True)

# Load the calendar lookup table.
Calendar = pd.read_csv("Data/AdventureWorks Calendar Lookup.csv")

### Checking & Changing Data Types

In [3]:
# Inspect the Calendar column dtypes as loaded from CSV, before any conversion.
Calendar.dtypes

Date    object
dtype: object

In [4]:
# Inspect the Sales_Data column dtypes as loaded from CSV, before any conversion.
Sales_Data.dtypes

OrderDate        object
StockDate        object
OrderNumber      object
ProductKey        int64
CustomerKey       int64
TerritoryKey      int64
OrderLineItem     int64
OrderQuantity     int64
dtype: object

In [5]:
# Parse the date columns (object dtype after the CSV load) into
# datetime64[ns] with pd.to_datetime, for both Sales_Data and Calendar.
for date_col in ("OrderDate", "StockDate"):
    Sales_Data[date_col] = pd.to_datetime(Sales_Data[date_col])

Calendar["Date"] = pd.to_datetime(Calendar["Date"])

In [6]:
# Re-check the Sales_Data dtypes after the pd.to_datetime conversion.
Sales_Data.dtypes

OrderDate        datetime64[ns]
StockDate        datetime64[ns]
OrderNumber              object
ProductKey                int64
CustomerKey               int64
TerritoryKey              int64
OrderLineItem             int64
OrderQuantity             int64
dtype: object

In [7]:
# Re-check the Calendar dtypes after the pd.to_datetime conversion.
Calendar.dtypes

Date    datetime64[ns]
dtype: object

### Remove Empty Values

In [8]:
# Keep only the calendar rows that have a date. The explicit .copy() makes the
# result an independent frame, so the column assignments made on Calendar in
# later cells cannot raise SettingWithCopyWarning when rows were filtered out.
Calendar = Calendar[Calendar["Date"].notnull()].copy()

### Adding Calculated Column

In [9]:
# Label each order line by quantity: more than one unit is "Multiple Type",
# anything else is "Single Type".
is_multi_unit = Sales_Data["OrderQuantity"] > 1
Sales_Data["OrderType"] = np.where(is_multi_unit, "Multiple Type", "Single Type")
# Replace the index with a clean 0..n-1 range, discarding the old labels.
Sales_Data = Sales_Data.reset_index(drop=True)

# Derive the period-start columns: convert each date to a period of the given
# frequency, then take the timestamp at which that period begins.
# NOTE(review): the original cell tagged the weekly line with "# Error". In the
# displayed output the "W" periods start on Monday (2022-06-27 maps to itself,
# 2022-06-26 maps to 2022-06-20) - confirm whether another week start was intended.
calendar_dates = Calendar["Date"]
for column_name, freq in (
    ("Start of Week", "W"),
    ("Start of Month", "M"),
    ("Start of Year", "Y"),
):
    Calendar[column_name] = calendar_dates.dt.to_period(freq).dt.to_timestamp()
Calendar["Year"] = calendar_dates.dt.year
Calendar

Unnamed: 0,Date,Start of Week,Start of Month,Start of Year,Year
0,2020-01-01,2019-12-30,2020-01-01,2020-01-01,2020
1,2020-01-02,2019-12-30,2020-01-01,2020-01-01,2020
2,2020-01-03,2019-12-30,2020-01-01,2020-01-01,2020
3,2020-01-04,2019-12-30,2020-01-01,2020-01-01,2020
4,2020-01-05,2019-12-30,2020-01-01,2020-01-01,2020
...,...,...,...,...,...
907,2022-06-26,2022-06-20,2022-06-01,2022-01-01,2022
908,2022-06-27,2022-06-27,2022-06-01,2022-01-01,2022
909,2022-06-28,2022-06-27,2022-06-01,2022-01-01,2022
910,2022-06-29,2022-06-27,2022-06-01,2022-01-01,2022


In [10]:
# Display Sales_Data with the added OrderType column and the reset index.
Sales_Data

Unnamed: 0,OrderDate,StockDate,OrderNumber,ProductKey,CustomerKey,TerritoryKey,OrderLineItem,OrderQuantity,OrderType
0,2020-01-01,2019-09-21,SO45080,332,14657,1,1,1,Single Type
1,2020-01-01,2019-12-05,SO45079,312,29255,4,1,1,Single Type
2,2020-01-01,2019-10-29,SO45082,350,11455,9,1,1,Single Type
3,2020-01-01,2019-11-16,SO45081,338,26782,6,1,1,Single Type
4,2020-01-02,2019-12-15,SO45083,312,14947,10,1,1,Single Type
...,...,...,...,...,...,...,...,...,...
56041,2022-06-30,2022-03-22,SO74143,477,28517,10,3,2,Multiple Type
56042,2022-06-30,2022-03-15,SO74143,479,28517,10,2,1,Single Type
56043,2022-06-30,2022-04-08,SO74143,606,28517,10,1,1,Single Type
56044,2022-06-30,2022-05-15,SO74124,480,21676,7,2,2,Multiple Type


### Validation

In [11]:
# Validation: list the Sales_Data dtypes, now including the added OrderType column.
Sales_Data.dtypes

OrderDate        datetime64[ns]
StockDate        datetime64[ns]
OrderNumber              object
ProductKey                int64
CustomerKey               int64
TerritoryKey              int64
OrderLineItem             int64
OrderQuantity             int64
OrderType                object
dtype: object

In [12]:
# Validation: list the Calendar dtypes, now including the derived period-start columns.
Calendar.dtypes

Date              datetime64[ns]
Start of Week     datetime64[ns]
Start of Month    datetime64[ns]
Start of Year     datetime64[ns]
Year                       int32
dtype: object

In [13]:
# Count the non-null values in each Sales_Data column.
Sales_Data.count()

OrderDate        56046
StockDate        56046
OrderNumber      56046
ProductKey       56046
CustomerKey      56046
TerritoryKey     56046
OrderLineItem    56046
OrderQuantity    56046
OrderType        56046
dtype: int64

In [14]:
# Tally how many order lines fall into each OrderType category.
Sales_Data["OrderType"].value_counts()

OrderType
Single Type      31421
Multiple Type    24625
Name: count, dtype: int64

In [15]:
# Count the non-null dates in Calendar.
Calendar["Date"].count()

912

In [16]:
# Number of calendar rows in each year.
Calendar["Year"].value_counts()

Year
2020    366
2021    365
2022    181
Name: count, dtype: int64

In [17]:
# Number of calendar rows per Start of Year value.
Calendar["Start of Year"].value_counts()

Start of Year
2020-01-01    366
2021-01-01    365
2022-01-01    181
Name: count, dtype: int64

In [18]:
# Number of distinct week-start dates in the calendar.
Calendar["Start of Week"].nunique()

131

In [19]:
# Number of distinct month-start dates in the calendar.
Calendar["Start of Month"].nunique()

30