## Canada - Product Vs Price Trend and Analysis

In [224]:
# !pip install pandas

Data Sources
https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1810024502

In [225]:
import pandas as pd

In [226]:
file = pd.read_csv('./sales_data.csv')

In [227]:
df = pd.DataFrame(file)

In [228]:
df.head(20)

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2017-01,Canada,2016A000011124,"Beef stewing cuts, per kilogram",Dollars,81,units,0,v1353834271,11.1,12.66,,,,2
1,2017-01,Canada,2016A000011124,"Beef striploin cuts, per kilogram",Dollars,81,units,0,v1353834272,11.2,21.94,,,,2
2,2017-01,Canada,2016A000011124,"Beef top sirloin cuts, per kilogram",Dollars,81,units,0,v1353834273,11.3,13.44,,,,2
3,2017-01,Canada,2016A000011124,"Beef rib cuts, per kilogram",Dollars,81,units,0,v1353834311,11.41,20.17,,,,2
4,2017-01,Canada,2016A000011124,"Ground beef, per kilogram",Dollars,81,units,0,v1353834274,11.4,9.12,,,,2
5,2017-01,Canada,2016A000011124,"Pork loin cuts, per kilogram",Dollars,81,units,0,v1353834275,11.5,7.34,,,,2
6,2017-01,Canada,2016A000011124,"Pork rib cuts, per kilogram",Dollars,81,units,0,v1353834276,11.6,7.37,,,,2
7,2017-01,Canada,2016A000011124,"Pork shoulder cuts, per kilogram",Dollars,81,units,0,v1353834312,11.42,4.76,,,,2
8,2017-01,Canada,2016A000011124,"Whole chicken, per kilogram",Dollars,81,units,0,v1353834277,11.7,5.15,,,,2
9,2017-01,Canada,2016A000011124,"Chicken breasts, per kilogram",Dollars,81,units,0,v1353834278,11.8,11.38,,,,2


In [229]:
df.head(5)

Unnamed: 0,REF_DATE,GEO,DGUID,Products,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2017-01,Canada,2016A000011124,"Beef stewing cuts, per kilogram",Dollars,81,units,0,v1353834271,11.1,12.66,,,,2
1,2017-01,Canada,2016A000011124,"Beef striploin cuts, per kilogram",Dollars,81,units,0,v1353834272,11.2,21.94,,,,2
2,2017-01,Canada,2016A000011124,"Beef top sirloin cuts, per kilogram",Dollars,81,units,0,v1353834273,11.3,13.44,,,,2
3,2017-01,Canada,2016A000011124,"Beef rib cuts, per kilogram",Dollars,81,units,0,v1353834311,11.41,20.17,,,,2
4,2017-01,Canada,2016A000011124,"Ground beef, per kilogram",Dollars,81,units,0,v1353834274,11.4,9.12,,,,2


In [230]:
df.drop(columns=['DGUID','UOM','UOM_ID','SCALAR_FACTOR','SCALAR_ID','VECTOR','COORDINATE','STATUS','TERMINATED','DECIMALS','SYMBOL'],inplace=True)


In [231]:
# Extracting the Year and Month from the DATE column
df['Year'] = df['REF_DATE'].str.split("-").str[0]
df['Month'] = df['REF_DATE'].str.split("-").str[1]

In [232]:
# Extracting Only Product name from the Products Column rather that weight and litre
df['Product'] = df['Products'].str.split(",").str[0]

In [233]:
# Add three new columns for GEO (Latitude and Longitude) of each provinces
# Add Inflation rate for each Year from 2017 - 2024

# Inflation Rate
inflation_rate = {
    "2017": 1.6,
    "2018": 2.27,
    "2019": 1.95,
    "2020": 0.72,
    "2021": 3.4,
    "2022": 6.8,
    "2023": 3.88,
    "2024": 2.44
}

# GEO Coordinate
province_coordinates = {
    "Alberta": "53.9333, -116.5765",
    "British Columbia": "53.7267, -127.6476",
    "Manitoba": "53.7609, -98.8139",
    "New Brunswick": "46.5653, -66.4619",
    "Newfoundland and Labrador": "53.1355, -57.6604",
    "Nova Scotia": "44.6820, -63.7443",
    "Ontario": "51.2538, -85.3232",
    "Prince Edward Island": "46.5107, -63.4168",
    "Quebec": "52.9399, -73.5491",
    "Saskatchewan": "52.9399, -106.4509",
    "Northwest Territories": "64.8255, -124.8457",
    "Nunavut": "70.2998, -83.1076",
    "Yukon": "64.2823, -135.0000"
}


In [234]:
# Renaming the column with proper format and case
df = df.rename(columns={'REF_DATE':'Date','GEO':'Province','VALUE':'Price'})
# Assign canada as country column
df['Country'] = 'Canada'
# Dropping the Original Products Columns since we have a new column for the product names
df = df.drop(columns=['Products'])

In [235]:
df.head(2)

Unnamed: 0,Date,Province,Price,Year,Month,Product,Country
0,2017-01,Canada,12.66,2017,1,Beef stewing cuts,Canada
1,2017-01,Canada,21.94,2017,1,Beef striploin cuts,Canada


In [236]:
# Mapping GEO Coordinate and Yearly Inflation Rate
df['Coordinate'] = df['Province'].map(province_coordinates)
df['Lat'] = df['Coordinate'].str.split(",").str[0]
df['Long'] = df['Coordinate'].str.split(",").str[1]
df['Inflation'] = df['Year'].map(inflation_rate)
# Drop the Coordinate Column
df.drop(columns=['Coordinate'],inplace=True)

In [245]:
# Dropping all the rows with Provinces== Canada because
# We are analyzing the price vs product trend of all the provinces in Canada
df_clean =  df[
    (df['Province'] !='Canada')&
    (df['Year'] >"2017")
    ]

In [239]:
# Change the Data type of the Coordinate to Float
df_clean['Lat'] = df_clean['Lat'].astype(float)
df_clean['Long'] = df_clean['Long'].astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['Lat'] = df_clean['Lat'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['Long'] = df_clean['Long'].astype(float)


In [240]:
# df_clean = df_clean[['Date','Year','Month','Country','Province','Product','Price','Inflation','Lat','Long']]
# Column Rearrange
arrange_column = ['Date','Year','Month','Country','Province','Product','Price','Inflation','Lat','Long']
df_clean = df_clean[arrange_column]

In [250]:
df_clean.head(3)

Unnamed: 0,Date,Province,Price,Year,Month,Product,Country,Lat,Long,Inflation
14618,2018-01,Newfoundland and Labrador,13.4,2018,1,Beef stewing cuts,Canada,53.1355,-57.6604,2.27
14619,2018-01,Newfoundland and Labrador,16.29,2018,1,Beef striploin cuts,Canada,53.1355,-57.6604,2.27
14620,2018-01,Newfoundland and Labrador,17.14,2018,1,Beef top sirloin cuts,Canada,53.1355,-57.6604,2.27


In [242]:
# Add three new columns 
df_clean.dtypes

Date          object
Year          object
Month         object
Country       object
Province      object
Product       object
Price        float64
Inflation    float64
Lat          float64
Long         float64
dtype: object