# 1. BASIC PYTHON DATETIME MODULE

In [1]:
# datetime is one of Python's core standard libraries 
# We are going to use two classes to deal with dates/times: (1) date and (2) datetime
# date: helps us define dates only, without including time (year, month, day)
# datetime: helps us define dates and times together (year, month, day, hour, minute, second, microsecond)
# Let's import datetime module as dt
import calendar
import datetime as dt

import pandas as pd

In [2]:
# Build a calendar date (no time component) with the date class of the datetime module
# Keyword arguments make the order of the fields explicit
my_date = dt.date(year=2020, month=3, day=22)
my_date

datetime.date(2020, 3, 22)

In [3]:
# Check the data type (a datetime.date object)
type(my_date)

datetime.date

In [4]:
# Convert it into a string to view the date (a date object carries no time component)
str(my_date)

'2020-03-22'

In [5]:
# The day attribute returns the day contained in the date object
my_date.day

22

In [6]:
# The month attribute returns the month contained in the date object
my_date.month

3

In [7]:
# The year attribute returns the year contained in the date object
my_date.year

2020

In [8]:
# Define a combined date and time with the datetime class
# Keyword arguments make the order of the fields explicit
my_datetime = dt.datetime(year=2020, month=3, day=22, hour=8, minute=20, second=50)

In [10]:
# Convert it into a string to view the date and time
str(my_datetime)

'2020-03-22 08:20:50'

In [11]:
# Obtain the hour contained in the datetime object
my_datetime.hour

8

In [12]:
# Obtain the minutes contained in the datetime object
my_datetime.minute

20

In [13]:
# Print a text calendar for March 2021
# (calendar is imported together with the other modules in the first cell)
print(calendar.month(2021, 3))

     March 2021
Mo Tu We Th Fr Sa Su
 1  2  3  4  5  6  7
 8  9 10 11 12 13 14
15 16 17 18 19 20 21
22 23 24 25 26 27 28
29 30 31



In [14]:
# You can also use pd.to_datetime() to convert a regular Pandas Series of date strings into datetime.
# First, build a Series of strings written in three different date formats:
dates = pd.Series(['2020/03/22', '2020-08-25', 'March 22nd 2020'])
dates

0         2020/03/22
1         2020-08-25
2    March 22nd 2020
dtype: object

In [15]:
# pd.to_datetime() converts dates and times in string format into datetime objects.
# The three strings in `dates` are each written in a different format. Recent pandas
# releases (2.0 and later) infer a single format from the first element and raise a
# ValueError when the remaining strings do not match it, so parse element by element:
# every string then has its format inferred independently, on old and new pandas alike.
my_dates = dates.apply(pd.to_datetime)
my_dates

0   2020-03-22
1   2020-08-25
2   2020-03-22
dtype: datetime64[ns]

**MINI CHALLENGE #1:**
- **Use Python's datetime class to define the date and time of your birth, then convert it into string format**

In [16]:
# Mini challenge solution: define a birth date and time, then convert it into a string
# (dt is already imported in the first cell, so it is not imported again here)
my_birth = dt.datetime(1992, 10, 25, 8, 9, 20)
str(my_birth)

'1992-10-25 08:09:20'

# 2. HANDLING DATES AND TIMES USING PANDAS 

In [2]:
# Timestamp is the pandas equivalent of Python's datetime.datetime and is interchangeable with it in most cases.
# It's the type used for the entries that make up a DatetimeIndex and other time-series-oriented data structures in pandas.
# Check this out: https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html
pd.Timestamp('2020-3-22')

Timestamp('2020-03-22 00:00:00')

In [3]:
# A Timestamp can also be built from integer components: year, month, day, hour.
# Passing the integers directly (rather than one comma-separated string such as
# '2020, 3, 22, 10') avoids depending on lenient free-form string parsing.
pd.Timestamp(2020, 3, 22, 10)

Timestamp('2020-03-22 10:00:00')

In [4]:
# Or you can define a Pandas Timestamp using Python datetime object 
pd.Timestamp(dt.datetime(2020, 3, 22, 8, 20, 50))

Timestamp('2020-03-22 08:20:50')

In [5]:
# Calculate the difference between two dates.
# Each Timestamp is built from integer components (year, month, day, hour) rather than
# from a comma-separated string, so no lenient free-form string parsing is involved.
day_1 = pd.Timestamp(1998, 3, 22, 10)
day_2 = pd.Timestamp(2021, 3, 22, 10)
# Subtracting one Timestamp from another yields a Timedelta
delta = day_2 - day_1
print(delta)

8401 days 00:00:00


In [6]:
# Three transaction dates, one month apart, each falling on the 22nd
date_1, date_2, date_3 = (dt.date(2020, month, 22) for month in (3, 4, 5))

In [7]:
# Let's put the 3 dates in a list as follows
dates_list = [date_1, date_2, date_3]

# Use the pandas DatetimeIndex constructor to convert the list into a datetime datatype:
# it creates an index holding the collection of dates
dates_index = pd.DatetimeIndex(dates_list)

In [8]:
dates_index

DatetimeIndex(['2020-03-22', '2020-04-22', '2020-05-22'], dtype='datetime64[ns]', freq=None)

In [9]:
# Define a list that carries 3 values corresponding to store sales
sales = [50, 55, 60]

In [10]:
# Define a Pandas Series using datetime and values as follows:
sales = pd.Series(data=sales, index=dates_index)
sales

2020-03-22    50
2020-04-22    55
2020-05-22    60
dtype: int64

In [12]:
# you can also define a range of dates as follows:
my_days = pd.date_range(start='2020-01-01', end='2020-04-01', freq='D')
my_days

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
               '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
               '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-01',
               '2020-02-02', '2020-02-03', '2020-02-04', '2020-02-05',
               '2020-02-06', '2020-02-07', '2020-02-08', '2020-02-09',
               '2020-02-10', '2020-02-11', '2020-02-12', '2020-02-13',
               '2020-02-14', '2020-02-15', '2020-02-16', '2020-02-17',
               '2020-02-18', '2020-02-19', '2020-02-20', '2020-02-21',
               '2020-02-22', '2020-02-23', '2020-02-24', '2020-02-25',
      

In [13]:
# Datatype is a datetimeindex (which is a collection of dates!)
type(my_days)

pandas.core.indexes.datetimes.DatetimeIndex

In [15]:
# If you pick any sample date, it's represented in the Timestamp datatype
# Note: a notebook cell only displays its last expression, so the value of the
# first line below is not shown; only the type on the second line is displayed
my_days[4]
type(my_days[4])

pandas._libs.tslibs.timestamps.Timestamp

In [16]:
# You can also define a range of month-end dates.
# The frequency is given as a MonthEnd offset object instead of the 'M' string alias:
# newer pandas releases deprecate 'M' in favour of 'ME', while older releases do not
# accept 'ME', so the offset object is the spelling that works across versions.
my_days = pd.date_range(start='2020-01-01', end='2020-08-01', freq=pd.offsets.MonthEnd())
my_days

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31'],
              dtype='datetime64[ns]', freq='M')

In [17]:
# Alternative way of defining a list of dates
pd.date_range(start='2020-01-01', periods=20, freq='D')

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20'],
              dtype='datetime64[ns]', freq='D')

**MINI CHALLENGE #2:**
- **Obtain the business days between 2020-01-01 and 2020-04-01**

In [18]:
my_days = pd.date_range(start='2020-01-01', end='2020-04-01', freq='B')
my_days

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-06',
               '2020-01-07', '2020-01-08', '2020-01-09', '2020-01-10',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-20', '2020-01-21', '2020-01-22',
               '2020-01-23', '2020-01-24', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31', '2020-02-03',
               '2020-02-04', '2020-02-05', '2020-02-06', '2020-02-07',
               '2020-02-10', '2020-02-11', '2020-02-12', '2020-02-13',
               '2020-02-14', '2020-02-17', '2020-02-18', '2020-02-19',
               '2020-02-20', '2020-02-21', '2020-02-24', '2020-02-25',
               '2020-02-26', '2020-02-27', '2020-02-28', '2020-03-02',
               '2020-03-03', '2020-03-04', '2020-03-05', '2020-03-06',
               '2020-03-09', '2020-03-10', '2020-03-11', '2020-03-12',
               '2020-03-13', '2020-03-16', '2020-03-17', '2020-03-18',
      

# 3. DATETIME IN ACTION! PRACTICAL EXAMPLE PART #1

In [None]:
# dataframes creation for both training and testing datasets 


# Convert date column to datetime format


# Date: The date of the observation
# AveragePrice: the average price of a single avocado
# type: conventional or organic
# Region: the city or region of the observation
# Total Volume: Total number of avocados sold


In [None]:
# You can select any column to be the index for the DataFrame


**MINI CHALLENGE #3:**
- **What are the datatypes of each column in the avocado_df DataFrame?**

# 4. DATETIME IN ACTION! PRACTICAL EXAMPLE PART #2

In [None]:
# access elements with a specific datetime index using .loc


In [None]:
# You can use iloc if you decide to use numeric indexes


In [None]:
# Access more than one element within a given date range


In [None]:
# Truncate a sorted DataFrame given index bounds.
# Make sure to sort the dataframe before applying truncate 


In [None]:
# you can offset (shift) all dates by days or month as follows


In [None]:
# Let's revert back to the original dataset!


In [None]:
# Once you have the index set to DateTime, this unlocks its power by performing aggregation
# Aggregating the data by year (A = annual; pandas 2.2 and later spell this alias 'YE')


In [None]:
# Aggregating the data by month (M = month end; pandas 2.2 and later spell this alias 'ME')


In [None]:
# You can obtain the maximum value for each Quarter end as follows: 


In [None]:
# You can locate the rows that satisfy a given criterion as follows: 


In [None]:
# You can create new features from the date time information as follows


In [None]:
# Don't forget to set the Date to be in the index for the DataFrame


**MINI CHALLENGE #4:**
- **Calculate the average avocado price per quarter end**

# 5. DATA PLOTTING (STRETCH ASSIGNMENT!)

In [None]:
# Once you have index set to DateTime, this unlocks its power by performing aggregation
# Aggregating the data by month end


In [None]:
# plot the avocado average price per month 


In [None]:
# plot the avocado average price per quarter  


In [None]:
# plot the avocado average price per annual basis 


In [None]:


# Violin plot of the AveragePrice values for each value of the type column
# NOTE(review): plt, sns and avocado_df are not defined in the visible cells -
# presumably matplotlib.pyplot, seaborn and the avocado DataFrame; they must be
# imported/created in an earlier cell before this one can run - TODO confirm
plt.figure(figsize = (10, 7))
sns.violinplot(y = 'AveragePrice', x = 'type', data = avocado_df)

In [None]:
# plot the distribution plot of avocado prices (histogram + Kernel Density Estimate)


In [None]:
# Catplot is used to draw categorical plots onto a FacetGrid.
# Catplot provides access to several axes-level functions that show the relationship between a numerical and one or more categorical variables.



**MINI CHALLENGE #5:**
- **Plot the average price of avocado on a weekly basis**
- **Plot Catplot for price vs. region for organic food**