In [None]:
#TIME METHODS FOR DATE AND TIME DATA
#As you may already know, basic Python has a datetime object as well as a date or time object, 
#and it essentially is just an object that's specialized containing date and time information.
#That means you can extract things like what hour it is or what year it is from the datetime object.
#And Pandas, just like it had a list of .str methods, has a list of what it calls .dt methods, 
#datetime methods that allow us to easily extract information from a datetime object.
#And this is really useful when it comes to feature engineering and machine learning later on.

#Later on, we're gonna see that many machine learning methods are not able to actually understand a full datetime object.
#However, they can easily understand things that are more categorical such as a day of the week, weekend versus weekday or AM versus PM.

#ganna let me access info from a column in datetime format

In [5]:
import numpy as np
import pandas as pd

from datetime import datetime 

In [9]:
myyear = 2015
mymonth = 1
myday = 1
myhour = 2
mymin = 30
mysec = 15
#to show order or structure of a datetime object in python

In [15]:
#pass in my date (Y,M,D)
mydate = datetime(myyear,mymonth,myday)
mydate

datetime.datetime(2015, 1, 1, 0, 0)

In [21]:
mydatetime = datetime(myyear,mymonth,myday, myhour, mymin, mysec)
mydatetime

datetime.datetime(2015, 1, 1, 2, 30, 15)

In [49]:
#Well, this is a datetime object, meaning that it's actually cognizant of the fact that it contains information like the year or the month,...
mydatetime.day
#this becomes very useful in feature engineering

1

In [None]:
#TRANSFORM DF FROM OBJECT TO DATETIME FORMAT

In [35]:
myser = pd.Series(['Nov 3,1990', '2000-01-01', None])
myser
#righ now it understands that it is a string

0    Nov 3,1990
1    2000-01-01
2          None
dtype: object

In [45]:
#we need to ask pandas to try to understand just from a string code what is the actual datetime object:
timeser = pd.to_datetime(myser)
#gonna use yaer, month, day format
timeser
#now it know its a datetime

  timeser = pd.to_datetime(myser)


0   1990-11-03
1   2000-01-01
2          NaT
dtype: datetime64[ns]

In [51]:
timeser[0].year

1990

In [55]:
#HOW TO DIFFERENCIATE BTW EU AND US FORMATS:
obvi_UE ='31-12-2000'
pd.to_datetime(obvi_UE)

  pd.to_datetime(obvi_UE)


Timestamp('2000-12-31 00:00:00')

In [57]:
UE = '10-12-2000'
pd.to_datetime(UE)

Timestamp('2000-10-12 00:00:00')

In [59]:
#gonna read it as if imputted as M/D/Y cos it was developed by american -- i include parameter of 'dayfirst' (boolean) to indicate the day is first
pd.to_datetime(UE, dayfirst = True)

Timestamp('2000-12-10 00:00:00')

In [63]:
#if you've got your df with mixed UE and US you should clean it first before pandas

In [1]:
#HOW TO DEAL W CUSTOM TIME STREAM 
style_date = '12--Dec--2000'

In [9]:
pd.to_datetime(style_date, format= '%d--%b--%Y')
#to tell pandas what format to expect
#see docuumentation (https://docs.python.org/3/library/datetime.html) to see how to pass in hte correct format each time

Timestamp('2000-12-12 00:00:00')

In [11]:
custom = '12th of Dec 2000'
pd.to_datetime(custom)

Timestamp('2000-12-12 00:00:00')

In [17]:
#HOW TO READ CSV FILE W TIMESTAMP OBJECTS AND READ EM AS SO
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv')
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [21]:
sales['DATE']
#for now its just thinking of it as a string
#so we got to convert it so that it sees it as datetime

0      1992-01-01
1      1992-02-01
2      1992-03-01
3      1992-04-01
4      1992-05-01
          ...    
335    2019-12-01
336    2020-01-01
337    2020-02-01
338    2020-03-01
339    2020-04-01
Name: DATE, Length: 340, dtype: object

In [31]:
sales['DATE'] = pd.to_datetime(sales['DATE'])
sales['DATE']
#now it sees it as datetime

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [35]:
#so i can call one specific row and ask for the year f ex
sales['DATE'][0].year

1992

In [39]:
#TO DIRECTLY READ ON THE CSV FILE AND AS A DATETIME:
#i say to read it but specify 'parse_dates = []' and i pass in the columns i want to be treating as datetimes
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=[0])
#the 0 refers to the first column (can see w head())

In [43]:
sales['DATE']
#itll automatically read it as datetime

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [51]:
#HOW TO GROUP BY MONTH, YEAR,... WHE I HAVE TIME AS THE INDEX
sales = sales.set_index('DATE')

KeyError: "None of ['DATE'] are in the columns"

In [53]:
sales

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822
...,...
2019-12-01,6630
2020-01-01,4388
2020-02-01,4533
2020-03-01,5562


In [55]:
#now i call the 'resample' function - its like group by but for datetime format
sales.resample(rule= 'A').mean()
#'A' is for year - to see which to use depending on what i want see documentation to express it
#as in the group by i have to then specify the aggregation method or it wont do anything

  sales.resample(rule= 'A').mean()


Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


In [59]:
#USE .DT
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=[0])
#just as with text you have '.str', with datetime you have '.dt'

In [61]:
sales['DATE'].dt.year

0      1992
1      1992
2      1992
3      1992
4      1992
       ... 
335    2019
336    2020
337    2020
338    2020
339    2020
Name: DATE, Length: 340, dtype: int32