In [3]:
import pandas as pd
import datetime as dt

# Review of Python's datetime Module

In [20]:
someday = dt.date(2018, 9, 14) #expects year, month, and day to create a date object
str(someday) # returns it as string

'2018-09-14'

In [9]:
someday.year # returns attribute year
someday.month # returns attribute month
someday.day #returns attribute day

14

In [22]:
sometime = dt.datetime(2018, 9, 14, 8, 30, 45) # execpts same arguments as date (but also can take times - default time is midnight if none is given)
                        # year, month, day, hour, minute, second
str(sometime) # returns it as a string

'2018-09-14 08:30:45'

In [25]:
sometime.year
sometime.month
sometime.day
sometime.hour
sometime.minute
sometime.second
# ^ all used with datetime

45

# The pandas Timestamp Object

In [45]:
pd.Timestamp("2015-03-31") #accepts same inputs as python datetime object
pd.Timestamp("2015/03/31")
pd.Timestamp("2013, 2, 23")
pd.Timestamp("1/4/2018")
pd.Timestamp("19/12/2019")
pd.Timestamp("11/11/2019") # if numbers are less than 12, pd won't know if it's month or days
pd.Timestamp("2021-03-08 13:35:08") #generates date with time
pd.Timestamp("2018-4-13 3:34:23 PM")

Timestamp('2018-04-13 15:34:23')

In [46]:
pd.Timestamp(dt.date(2015, 1, 1)) # creates a pd Timestamp from a python(dt) date object

Timestamp('2015-01-01 00:00:00')

In [47]:
pd.Timestamp(dt.datetime(2018, 2, 3, 13, 35, 24)) #creates a pd Timestamp from a python(dt) datetime object

Timestamp('2018-02-03 13:35:24')

# The pandas DatetimeIndex Object

In [55]:
dates = ["2016/01/02", "2016/04/12", "2007/09/07"]
pd.DatetimeIndex(dates) # converts strings in to pandas teimstamps and stores them into a new DateTimeIndex object

DatetimeIndex(['2016-01-02', '2016-04-12', '2007-09-07'], dtype='datetime64[ns]', freq=None)

In [59]:
dates = [dt.date(2016, 1, 1), dt.date(1994, 6, 13), dt.date(2003, 12, 29)]
dtIndex = pd.DatetimeIndex(dates) # works with list of python date objects (converts them into datetime objects)

In [60]:
values = [100, 200, 300]
pd.Series(data = values, index = dtIndex)# creates a Series with arbitrary values we created and the index is our above-created index

2016-01-01    100
1994-06-13    200
2003-12-29    300
dtype: int64

# The pd.to_datetime() Method

In [None]:
# converts to timestamp or DateTimeIndex(for multiple)

In [64]:
pd.to_datetime(["2001-04-19"])
pd.to_datetime(dt.date(2019, 1, 3))
pd.to_datetime(dt.datetime(2019, 1, 3, 14, 32, 20))
pd.to_datetime(["2019/01/23", "2018-03-29", "2018", "July 4th, 1996"])

DatetimeIndex(['2019-01-23', '2018-03-29', '2018-01-01', '1996-07-04'], dtype='datetime64[ns]', freq=None)

In [69]:
times = pd.Series(["2019/01/23", "2018-03-29", "2018", "July 4th, 1996"])
times

0        2019/01/23
1        2018-03-29
2              2018
3    July 4th, 1996
dtype: object

In [71]:
pd.to_datetime(times) # converts the times series as a datetime

0   2019-01-23
1   2018-03-29
2   2018-01-01
3   1996-07-04
dtype: datetime64[ns]

In [73]:
dates = pd.Series(["July 4th, 1996", "10/04/1991", "Hello", "2015-02-31"])

In [75]:
pd.to_datetime(dates, errors = "coerce") # cannot proccess "Hello" or February 31st(bc it does not exist)
                        # 'errors' parameter defaults to 'raise'
                        # ^ "coerce" forces all other values as NaT(null)

0   1996-07-04
1   1991-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

In [76]:
pd.to_datetime([1349720105, 1349806505, 1349892905,
                1349979305, 1350065705], unit = "s") # unix datetime: seconds since January 1st, 1970 @ midnight
                                                    # use unit = "s" for seconds/datetime

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

# Create Range of Dates with the pd.date_range() Method, part 1

In [106]:
times = pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "D") # requires two of the three start, end, and period methods
                                                        # frequency parameter defaults to "(n)D" for days as the interval 
                                                        # ^ "B" is business days
                                                        # ^^ "W" is week(defaults to W-SUN: 1 sunday per week)
                                                        # ^^^ "H" is hour
                                                        # ^^^^ "M" is month(end)
                                                        # ^ "MS" is month start
                                                        # ^^^^^ "A" is year/annual(end)
                                                        # ^ "AS" is year/annual start

In [107]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [108]:
times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [109]:
pd.date_range(start = "2016-01-01", end = "2050-01-01", freq = "AS")

DatetimeIndex(['2016-01-01', '2017-01-01', '2018-01-01', '2019-01-01',
               '2020-01-01', '2021-01-01', '2022-01-01', '2023-01-01',
               '2024-01-01', '2025-01-01', '2026-01-01', '2027-01-01',
               '2028-01-01', '2029-01-01', '2030-01-01', '2031-01-01',
               '2032-01-01', '2033-01-01', '2034-01-01', '2035-01-01',
               '2036-01-01', '2037-01-01', '2038-01-01', '2039-01-01',
               '2040-01-01', '2041-01-01', '2042-01-01', '2043-01-01',
               '2044-01-01', '2045-01-01', '2046-01-01', '2047-01-01',
               '2048-01-01', '2049-01-01', '2050-01-01'],
              dtype='datetime64[ns]', freq='AS-JAN')

# Create Range of Dates with the pd.date_range() Method, part 2

In [146]:
pd.date_range("2012-09-09", periods = 20, freq = "7H") # periods represent the number of results we want returned

DatetimeIndex(['2012-09-09 00:00:00', '2012-09-09 07:00:00',
               '2012-09-09 14:00:00', '2012-09-09 21:00:00',
               '2012-09-10 04:00:00', '2012-09-10 11:00:00',
               '2012-09-10 18:00:00', '2012-09-11 01:00:00',
               '2012-09-11 08:00:00', '2012-09-11 15:00:00',
               '2012-09-11 22:00:00', '2012-09-12 05:00:00',
               '2012-09-12 12:00:00', '2012-09-12 19:00:00',
               '2012-09-13 02:00:00', '2012-09-13 09:00:00',
               '2012-09-13 16:00:00', '2012-09-13 23:00:00',
               '2012-09-14 06:00:00', '2012-09-14 13:00:00'],
              dtype='datetime64[ns]', freq='7H')

# Create Range of Dates with the pd.date_range() Method, part 3

In [141]:
pd.date_range(end = "1999-12-31", periods = 53, freq = "SM") # returns a given number (periods) of results proceeding the "end" date

DatetimeIndex(['1997-10-31', '1997-11-15', '1997-11-30', '1997-12-15',
               '1997-12-31', '1998-01-15', '1998-01-31', '1998-02-15',
               '1998-02-28', '1998-03-15', '1998-03-31', '1998-04-15',
               '1998-04-30', '1998-05-15', '1998-05-31', '1998-06-15',
               '1998-06-30', '1998-07-15', '1998-07-31', '1998-08-15',
               '1998-08-31', '1998-09-15', '1998-09-30', '1998-10-15',
               '1998-10-31', '1998-11-15', '1998-11-30', '1998-12-15',
               '1998-12-31', '1999-01-15', '1999-01-31', '1999-02-15',
               '1999-02-28', '1999-03-15', '1999-03-31', '1999-04-15',
               '1999-04-30', '1999-05-15', '1999-05-31', '1999-06-15',
               '1999-06-30', '1999-07-15', '1999-07-31', '1999-08-15',
               '1999-08-31', '1999-09-15', '1999-09-30', '1999-10-15',
               '1999-10-31', '1999-11-15', '1999-11-30', '1999-12-15',
               '1999-12-31'],
              dtype='datetime64[ns]', freq='SM-

In [144]:
pd.date_range(end = "2019-01-01", start = "1999-01-01", periods = 5) #using all three 'end', 'start', & 'period' is possible

DatetimeIndex(['1999-01-01 00:00:00', '2004-01-01 06:00:00',
               '2008-12-31 12:00:00', '2013-12-31 18:00:00',
               '2019-01-01 00:00:00'],
              dtype='datetime64[ns]', freq=None)

# The .dt Accessor

In [147]:
# used like the .str accessor to access Series(or DataFrame) info if they are datetime objects

In [150]:
bunch_of_dates = pd.date_range(start = "2000-01-01", end = "2010-12-31", freq = "24D")

In [152]:
s = pd.Series(bunch_of_dates)

In [153]:
s.head(3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [156]:
s.dt.day # returns the day number
s.dt.month # returns the month number
s.dt.weekday_name # returns day of week day represented by each index value

0       Saturday
1        Tuesday
2         Friday
3         Monday
4       Thursday
5         Sunday
6      Wednesday
7       Saturday
8        Tuesday
9         Friday
10        Monday
11      Thursday
12        Sunday
13     Wednesday
14      Saturday
15       Tuesday
16        Friday
17        Monday
18      Thursday
19        Sunday
20     Wednesday
21      Saturday
22       Tuesday
23        Friday
24        Monday
25      Thursday
26        Sunday
27     Wednesday
28      Saturday
29       Tuesday
         ...    
138       Sunday
139    Wednesday
140     Saturday
141      Tuesday
142       Friday
143       Monday
144     Thursday
145       Sunday
146    Wednesday
147     Saturday
148      Tuesday
149       Friday
150       Monday
151     Thursday
152       Sunday
153    Wednesday
154     Saturday
155      Tuesday
156       Friday
157       Monday
158     Thursday
159       Sunday
160    Wednesday
161     Saturday
162      Tuesday
163       Friday
164       Monday
165     Thursd

In [164]:
mask = s.dt.is_quarter_start   # returns a boolean; True if day is start of a ____
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

# Import Financial DataSet with pandas_datareader Library

In [105]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [7]:
company = "MSFT"
start = "2016-01-01"
end = "2019-12-31"
stocks = data.DataReader(name = company, data_source = "iex", start = start, end = end) 

In [8]:
stocks.head(3)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-04,50.4316,50.8773,49.5682,50.8773,53777963
2016-01-05,50.998,51.4251,50.6359,51.1094,34079674
2016-01-06,50.4316,50.506,49.8003,50.181,39518863


In [16]:
stocks.values # returns list sof 5 values of each row, each of which is a value in a larger array(or list, idk)
stocks.columns # returns list of column names
stocks.index # returns DateTime index
stocks.axes # combines result from '.index' and '.columns' attributes

[Index(['2016-01-04', '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
        '2016-01-11', '2016-01-12', '2016-01-13', '2016-01-14', '2016-01-15',
        ...
        '2019-05-07', '2019-05-08', '2019-05-09', '2019-05-10', '2019-05-13',
        '2019-05-14', '2019-05-15', '2019-05-16', '2019-05-17', '2019-05-20'],
       dtype='object', name='date', length=850),
 Index(['open', 'high', 'low', 'close', 'volume'], dtype='object')]

# Selecting from a DataFrame with a DateTimeIndex

In [72]:
stocks = data.DataReader(name = company, data_source = "iex", start = start, end = end) 
stocks.head(3)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-04,50.4316,50.8773,49.5682,50.8773,53777963
2016-01-05,50.998,51.4251,50.6359,51.1094,34079674
2016-01-06,50.4316,50.506,49.8003,50.181,39518863


In [73]:
stocks.loc["2016-01-07"] # returns a Series where index labels are the column nams and the values are form the given row
stocks.iloc[3] # same as above but based on index position

open      4.892760e+01
high      4.965640e+01
low       4.834270e+01
close     4.843550e+01
volume    5.656485e+07
Name: 2016-01-07, dtype: float64

In [74]:
stocks.loc["2018-01-01"] # error if the stock market is closed

KeyError: '2018-01-01'

In [76]:
stocks.loc["2017-10-01" : "2017-10-08"]

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-10-02,72.4926,72.7837,72.0899,72.3956,15304762
2017-10-03,72.4538,72.6576,71.9929,72.056,12190403
2017-10-04,71.8037,72.5023,71.5223,72.4732,13317681
2017-10-05,72.9875,73.8608,72.7352,73.7152,21195261
2017-10-06,73.4241,73.7734,73.298,73.7443,13959814


In [77]:
birthdays = pd.date_range(start = "1999-09-14", end = "2019-12-31", freq = pd.DateOffset(years = 1)) # DateOffset seperate intervals 1 year seperate

In [80]:
mask = stocks.index.isin(birthdays) # returns boolean / should have some Trues for the stock of days that are my birthday from above
# only returns False, even on my Birthday and IDK why

In [82]:
stocks[mask] # this is supposed to return a table of all those stocks with on my birthday
# the 'mask' above only returns false and IDK why

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


# Timestamp Object Attibutes

In [None]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [102]:
stocks = data.DataReader(name = company, data_source = "iex", start = start, end = end)
pd.DatetimeIndex(stocks) # do something here?
stocks.head(3)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-01-04,50.4316,50.8773,49.5682,50.8773,53777963
2016-01-05,50.998,51.4251,50.6359,51.1094,34079674
2016-01-06,50.4316,50.506,49.8003,50.181,39518863


In [103]:
stocks.index[300]

'2017-03-14'

In [104]:
someday.day

AttributeError: 'str' object has no attribute 'day'