In [61]:
import pandas as pd
import pandas_datareader as pdr

# Pandas DataReader

Used to extract data from a wide range of Internet sources into a pandas DataFrame

In [62]:
# Download daily price history for Microsoft from Stooq into a DataFrame.
pdr.DataReader(name='MSFT',  # ticker symbol; Stooq is queried by symbol, not by company name
               data_source='stooq',  # where the data will be acquired
               start='2010-01-01', end='2023-12-31'  # period of the data
               )

### Selecting and filtering DateTime objects inside DataFrames

In [63]:
# Sixteen dates spaced two months apart, each on the last day of a month,
# ending at or before the given end date.
dates = pd.date_range(
    end='2023-04-06',
    periods=16,
    freq='2M',
)

dates

DatetimeIndex(['2020-09-30', '2020-11-30', '2021-01-31', '2021-03-31',
               '2021-05-31', '2021-07-31', '2021-09-30', '2021-11-30',
               '2022-01-31', '2022-03-31', '2022-05-31', '2022-07-31',
               '2022-09-30', '2022-11-30', '2023-01-31', '2023-03-31'],
              dtype='datetime64[ns]', freq='2M')

In [64]:
# Frame indexed by the bi-monthly dates, holding a single constant column.
df = pd.DataFrame({'example': 1}, index=dates)

df

Unnamed: 0,example
2020-09-30,1
2020-11-30,1
2021-01-31,1
2021-03-31,1
2021-05-31,1
2021-07-31,1
2021-09-30,1
2021-11-30,1
2022-01-31,1
2022-03-31,1


In [65]:
# Only the value of the last expression is displayed by the notebook.
df.index #row labels (here a DatetimeIndex)
df.columns #column names
df.axes #list with the labels of both axes: the row index, then the columns
df.index.month_name() #month name of each date in the index
df.index.day_name() #weekday name of each date in the index

Index(['Wednesday', 'Monday', 'Sunday', 'Wednesday', 'Monday', 'Saturday',
       'Thursday', 'Tuesday', 'Monday', 'Thursday', 'Tuesday', 'Sunday',
       'Friday', 'Wednesday', 'Tuesday', 'Friday'],
      dtype='object')

    When the index holds datetime values, it is recommended to convert date strings to `pd.Timestamp` before using them as labels

In [66]:
# Look up one row by its exact timestamp label.
single_day = pd.Timestamp('2021-09-30')
df.loc[single_day]

example    1
Name: 2021-09-30 00:00:00, dtype: int64

In [67]:
# Passing a list of timestamps selects several rows at once.
wanted_days = [pd.Timestamp(day) for day in ('2021-09-30', '2023-01-31')]
df.loc[wanted_days]

Unnamed: 0,example
2021-09-30,1
2023-01-31,1


### .truncate()

In [68]:
# Keep only the rows between the two dates; both ends are included.
df.truncate(
    before='2021-09-30',  # earliest date kept
    after='2023-01-31',   # latest date kept
)

Unnamed: 0,example
2021-09-30,1
2021-11-30,1
2022-01-31,1
2022-03-31,1
2022-05-31,1
2022-07-31,1
2022-09-30,1
2022-11-30,1
2023-01-31,1


`.truncate()` returns the same rows as `.loc` slicing over the same date range

In [69]:
# Label-based slice over the same date range; both endpoints are kept.
range_start, range_end = '2021-09-30', '2023-01-31'
df.loc[range_start:range_end]

Unnamed: 0,example
2021-09-30,1
2021-11-30,1
2022-01-31,1
2022-03-31,1
2022-05-31,1
2022-07-31,1
2022-09-30,1
2022-11-30,1
2023-01-31,1


### .insert()

In [70]:
# Boolean flag per row: does the date fall on the last day of a quarter?
quarter = df.index.is_quarter_end

# DataFrame.insert mutates df in place and raises ValueError when the column
# already exists, so guard the call to keep this cell safe to re-run.
if 'Is Quarter End' not in df.columns:
    df.insert(1,                 # position: column index 1
              'Is Quarter End',  # column name
              quarter            # data to be inserted
              )

df

Unnamed: 0,example,Is Quarter End
2020-09-30,1,True
2020-11-30,1,False
2021-01-31,1,False
2021-03-31,1,True
2021-05-31,1,False
2021-07-31,1,False
2021-09-30,1,True
2021-11-30,1,False
2022-01-31,1,False
2022-03-31,1,True


### pd.DateOffset

Add or remove a set amount of time

In [90]:
#subtracting:
df.index + pd.DateOffset(days=-5)
#or
df.index - pd.DateOffset(days=5)

df.index+pd.DateOffset(weeks=5) #a positive amount needs no explicit + sign

df.index+pd.DateOffset(months=1)

# Several units can be combined in one offset, including time of day.
shifted = df.index+pd.DateOffset(years=-3, months=2, days=1, hours=6)
# Stored as an ad-hoc attribute because the later cells read `dates.index`.
dates.index = shifted
# Display the shifted index itself: `dates` alone would show the original,
# unshifted date range.
shifted

DatetimeIndex(['2017-12-01 06:00:00', '2018-01-31 06:00:00',
               '2018-04-01 06:00:00', '2018-06-01 06:00:00',
               '2018-08-01 06:00:00', '2018-10-01 06:00:00',
               '2018-12-01 06:00:00', '2019-01-31 06:00:00',
               '2019-04-01 06:00:00', '2019-06-01 06:00:00',
               '2019-08-01 06:00:00', '2019-10-01 06:00:00',
               '2019-12-01 06:00:00', '2020-01-31 06:00:00',
               '2020-04-01 06:00:00', '2020-06-01 06:00:00'],
              dtype='datetime64[ns]', freq=None)

### TimeSeries .tseries

In [91]:
# Adding MonthEnd moves every timestamp forward to the next month-end date.
to_month_end = pd.tseries.offsets.MonthEnd()
dates.index + to_month_end

DatetimeIndex(['2017-12-31 06:00:00', '2018-02-28 06:00:00',
               '2018-04-30 06:00:00', '2018-06-30 06:00:00',
               '2018-08-31 06:00:00', '2018-10-31 06:00:00',
               '2018-12-31 06:00:00', '2019-02-28 06:00:00',
               '2019-04-30 06:00:00', '2019-06-30 06:00:00',
               '2019-08-31 06:00:00', '2019-10-31 06:00:00',
               '2019-12-31 06:00:00', '2020-02-29 06:00:00',
               '2020-04-30 06:00:00', '2020-06-30 06:00:00'],
              dtype='datetime64[ns]', freq=None)

In [96]:
# Subtracting BMonthEnd moves every timestamp back to the previous
# business month-end date.
business_month_end = pd.tseries.offsets.BMonthEnd()
dates.index - business_month_end

DatetimeIndex(['2017-11-30 06:00:00', '2017-12-29 06:00:00',
               '2018-03-30 06:00:00', '2018-05-31 06:00:00',
               '2018-07-31 06:00:00', '2018-09-28 06:00:00',
               '2018-11-30 06:00:00', '2018-12-31 06:00:00',
               '2019-03-29 06:00:00', '2019-05-31 06:00:00',
               '2019-07-31 06:00:00', '2019-09-30 06:00:00',
               '2019-11-29 06:00:00', '2019-12-31 06:00:00',
               '2020-03-31 06:00:00', '2020-05-29 06:00:00'],
              dtype='datetime64[ns]', freq=None)

### Timedelta Object

Represents a duration: the amount of time between two dates or times

allowed keywords are [weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds]

In [101]:
# Take the first two dates of the index.
time_a, time_b = df.index[0], df.index[1]

# Subtracting one Timestamp from another yields a Timedelta.
time_b - time_a

Timedelta('61 days 00:00:00')

In [103]:
# A Timedelta can be added to a Timestamp to move it forward in time.
three_days = pd.Timedelta(days=3)
time_a + three_days

Timestamp('2020-10-03 00:00:00', freq='2M')

    Creating Timedelta objects

In [109]:
# Build a duration from keyword components; the components are added together.
pd.Timedelta(
    weeks=1,
    days=3,
    hours=5,
)

Timedelta('10 days 05:00:00')

`pd.Timedelta` also parses duration strings

In [112]:
# The same kind of duration, parsed from a human-readable string.
duration_text = '5 days, 10 hours, 7 minutes'
pd.Timedelta(duration_text)

Timedelta('5 days 10:07:00')