# Pandas Cont...

# Missing Data

In [5]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator so this frame (and every output derived from
# it below) is identical on each Restart & Run All.
np.random.seed(42)

# 5 rows x 3 columns of standard-normal draws; the row labels skip letters on purpose.
df = pd.DataFrame(
    np.random.randn(5, 3),
    index=['a', 'c', 'e', 'f', 'h'],
    columns=['one', 'two', 'three'],
)
df

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
c,-1.006716,0.17241,-0.328887
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
h,-1.265919,1.161068,0.541601


In [7]:
# Reindexing: ask for every label from 'a' to 'h'; labels that did not exist
# before (b, d, g) come back as all-NaN rows.
df = df.reindex(list('abcdefgh'))
df

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
b,,,
c,-1.006716,0.17241,-0.328887
d,,,
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
g,,,
h,-1.265919,1.161068,0.541601


# Check for Missing Values
To make detecting missing values easier (and consistent across different array dtypes), pandas provides the isnull() and notnull() functions, which are also available as DataFrame and Series methods.

In [9]:
# Boolean mask with the same shape as df: True wherever a value is missing
df.isnull()

Unnamed: 0,one,two,three
a,False,False,False
b,True,True,True
c,False,False,False
d,True,True,True
e,False,False,False
f,False,False,False
g,True,True,True
h,False,False,False


In [11]:
# Inverse mask: True wherever a value is present
df.notnull()

Unnamed: 0,one,two,three
a,True,True,True
b,False,False,False
c,True,True,True
d,False,False,False
e,True,True,True
f,True,True,True
g,False,False,False
h,True,True,True


# Calculations with Missing Data

 1. When summing data, NA will be treated as Zero

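 2. If the data are all NA, then the result of arithmetic on them will be NA (note that `sum()` itself returns 0 for all-NA data in pandas 0.22 and later)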

In [12]:
# Show the reindexed frame again; rows b, d and g are entirely NaN
df

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
b,,,
c,-1.006716,0.17241,-0.328887
d,,,
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
g,,,
h,-1.265919,1.161068,0.541601


In [13]:
# Column totals. With skipna=True (the default) missing values are skipped,
# which is equivalent to treating them as zero in the sum.
df.sum(axis=0, skipna=True)

one     -2.455676
two      2.455902
three    0.965441
dtype: float64

In [15]:
#If the data are all NA, then the result will be NA
# Positions 1, 3 and 6 are the all-NaN rows b, d and g; element-wise `+`
# propagates NaN, so every entry of the result is NaN.
df.iloc[1]+df.iloc[3]+df.iloc[6]

one     NaN
two     NaN
three   NaN
dtype: float64

In [25]:
# Frame used by the fill examples below (still contains the NaN rows b, d, g)
df

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
b,,,
c,-1.006716,0.17241,-0.328887
d,,,
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
g,,,
h,-1.265919,1.161068,0.541601


In [17]:
# Replace NaN with a scalar value.
# Every missing entry becomes 0; a new frame is returned and df itself is left unchanged.
df.fillna(value=0)

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
b,0.0,0.0,0.0
c,-1.006716,0.17241,-0.328887
d,0.0,0.0,0.0
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
g,0.0,0.0,0.0
h,-1.265919,1.161068,0.541601


In [22]:
#pad/ffill
#Forward fill: each NaN takes the last valid value above it in the same column.
# fillna(method='ffill') is deprecated in recent pandas; DataFrame.ffill() is the
# equivalent call and is available on old and new versions alike.
df.ffill()

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
b,0.533124,0.256272,0.119882
c,-1.006716,0.17241,-0.328887
d,-1.006716,0.17241,-0.328887
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
g,-0.878242,0.634532,0.498439
h,-1.265919,1.161068,0.541601


In [24]:
#bfill/backfill
#Backward fill: each NaN takes the next valid value below it in the same column.
# fillna(method='bfill') is deprecated in recent pandas; DataFrame.bfill() is the
# equivalent call and is available on old and new versions alike.
df.bfill()

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
b,-1.006716,0.17241,-0.328887
c,-1.006716,0.17241,-0.328887
d,0.162076,0.231619,0.134407
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
g,-1.265919,1.161068,0.541601
h,-1.265919,1.161068,0.541601


# Drop Missing Values
If you want to simply exclude the missing values, then use the dropna function along with the axis argument. By default, axis=0, i.e., along row, which means that if any value within a row is NA then the whole row is excluded.


In [32]:
# Defaults spelled out: axis=0 works row-wise and how='any' drops a row as
# soon as one of its values is missing.
df.dropna(axis=0, how='any')

Unnamed: 0,one,two,three
a,0.533124,0.256272,0.119882
c,-1.006716,0.17241,-0.328887
e,0.162076,0.231619,0.134407
f,-0.878242,0.634532,0.498439
h,-1.265919,1.161068,0.541601


In [33]:
# Drop every column that contains at least one missing value.
# All three columns have NaN in rows b, d and g, so only the index remains.
df.dropna(axis='columns')

a
b
c
d
e
f
g
h


In [35]:
# Small two-column integer frame used by the replace() example
df1 = pd.DataFrame(
    dict(
        one=[10, 20, 30, 40, 50, 2000],
        two=[1000, 0, 30, 40, 50, 60],
    )
)
df1

Unnamed: 0,one,two
0,10,1000
1,20,0
2,30,30
3,40,40
4,50,50
5,2000,60


In [38]:
# Replace specific values anywhere in the frame: the mapping is old value -> new value
df1.replace(to_replace={1000: 44, 2000: 550})

Unnamed: 0,one,two
0,10,44
1,20,0
2,30,30
3,40,40
4,50,50
5,550,60


# Group by Operation 
 1. Splitting the Object
 2. Applying a function
 3. Combining the results
 
We split the data into sets and apply some functionality to each subset. In the apply step, we can perform the following operations −

 1. Aggregation − computing a summary statistic
 2. Transformation − perform some group-specific operation
 3. Filtration − discarding the data with some condition

In [66]:
# Sample data set: one row per team and season with that season's rank and points
ipl_data = dict(
    Team=['Riders', 'Riders', 'Devils', 'Devils', 'Kings', 'Kings',
          'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
    Rank=[1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    Year=[2014, 2015, 2014, 2015, 2014, 2015,
          2016, 2017, 2016, 2014, 2015, 2017],
    Points=[876, 789, 863, 673, 741, 812,
            756, 788, 694, 701, 804, 690],
)
df = pd.DataFrame(ipl_data)
df

Unnamed: 0,Team,Rank,Year,Points
0,Riders,1,2014,876
1,Riders,2,2015,789
2,Devils,2,2014,863
3,Devils,3,2015,673
4,Kings,3,2014,741
5,Kings,4,2015,812
6,Kings,1,2016,756
7,Kings,1,2017,788
8,Riders,2,2016,694
9,Royals,4,2014,701


# Split Data into Groups
A pandas object can be split along any of its axes. There are multiple ways to split an object, such as −

 1. obj.groupby('key')
 2. obj.groupby(['key1','key2'])
 3. obj.groupby(key,axis=1)

In [67]:
#Group the rows by the values in the 'Team' column; the result is a DataFrameGroupBy object
df.groupby('Team')

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x000002C4B89E6358>

In [68]:
#View Groups
# .groups maps each team name to the index labels of the rows in that group
df.groupby('Team').groups

{'Devils': Int64Index([2, 3], dtype='int64'),
 'Kings': Int64Index([4, 5, 6, 7], dtype='int64'),
 'Riders': Int64Index([0, 1, 8, 11], dtype='int64'),
 'Royals': Int64Index([9, 10], dtype='int64')}

In [69]:
#Group by two columns: each group key is a (Team, Year) tuple
df.groupby(['Team','Year']).groups

{('Devils', 2014): Int64Index([2], dtype='int64'),
 ('Devils', 2015): Int64Index([3], dtype='int64'),
 ('Kings', 2014): Int64Index([4], dtype='int64'),
 ('Kings', 2015): Int64Index([5], dtype='int64'),
 ('Kings', 2016): Int64Index([6], dtype='int64'),
 ('Kings', 2017): Int64Index([7], dtype='int64'),
 ('Riders', 2014): Int64Index([0], dtype='int64'),
 ('Riders', 2015): Int64Index([1], dtype='int64'),
 ('Riders', 2016): Int64Index([8], dtype='int64'),
 ('Riders', 2017): Int64Index([11], dtype='int64'),
 ('Royals', 2014): Int64Index([9], dtype='int64'),
 ('Royals', 2015): Int64Index([10], dtype='int64')}

# Iterating through Groups
With the groupby object in hand, we can iterate over it, much like the result of itertools.groupby: each step yields a (group name, group) pair.

In [70]:
# Group the rows by season; itr_group is reused by the get_group() example
itr_group = df.groupby('Year')

# Iterating a GroupBy yields (key, sub-frame) pairs, one per distinct Year
for name,group in itr_group:
    print(name,group)

2014      Team  Rank  Year  Points
0  Riders     1  2014     876
2  Devils     2  2014     863
4   Kings     3  2014     741
9  Royals     4  2014     701
2015       Team  Rank  Year  Points
1   Riders     2  2015     789
3   Devils     3  2015     673
5    Kings     4  2015     812
10  Royals     1  2015     804
2016      Team  Rank  Year  Points
6   Kings     1  2016     756
8  Riders     2  2016     694
2017       Team  Rank  Year  Points
7    Kings     1  2017     788
11  Riders     2  2017     690


# Select a Group
Using the get_group() method, we can select a single group.

In [71]:
# Return only the rows whose Year is 2014
itr_group.get_group(2014)

Unnamed: 0,Team,Rank,Year,Points
0,Riders,1,2014,876
2,Devils,2,2014,863
4,Kings,3,2014,741
9,Royals,4,2014,701


# Aggregations
An aggregated function returns a single aggregated value for each group. Once the group by object is created, several aggregation operations can be performed on the grouped data.

In [72]:
#Aggregation: mean of Points for each Year
idgroup = df.groupby('Year')
# Use the string alias 'mean': passing the np.mean callable to agg() is
# deprecated in recent pandas, and the alias gives the same result on every version.
idgroup['Points'].agg('mean')

Year
2014    795.25
2015    769.50
2016    725.00
2017    739.00
Name: Points, dtype: float64

In [73]:
# Aggregation on size: number of entries per team, reported for every column
grouped = df.groupby('Team')
grouped.aggregate(np.size)

Unnamed: 0_level_0,Rank,Year,Points
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Devils,2,2,2
Kings,4,4,4
Riders,4,4,4
Royals,2,2,2


In [74]:
#Applying Multiple Aggregation
# One output column per statistic. String aliases replace the NumPy callables,
# which recent pandas deprecates inside agg(); the resulting column names
# (sum, mean, std, size) are unchanged.
grouped['Points'].agg(['sum', 'mean', 'std', 'size'])

Unnamed: 0_level_0,sum,mean,std,size
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Devils,1536,768.0,134.350288,2
Kings,3097,774.25,31.899582,4
Riders,3049,762.25,88.567771,4
Royals,1505,752.5,72.831998,2


# Filtration
Filtration filters the data on a defined criterion and returns the matching subset of the data. The filter() function is used to filter the data.



In [75]:
# Keep only the rows of teams that appear at least three times in the data
df.groupby('Team').filter(
    lambda team_rows: len(team_rows) >= 3
)

Unnamed: 0,Team,Rank,Year,Points
0,Riders,1,2014,876
1,Riders,2,2015,789
4,Kings,3,2014,741
5,Kings,4,2015,812
6,Kings,1,2016,756
7,Kings,1,2017,788
8,Riders,2,2016,694
11,Riders,2,2017,690


# Transformations
A transformation on a group or a column returns an object that is indexed the same as, and is the same size as, the object being grouped. Thus, the transform function should return a result that is the same size as the group chunk it receives.

In [77]:
grouped = df.groupby('Team')


def score(x):
    """Standardise a group's column (subtract its mean, divide by its std) and scale by 10."""
    centred = x - x.mean()
    return centred / x.std() * 10


# transform() returns one value per original row, computed within each team
grouped.transform(score)

Unnamed: 0,Rank,Year,Points
0,-15.0,-11.61895,12.843272
1,5.0,-3.872983,3.020286
2,-7.071068,-7.071068,7.071068
3,7.071068,7.071068,-7.071068
4,5.0,-11.61895,-10.423334
5,11.666667,-3.872983,11.834011
6,-8.333333,3.872983,-5.721078
7,-8.333333,11.61895,4.310401
8,5.0,3.872983,-7.705963
9,7.071068,-7.071068,-7.071068


# Merging/Joining
Pandas provides a single function, merge, as the entry point for all standard database join operations between DataFrame objects.

Syntax:
------
pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None,
left_index=False, right_index=False, sort=False)

Parameters:
----------
     left − A DataFrame object.

    right − Another DataFrame object.

    on − Columns (names) to join on. Must be found in both the left and right DataFrame objects.

    left_on − Columns from the left DataFrame to use as keys. Can either be column names or arrays with length equal to the length of the DataFrame.

    right_on − Columns from the right DataFrame to use as keys. Can either be column names or arrays with length equal to the length of the DataFrame.

    left_index − If True, use the index (row labels) from the left DataFrame as its join key(s). In case of a DataFrame with a MultiIndex (hierarchical), the number of levels must match the number of join keys from the right DataFrame.

    right_index − Same usage as left_index for the right DataFrame.

    how − One of 'left', 'right', 'outer', 'inner'. Defaults to inner. Each method has been described below.
    
    Merge Method    SQL Equivalent      Description
    left            LEFT OUTER JOIN     Use keys from left object
    right           RIGHT OUTER JOIN    Use keys from right object
    outer           FULL OUTER JOIN     Use union of keys
    inner           INNER JOIN          Use intersection of keys

    sort − Sort the result DataFrame by the join keys in lexicographical order. Defaults to False; leaving it off avoids the cost of sorting, and the order of the join keys then depends on the join type (how).

In [79]:
# Two frames with the same columns, used by the merge examples
left = pd.DataFrame(
    dict(
        id=[1, 2, 3, 4, 5],
        Name=['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
        subject_id=['sub1', 'sub2', 'sub4', 'sub6', 'sub5'],
    )
)
right = pd.DataFrame(
    dict(
        id=[1, 2, 3, 4, 5],
        Name=['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
        subject_id=['sub2', 'sub4', 'sub3', 'sub6', 'sub5'],
    )
)

In [81]:
# Display the left-hand frame
left

Unnamed: 0,id,Name,subject_id
0,1,Alex,sub1
1,2,Amy,sub2
2,3,Allen,sub4
3,4,Alice,sub6
4,5,Ayoung,sub5


In [83]:
# Display the right-hand frame
right

Unnamed: 0,id,Name,subject_id
0,1,Billy,sub2
1,2,Brian,sub4
2,3,Bran,sub3
3,4,Bryce,sub6
4,5,Betty,sub5


In [86]:
#Merge two DataFrames on a single key column ('id'); the other shared columns get _x/_y suffixes
pd.merge(left,right,on='id')

Unnamed: 0,id,Name_x,subject_id_x,Name_y,subject_id_y
0,1,Alex,sub1,Billy,sub2
1,2,Amy,sub2,Brian,sub4
2,3,Allen,sub4,Bran,sub3
3,4,Alice,sub6,Bryce,sub6
4,5,Ayoung,sub5,Betty,sub5


In [87]:
#Merge two DataFrames on multiple key columns; only rows matching on both 'id' and 'subject_id' are kept
pd.merge(left,right,on=['id','subject_id'])

Unnamed: 0,id,Name_x,subject_id,Name_y
0,4,Alice,sub6,Bryce
1,5,Ayoung,sub5,Betty


In [89]:
# Left join: keep every row of `left`; unmatched subject_ids get NaN on the right side
left.merge(right, how='left', on='subject_id')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,1,Alex,sub1,,
1,2,Amy,sub2,1.0,Billy
2,3,Allen,sub4,2.0,Brian
3,4,Alice,sub6,4.0,Bryce
4,5,Ayoung,sub5,5.0,Betty


In [91]:
# Right join: keep every row of `right`; unmatched subject_ids get NaN on the left side
left.merge(right, how='right', on='subject_id')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,2.0,Amy,sub2,1,Billy
1,3.0,Allen,sub4,2,Brian
2,4.0,Alice,sub6,4,Bryce
3,5.0,Ayoung,sub5,5,Betty
4,,,sub3,3,Bran


In [93]:
# Outer join: union of the subject_ids found in either frame
left.merge(right, how='outer', on='subject_id')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,1.0,Alex,sub1,,
1,2.0,Amy,sub2,1.0,Billy
2,3.0,Allen,sub4,2.0,Brian
3,4.0,Alice,sub6,4.0,Bryce
4,5.0,Ayoung,sub5,5.0,Betty
5,,,sub3,3.0,Bran


In [94]:
# Inner join: only the subject_ids present in both frames
left.merge(right, how='inner', on='subject_id')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,2,Amy,sub2,1,Billy
1,3,Allen,sub4,2,Brian
2,4,Alice,sub6,4,Bryce
3,5,Ayoung,sub5,5,Betty


# Concatenation

Pandas provides various facilities for easily combining Series and DataFrame objects.

Syntax:
======
pd.concat(objs,axis=0,join='outer',join_axes=None,
ignore_index=False)

Parameters:
==========
objs − This is a sequence or mapping of Series, DataFrame, or Panel objects.

axis − {0, 1, ...}, default 0. This is the axis to concatenate along.

join − {‘inner’, ‘outer’}, default ‘outer’. How to handle indexes on other axis(es). Outer for union and inner for intersection.

ignore_index − boolean, default False. If True, do not use the index values on the concatenation axis. The resulting axis will be labeled 0, ..., n - 1.

join_axes − This is the list of Index objects. Specific indexes to use for the other (n-1) axes instead of performing inner/outer set logic. (This parameter was deprecated in pandas 0.25 and removed in 1.0; reindex the result instead.)

In [108]:
# Two frames with identical columns and identical index labels (1..5),
# used by the concatenation examples
one = pd.DataFrame(
    dict(
        Name=['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
        subject_id=['sub1', 'sub2', 'sub4', 'sub6', 'sub5'],
        Marks_scored=[98, 90, 87, 69, 78],
    ),
    index=[1, 2, 3, 4, 5],
)

two = pd.DataFrame(
    dict(
        Name=['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
        subject_id=['sub2', 'sub4', 'sub3', 'sub6', 'sub5'],
        Marks_scored=[89, 80, 79, 97, 88],
    ),
    index=[1, 2, 3, 4, 5],
)
(one, two)

(     Name subject_id  Marks_scored
 1    Alex       sub1            98
 2     Amy       sub2            90
 3   Allen       sub4            87
 4   Alice       sub6            69
 5  Ayoung       sub5            78,     Name subject_id  Marks_scored
 1  Billy       sub2            89
 2  Brian       sub4            80
 3   Bran       sub3            79
 4  Bryce       sub6            97
 5  Betty       sub5            88)

In [100]:
# Stack the rows of `two` below those of `one`; the original index labels 1..5 repeat
pd.concat([one,two])

Unnamed: 0,Name,subject_id,Marks_scored
1,Alex,sub1,98
2,Amy,sub2,90
3,Allen,sub4,87
4,Alice,sub6,69
5,Ayoung,sub5,78
1,Billy,sub2,89
2,Brian,sub4,80
3,Bran,sub3,79
4,Bryce,sub6,97
5,Betty,sub5,88


In [102]:
# keys adds an outer index level ('x' / 'y') recording which input each row came from
pd.concat([one,two],keys=['x','y'])

Unnamed: 0,Unnamed: 1,Name,subject_id,Marks_scored
x,1,Alex,sub1,98
x,2,Amy,sub2,90
x,3,Allen,sub4,87
x,4,Alice,sub6,69
x,5,Ayoung,sub5,78
y,1,Billy,sub2,89
y,2,Brian,sub4,80
y,3,Bran,sub3,79
y,4,Bryce,sub6,97
y,5,Betty,sub5,88


In [109]:
#ignore_index
# With ignore_index=True the result is relabelled 0..n-1; as the output shows,
# the keys are not kept in the index.
pd.concat([one,two],keys=['x','y'],ignore_index=True)

Unnamed: 0,Name,subject_id,Marks_scored
0,Alex,sub1,98
1,Amy,sub2,90
2,Allen,sub4,87
3,Alice,sub6,69
4,Ayoung,sub5,78
5,Billy,sub2,89
6,Brian,sub4,80
7,Bran,sub3,79
8,Bryce,sub6,97
9,Betty,sub5,88


In [111]:
#Column-wise concatenation: axis=1 places the frames side by side, aligned on the index labels
pd.concat([one,two],axis=1)

Unnamed: 0,Name,subject_id,Marks_scored,Name.1,subject_id.1,Marks_scored.1
1,Alex,sub1,98,Billy,sub2,89
2,Amy,sub2,90,Brian,sub4,80
3,Allen,sub4,87,Bran,sub3,79
4,Alice,sub6,69,Bryce,sub6,97
5,Ayoung,sub5,78,Betty,sub5,88


In [114]:
#Concatenation Using Append
#Appending a single DataFrame: the rows of `two` are added below those of `one`
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat produces the same row-wise result on every version.
pd.concat([one, two])

Unnamed: 0,Name,subject_id,Marks_scored
1,Alex,sub1,98
2,Amy,sub2,90
3,Allen,sub4,87
4,Alice,sub6,69
5,Ayoung,sub5,78
1,Billy,sub2,89
2,Brian,sub4,80
3,Bran,sub3,79
4,Bryce,sub6,97
5,Betty,sub5,88


In [116]:
#Appending multiple DataFrames: the rows of two, one and two are added below those of `one`
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat with all four frames is the equivalent call.
pd.concat([one, two, one, two])

Unnamed: 0,Name,subject_id,Marks_scored
1,Alex,sub1,98
2,Amy,sub2,90
3,Allen,sub4,87
4,Alice,sub6,69
5,Ayoung,sub5,78
1,Billy,sub2,89
2,Brian,sub4,80
3,Bran,sub3,79
4,Bryce,sub6,97
5,Betty,sub5,88


# Time Series
Pandas provides robust tools for working with time series data, especially in the financial sector. 
While working with time series data, we frequently come across the following −

 1. Generating sequence of time.
 2. Convert the time series to different frequencies.
 
Pandas provides a relatively compact and self-contained set of tools for performing the above tasks.

In [119]:
#Get Current Time
# pd.datetime was deprecated in pandas 1.0 and later removed; Timestamp.now()
# returns the current local time as a datetime-compatible object.
# The value is read once and printed (the original called now() twice and
# discarded the first result).
current_time = pd.Timestamp.now()
print(current_time)

2020-07-14 12:12:22.203887


In [121]:
# Create a Timestamp for midnight on 1 March 2017
pd.Timestamp(year=2017, month=3, day=1)

Timestamp('2017-03-01 00:00:00')

In [123]:
# Build a Timestamp from an integer interpreted as seconds (unit='s') since the Unix epoch
pd.Timestamp(1587687255,unit='s')

Timestamp('2020-04-24 00:14:15')

In [125]:
# Range of times from 11:00 to 13:30 in 30-minute steps; .time keeps only
# the time-of-day part of each timestamp
pd.date_range(start="11:00", end="13:30", freq="30min").time

array([datetime.time(11, 0), datetime.time(11, 30), datetime.time(12, 0),
       datetime.time(12, 30), datetime.time(13, 0), datetime.time(13, 30)],
      dtype=object)

In [131]:
#Frequency of Time
# Hourly steps. The lowercase alias "h" is used because the uppercase "H"
# is deprecated in recent pandas; "h" is accepted by older releases as well.
pd.date_range("11:00", "13:30", freq="h").time

array([datetime.time(11, 0), datetime.time(12, 0), datetime.time(13, 0)],
      dtype=object)

In [132]:
#Converting to TimeStamp
# The two strings use different date formats, and None becomes NaT (see output).
# NOTE(review): pandas 2.x infers a single format from the first element and may
# reject mixed-format input like this unless format='mixed' is passed — confirm
# against the installed version.
pd.to_datetime(pd.Series(['Jul 31, 2009','2010-01-10', None]))

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

In [134]:
# Passing a list returns a DatetimeIndex (see output); None becomes NaT.
# NOTE(review): the two strings use different separators; pandas 2.x may require
# format='mixed' for input like this — confirm against the installed version.
pd.to_datetime(['2005/11/23', '2010.12.31', None])

DatetimeIndex(['2005-11-23', '2010-12-31', 'NaT'], dtype='datetime64[ns]', freq=None)

# Date Functionality
Extending the time series tools, date functionality plays a major role in financial data analysis. While working with date data, we will frequently come across the following −

 1. Generating sequence of dates
 2. Convert the date series to different frequencies

In [136]:
# Range of dates: five consecutive calendar days starting 1 Jan 2011
# (freq='D' is the default, written out here)
pd.date_range(start='1/1/2011', periods=5, freq='D')

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05'],
              dtype='datetime64[ns]', freq='D')

In [138]:
#Frequency of Dates: 'M' yields month-end dates (see output)
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME' — confirm before upgrading.
pd.date_range('1/1/2011', periods=5,freq='M')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31'],
              dtype='datetime64[ns]', freq='M')

In [147]:
# bdate_range -- business dates: Saturdays and Sundays are skipped
pd.bdate_range(start='5/5/2011', periods=7)

DatetimeIndex(['2011-05-05', '2011-05-06', '2011-05-09', '2011-05-10',
               '2011-05-11', '2011-05-12', '2011-05-13'],
              dtype='datetime64[ns]', freq='B')

In [148]:
#Date Range from explicit start and end objects
# pd.datetime was deprecated in pandas 1.0 and later removed; pd.Timestamp is a
# datetime-compatible replacement that works on every version.
start = pd.Timestamp(year=2011, month=1, day=1)
end = pd.Timestamp(year=2011, month=1, day=5)

# Both endpoints are included; the default frequency is one calendar day
pd.date_range(start, end)

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05'],
              dtype='datetime64[ns]', freq='D')

# Frequency Offset Aliases
A number of string aliases are given to useful common time series frequencies. We will refer to these aliases as offset aliases.

    Alias   Description
  1.  B	     business day frequency
  2.  BQS	 business quarter start frequency
  3.   D	 calendar day frequency
  4.   A	 annual(Year) end frequency
  5.   W	 weekly frequency
  6.  BA	 business year end frequency
  7.   M	 month end frequency
  8.  BAS	 business year start frequency
  9.  SM	 semi-month end frequency
 10.  BH	 business hour frequency
 11.  BM	 business month end frequency
 12.   H	 hourly frequency
 13.   MS	 month start frequency
 14. T, min	 minutely frequency
 15.  SMS	 semi month start frequency
 16.   S	 secondly frequency
 17.   BMS	 business month start frequency
 18. L, ms	 milliseconds
 19.   Q	 quarter end frequency
 20. U, us	 microseconds
 21.   BQ	 business quarter end frequency
 22.   N	 nanoseconds
 23.   QS	 quarter start frequency

# TimeDelta
Timedeltas are differences in times, expressed in different units, for example, days, hours, minutes, seconds. They can be both positive and negative.

In [150]:
#String
#By passing a string literal, we can create a Timedelta object.
# The string may combine several units; here days, hours, minutes and seconds.
pd.Timedelta('2 days 2 hours 15 minutes 30 seconds')

Timedelta('2 days 02:15:30')

In [152]:
#Integer
#Passing an integer together with a unit creates a Timedelta of that many units (here 6 hours).
pd.Timedelta(6,unit='h')

Timedelta('0 days 06:00:00')

In [156]:
#Data Offsets
#Offsets such as weeks, days, hours, minutes, seconds, milliseconds, microseconds and nanoseconds
#can also be given as keyword arguments.
pd.Timedelta(days=2)

Timedelta('2 days 00:00:00')

In [159]:
#to_timedelta()
#It converts a scalar, array, list, or series from a recognized timedelta format/value into a Timedelta type.
# Call pd.to_timedelta itself (the cell previously repeated pd.Timedelta(days=2));
# a scalar string converts to a single Timedelta with the same value.
pd.to_timedelta('2 days')

Timedelta('2 days 00:00:00')

In [161]:
# Addition: A holds three consecutive days, B holds offsets of 0, 1 and 2 days,
# and C is the element-wise sum A + B
s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D'))
td = pd.Series([pd.Timedelta(days=n) for n in range(3)])
df = pd.DataFrame({'A': s, 'B': td}).assign(
    C=lambda frame: frame['A'] + frame['B']
)

df

Unnamed: 0,A,B,C
0,2012-01-01,0 days,2012-01-01
1,2012-01-02,1 days,2012-01-03
2,2012-01-03,2 days,2012-01-05


In [162]:
# Subtraction: same A and B as in the addition example; C is the element-wise
# difference A - B
s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D'))
td = pd.Series([pd.Timedelta(days=n) for n in range(3)])
df = pd.DataFrame({'A': s, 'B': td}).assign(
    C=lambda frame: frame['A'] - frame['B']
)

df

Unnamed: 0,A,B,C
0,2012-01-01,0 days,2012-01-01
1,2012-01-02,1 days,2012-01-01
2,2012-01-03,2 days,2012-01-01
