#### Time Series

In [10]:
import pandas as pd
import numpy as np

In [7]:
# Three ways to obtain a pandas Timestamp.

# Current local time. `pd.datetime` was only a deprecated alias for the
# standard-library datetime class (it emitted a FutureWarning and was removed
# in pandas 2.0); `pd.Timestamp.now()` is the supported equivalent and prints
# in the same "YYYY-MM-DD HH:MM:SS.ffffff" form.
now = pd.Timestamp.now()
print(now)

# From a date string; the time-of-day defaults to midnight.
from_string = pd.Timestamp('2017-03-01')
print(from_string)

# From a Unix epoch value expressed in seconds.
from_epoch = pd.Timestamp(1587687255, unit='s')
print(from_epoch)

2020-08-12 00:01:39.972378
2017-03-01 00:00:00
2020-04-24 00:14:15


  """Entry point for launching an IPython kernel.


In [10]:
# Range of times: build a date_range between two clock times and keep only
# the time-of-day part of every generated timestamp via `.time`.
half_hourly = pd.date_range("11:00", "13:30", freq="30min").time
print(half_hourly)
print("============")

# Hourly steps. The lowercase "h" alias is used because the uppercase "H"
# alias is deprecated since pandas 2.2; "h" is accepted by older releases too.
hourly = pd.date_range("11:00", "13:30", freq="h").time
print(hourly)

[datetime.time(11, 0) datetime.time(11, 30) datetime.time(12, 0)
 datetime.time(12, 30) datetime.time(13, 0) datetime.time(13, 30)]
[datetime.time(11, 0) datetime.time(12, 0) datetime.time(13, 0)]


In [13]:
# Convert to timestamps; missing entries become NaT ("Not a Time").
#
# Both inputs deliberately mix date formats. pandas >= 2.0 infers a single
# format from the first string and applies it to every element, so handing
# such a list straight to pd.to_datetime raises ValueError there. Parsing the
# values one at a time keeps per-value format detection and behaves the same
# on older and newer pandas.

series_input = ['Jul 31, 2009', '2010-01-10', None]
parsed_series = pd.to_datetime(
    pd.Series([pd.to_datetime(value) for value in series_input])
)
print(parsed_series)
print("============")

index_input = ['2005/11/23', '2010.12.31', None]
# A plain list (rather than a Series) yields a DatetimeIndex.
parsed_index = pd.to_datetime([pd.to_datetime(value) for value in index_input])
print(parsed_index)

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]
DatetimeIndex(['2005-11-23', '2010-12-31', 'NaT'], dtype='datetime64[ns]', freq=None)


In [19]:
# Range of dates

# Five consecutive calendar days (daily frequency is the default).
daily = pd.date_range('1/1/2020', periods=5)
print(daily)
print("============")

# Five month-end dates. The MonthEnd offset object is passed instead of the
# "M" string alias: "M" is deprecated since pandas 2.2 in favour of "ME",
# while the offset object is accepted by both older and newer releases.
month_ends = pd.date_range('1/1/2020', periods=5, freq=pd.offsets.MonthEnd())
print(month_ends)
print("============")

# bdate_range -> business date range, excludes SAT and SUN
business_days = pd.bdate_range('1/1/2020', periods=5)
print(business_days)

# Using explicit start and end points (both inclusive).
# pd.Timestamp replaces `pd.datetime`, a deprecated alias of the
# standard-library datetime class that was removed in pandas 2.0.
start = pd.Timestamp(2011, 1, 1)
end = pd.Timestamp(2011, 1, 5)

explicit_range = pd.date_range(start, end)
print(explicit_range)

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31'],
              dtype='datetime64[ns]', freq='M')
DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-06',
               '2020-01-07'],
              dtype='datetime64[ns]', freq='B')
DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05'],
              dtype='datetime64[ns]', freq='D')


  if sys.path[0] == '':
  del sys.path[0]


In [20]:
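# Offset aliases
# pandas gives short string aliases to the common time-series frequencies; these are called offset aliases.
# They are the values passed as `freq=`, for example "D" (calendar day), "B" (business day) or "30min" (every 30 minutes).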

In [23]:
# Timedelta: three ways of constructing a duration.

# parsed from a human-readable duration string
from_text = pd.Timedelta('2 days 2 hours 15 minutes 30 seconds')
# an integer magnitude together with an explicit unit
from_unit = pd.Timedelta(6, unit='h')
# built from keyword components
from_keywords = pd.Timedelta(days=2)

for duration in (from_text, from_unit, from_keywords):
    print(duration)

2 days 02:15:30
0 days 06:00:00
2 days 00:00:00


In [27]:
# Operations: arithmetic between a datetime column and a timedelta column.

# Three consecutive days starting on 2012-01-01.
day_index = pd.date_range("2012-1-1", periods=3, freq="D")
s = pd.Series(day_index)
print(s)
print("============")

# Offsets of 0, 1 and 2 days, one per row.
td = pd.Series([pd.Timedelta(days=n) for n in range(3)])
df = pd.DataFrame({"A": s, "B": td})
print(df)
print("============")

# C = A shifted forward by B; D = C shifted back by B, which restores A.
df = df.assign(
    C=lambda frame: frame["A"] + frame["B"],
    D=lambda frame: frame["C"] - frame["B"],
)
print(df)


0   2012-01-01
1   2012-01-02
2   2012-01-03
dtype: datetime64[ns]
           A      B
0 2012-01-01 0 days
1 2012-01-02 1 days
2 2012-01-03 2 days
           A      B          C          D
0 2012-01-01 0 days 2012-01-01 2012-01-01
1 2012-01-02 1 days 2012-01-03 2012-01-02
2 2012-01-03 2 days 2012-01-05 2012-01-03


#### Categorical Data

In [12]:
# Object creation: the different ways to build categorical data.

# 1) Request the "category" dtype while constructing a pandas object.
s = pd.Series(list("abca"), dtype="category")
print(s)
print("============")

# 2) Build a pd.Categorical directly; categories are inferred from the values.
letters = ['a', 'b', 'c']
cat = pd.Categorical(letters * 2)
print(cat)
print("============")

# 3) Explicit categories: a value that is not listed ('d') shows up as NaN.
cat = pd.Categorical(letters * 2 + ['d'], categories=['c', 'b', 'a'])
print(cat)
print("============")

# 4) Same data with ordered=True, so the categories carry the order c < b < a.
cat = pd.Categorical(letters * 2 + ['d'], categories=['c', 'b', 'a'], ordered=True)
print(cat)
print("============")

# Description: .describe() on categorical data reports count / unique / top /
# freq, the same summary it gives for a string column.
raw_values = ["a", "c", "c", np.nan]
cat = pd.Categorical(raw_values, categories=["b", "a", "c"])
df = pd.DataFrame({"cat": cat, "s": raw_values})
print(df)
print(df.describe())
print(df["cat"].describe())

0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]
[a, b, c, a, b, c]
Categories (3, object): [a, b, c]
[a, b, c, a, b, c, NaN]
Categories (3, object): [c, b, a]
[a, b, c, a, b, c, NaN]
Categories (3, object): [c < b < a]
   cat    s
0    a    a
1    c    c
2    c    c
3  NaN  NaN
       cat  s
count    3  3
unique   2  2
top      c  c
freq     2  2
count     3
unique    2
top       c
freq      2
Name: cat, dtype: object


In [15]:
# Get the properties of a categorical: its categories and whether it is ordered.

s = pd.Categorical(["a", "c", "c", np.nan], categories=["b", "a", "c"])
print(s.categories)

# Query `s` itself rather than a variable defined in some other cell, so the
# cell reports on the object built here and runs on a fresh kernel.
print(s.ordered)  # False, as we did not specify any order

Index(['b', 'a', 'c'], dtype='object')
False


In [16]:
# Renaming categories
s = pd.Series(["a", "b", "c", "a"], dtype="category")

# rename_categories returns a new Series whose categories (and therefore
# values) carry the new labels. Assigning to `s.cat.categories` directly was
# deprecated in pandas 1.x and is no longer supported in pandas 2.0.
s = s.cat.rename_categories(["Group %s" % g for g in s.cat.categories])
print(s.cat.categories)

Index(['Group a', 'Group b', 'Group c'], dtype='object')


In [18]:
# Appending new categories: add_categories returns a new Series whose
# category list is extended (here with the integer 4); the values are unchanged.
s = (
    pd.Series(["a", "b", "c", "a"], dtype="category")
    .cat.add_categories([4])
)
print(s.cat.categories)

Index(['a', 'b', 'c', 4], dtype='object')


In [19]:
# Removing categories: values that belonged to a removed category become NaN.
s = pd.Series(list("abca"), dtype="category")
print("Original object:", s, sep="\n")

# remove_categories returns a new Series; `s` itself is left untouched.
without_a = s.cat.remove_categories("a")
print("After removal:", without_a, sep="\n")

Original object:
0    a
1    b
2    c
3    a
dtype: category
Categories (3, object): [a, b, c]
After removal:
0    NaN
1      b
2      c
3    NaN
dtype: category
Categories (2, object): [b, c]
