In [1]:
import numpy as np
import pandas as pd

In [2]:
# Ten consecutive timestamps starting on 1 March 2024; no `freq` is passed
# and the printed index reports freq='D'.
print(pd.date_range(start="3/1/2024", periods=10))

DatetimeIndex(['2024-03-01', '2024-03-02', '2024-03-03', '2024-03-04',
               '2024-03-05', '2024-03-06', '2024-03-07', '2024-03-08',
               '2024-03-09', '2024-03-10'],
              dtype='datetime64[ns]', freq='D')


**Adding Frequency**

In [3]:
# Ten month-end dates, beginning with the first month end on or after
# 1 March 2024.
# The frequency is passed as an offset object instead of the "M" string alias:
# pandas 2.2 deprecated "M" in favour of "ME", while earlier releases do not
# recognise "ME", so the offset object is the spelling that works on both.
month_end_index = pd.date_range("3/1/2024", periods=10, freq=pd.offsets.MonthEnd())
print(month_end_index)

DatetimeIndex(['2024-03-31', '2024-04-30', '2024-05-31', '2024-06-30',
               '2024-07-31', '2024-08-31', '2024-09-30', '2024-10-31',
               '2024-11-30', '2024-12-31'],
              dtype='datetime64[ns]', freq='M')


**Other Frequency Options**

In [4]:
# "MS" (month start) places every generated date on the first day of a month.
print(pd.date_range(start="1/1/2023", periods=7, freq="MS"))

DatetimeIndex(['2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01',
               '2023-05-01', '2023-06-01', '2023-07-01'],
              dtype='datetime64[ns]', freq='MS')


**Time Delta**

In [5]:
# Capture the current date and time as a pandas Timestamp.
Day_1 = pd.to_datetime("today")
print(f"Day 1: {Day_1}")

Day 1: 2024-03-02 11:38:38.842460


In [6]:
# Move the captured timestamp forward by one day and report the weekday
# name of the resulting date.
Day_2 = Day_1 + pd.Timedelta("1 day")
print(f"Day 2: {Day_2} {Day_2.day_name()}")

Day 2: 2024-03-03 11:38:38.842460 Sunday


**Date Operations**

In [7]:
# Demo frame for date arithmetic: seven consecutive calendar days next to an
# offset that grows by one day per row (0 days, 1 day, ..., 6 days).
Date = pd.Series(
    pd.date_range(start="2024-1-1", periods=7, freq="D")
)
series_Date = pd.Series(
    [pd.Timedelta(days=offset) for offset in range(7)]
)
df_Date = pd.DataFrame(
    {
        "Date": Date,
        "To_Add": series_Date,
    }
)
df_Date

Unnamed: 0,Date,To_Add
0,2024-01-01,0 days
1,2024-01-02,1 days
2,2024-01-03,2 days
3,2024-01-04,3 days
4,2024-01-05,4 days
5,2024-01-06,5 days
6,2024-01-07,6 days


In [8]:
# Element-wise datetime + timedelta: every date is shifted forward by the
# offset stored in the same row.
df_Date["Final_Date_add"] = df_Date["Date"].add(df_Date["To_Add"])
df_Date

Unnamed: 0,Date,To_Add,Final_Date_add
0,2024-01-01,0 days,2024-01-01
1,2024-01-02,1 days,2024-01-03
2,2024-01-03,2 days,2024-01-05
3,2024-01-04,3 days,2024-01-07
4,2024-01-05,4 days,2024-01-09
5,2024-01-06,5 days,2024-01-11
6,2024-01-07,6 days,2024-01-13


In [9]:
# Element-wise datetime - timedelta: every date is shifted back by the offset
# stored in the same row (with this data each row lands on 2024-01-01).
df_Date["Final_Date_sub"] = df_Date["Date"].sub(df_Date["To_Add"])
df_Date

Unnamed: 0,Date,To_Add,Final_Date_add,Final_Date_sub
0,2024-01-01,0 days,2024-01-01,2024-01-01
1,2024-01-02,1 days,2024-01-03,2024-01-01
2,2024-01-03,2 days,2024-01-05,2024-01-01
3,2024-01-04,3 days,2024-01-07,2024-01-01
4,2024-01-05,4 days,2024-01-09,2024-01-01
5,2024-01-06,5 days,2024-01-11,2024-01-01
6,2024-01-07,6 days,2024-01-13,2024-01-01


In [10]:
# Split each date into its calendar components through the `.dt` accessor,
# adding one new column per component.
for column, attribute in (("Year", "year"), ("Month", "month"), ("Day", "day")):
    df_Date[column] = getattr(df_Date["Date"].dt, attribute)

df_Date

Unnamed: 0,Date,To_Add,Final_Date_add,Final_Date_sub,Year,Month,Day
0,2024-01-01,0 days,2024-01-01,2024-01-01,2024,1,1
1,2024-01-02,1 days,2024-01-03,2024-01-01,2024,1,2
2,2024-01-03,2 days,2024-01-05,2024-01-01,2024,1,3
3,2024-01-04,3 days,2024-01-07,2024-01-01,2024,1,4
4,2024-01-05,4 days,2024-01-09,2024-01-01,2024,1,5
5,2024-01-06,5 days,2024-01-11,2024-01-01,2024,1,6
6,2024-01-07,6 days,2024-01-13,2024-01-01,2024,1,7


**Categorical Data**

In [11]:
# Nine string labels stored with the categorical dtype; the two distinct
# labels ('Female', 'Male') become the categories.
gender = pd.Series(
    ["Male", "Female", "Male", "Female", "Female", "Male", "Male", "Male", "Female"],
    dtype="category",
)
gender

0      Male
1    Female
2      Male
3    Female
4    Female
5      Male
6      Male
7      Male
8    Female
dtype: category
Categories (2, object): ['Female', 'Male']

In [12]:
# Wrap the same values in a standalone Categorical; its repr lists the values
# and the categories without a Series index.
Categories = pd.Categorical(values=gender)
Categories

['Male', 'Female', 'Male', 'Female', 'Female', 'Male', 'Male', 'Male', 'Female']
Categories (2, object): ['Female', 'Male']

**Accessing individual elements of the `Categories` object**

In [13]:
# Positional indexing returns the stored values at positions 0 and 1; the
# distinct labels themselves are available via `Categories.categories`.
print(f"Categories: {Categories[0]} and {Categories[1]}")

Categories: Male and Female


**Summary of the Categorical attribute**

In [14]:
# Summary of the categorical Series: number of values, number of distinct
# labels, the most frequent label, and how often that label occurs.
gender.describe()

count        9
unique       2
top       Male
freq         5
dtype: object

**Removing certain categories**

In [15]:
# Drop the "Male" category; values that belonged to it are shown as NaN.
# The result is only displayed here, it is not assigned back to `gender`.
gender.cat.remove_categories("Male")

0       NaN
1    Female
2       NaN
3    Female
4    Female
5       NaN
6       NaN
7       NaN
8    Female
dtype: category
Categories (1, object): ['Female']

**Sparse Data - Useful when most elements share a single fill value (e.g., zero or NaN)**

In [16]:
# Build a frame that is almost entirely NaN and convert it to sparse storage.
# A seeded generator keeps the two surviving rows reproducible on every
# Restart & Run All; the unseeded global RNG produced new values each run.
SPARSE_DEMO_SEED = 42
df = pd.DataFrame(
    np.random.default_rng(SPARSE_DEMO_SEED).standard_normal((10000, 4))
)
df.iloc[:9998] = np.nan  # blank out every row except the last two
# NaN is declared as the fill value of the sparse dtype, so the NaN entries
# are the ones treated as "missing" by the sparse representation.
df_1 = df.astype(pd.SparseDtype("float", np.nan))
type(df_1)

pandas.core.frame.DataFrame

In [17]:
# Per-column dtypes after the conversion: each column is reported as a sparse
# float64 dtype with NaN as its fill value.
df_1.dtypes

0    Sparse[float64, nan]
1    Sparse[float64, nan]
2    Sparse[float64, nan]
3    Sparse[float64, nan]
dtype: object

In [18]:
# Share of entries that differ from the fill value; here 8 non-NaN values out
# of 40,000 cells, i.e. 0.0002.
df_1.sparse.density

0.0002