In [103]:
import pandas as pd
from datetime import date, datetime, time
from dateutil.relativedelta import relativedelta

<h1>1. Default type and convert_dtypes()</h1>
<h4>By default, when a DataFrame is created, string and date columns both get the "object" dtype</h4>
<h4>The convert_dtypes() method can be used to convert columns of a DataFrame to the <b>"best possible"</b> dtypes,
        specifically those that support pd.NA for representing missing values</h4>

In [92]:
# Anchor date for the demo; the 'value' column below intentionally mixes
# None, date objects and a plain string.
today = date.today()

D = dict(
    name=['yesterday', 'today', 'tomorrow', 'anything'],
    value=[None, today, today + relativedelta(days=1), 'This is anything'],
)

# Report the dtypes pandas inferred when the frame was constructed.
df = pd.DataFrame(data=D)
df.info()

print()  # blank line separating the two reports

# Report the dtypes again after letting pandas pick the "best possible" ones.
df = df.convert_dtypes()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    4 non-null      object
 1   value   3 non-null      object
dtypes: object(2)
memory usage: 196.0+ bytes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    4 non-null      string
 1   value   3 non-null      object
dtypes: object(1), string(1)
memory usage: 196.0+ bytes


<h1>2. The object type</h1>
<h4>
The "object" data type is a general-purpose type used to store any Python object
</h4>
<h4>
Essentially, an "object" column stores a collection of arbitrary Python objects, meaning it can hold anything from lists and dictionaries to custom class instances
</h4>

In [54]:
# Walk the two columns in parallel and print the Python type of each stored
# value. Iterating the columns directly (instead of DataFrame.iterrows())
# avoids building a Series per row, which can coerce values to a common dtype
# and therefore misreport what the column actually holds.
for n, v in zip(df['name'], df['value']):
    print(f'{n} - {v} - {type(v)}')

yesterday - None - <class 'NoneType'>
today - 2025-10-26 - <class 'datetime.date'>
tomorrow - 2025-10-27 - <class 'datetime.date'>
anything - This is anything - <class 'str'>


<h1>3. Check if an object item is null</h1>

In [57]:
# Boolean mask that is True where the object column holds a missing value
# (the None entry in this frame).
df['value'].isna()

0     True
1    False
2    False
3    False
Name: value, dtype: bool

<h1>4. The bitwise operators (& for AND, | for OR) are typically NOT short-circuited</h1>
<h3>
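    When an object column (Series) holds values of mixed types, it is safer to build the loc[,] mask with apply(), because Python's "and"/"or" operators are short-circuited: the type check runs first and the comparison is only evaluated for values that passed it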
</h3>

In [65]:
# Evaluate "today" once so every row is compared against the same cutoff
# instead of calling date.today() again for each element.
cutoff = date.today()

# isinstance() is already False for None, so no separate None check is needed.
# `and` short-circuits, so `x > cutoff` only runs for values that are dates.
# NOTE(review): datetime is a subclass of date, so a datetime value would pass
# the isinstance() check and reach the comparison - confirm the column never
# holds datetimes.
is_future_date = df['value'].apply(lambda x: isinstance(x, date) and x > cutoff)

# Keep only the rows whose value is a date strictly after the cutoff.
df.loc[is_future_date]

Unnamed: 0,name,value
2,tomorrow,2025-10-27


<h1>5. loc[,] with None</h1>
<h4>
    Comparing a None entry with a value in pandas results in False
</h4>

In [93]:
# Rebuild the demo frame without the string row, so 'value' holds only None
# and date objects.
today = date.today()
df = pd.DataFrame(
    dict(
        name=['yesterday', 'today', 'tomorrow'],
        value=[None, today, today + relativedelta(days=1)],
    )
)

In [86]:
# Element-wise "value < today" (method form of the `<` operator).
df['value'].lt(today)

0    False
1    False
2    False
Name: value, dtype: bool

In [88]:
# Element-wise "value == today" (method form of the `==` operator).
df['value'].eq(today)

0    False
1     True
2    False
Name: value, dtype: bool

In [91]:
# Element-wise "value > today" (method form of the `>` operator).
df['value'].gt(today)

0    False
1    False
2     True
Name: value, dtype: bool

<h1>6. date & datetime</h1>

<h4>date and datetime are not the same data type</h4>
<h4>They are not directly comparable</h4>
<h4>One of them must be converted to the other's type (e.g. with datetime.date()) before they can be compared</h4>

In [95]:
# Capture a plain calendar date and a full timestamp back to back; they are
# instances of two different classes.
today, now = date.today(), datetime.now()

In [105]:
# Reduce the datetime to its date part so both operands are plain dates.
now.date() == today

True