In [84]:
import pandas as pd
from datetime import date, datetime

# One column that deliberately mixes bool, None, int, float, date, datetime
# and str values, so that df.info() reports the dtype pandas picks for it.
mixed_values = [True, False, None, 100, 0.01, date.today(), datetime.now(), 'Nice']
df = pd.DataFrame(data={'a_column': mixed_values})

df.info()  # prints the non-null count and dtype of the column
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   a_column  7 non-null      object
dtypes: object(1)
memory usage: 196.0+ bytes


Unnamed: 0,a_column
0,True
1,False
2,
3,100
4,0.01
5,2025-11-27
6,2025-11-27 18:43:50.388649
7,Nice


<h1>It looks like everything can be cast to 'string'</h1>

In [85]:
# Build the mixed-type column and cast it to the pandas 'string' dtype in a
# single chained expression; the None entry stays missing after the cast.
mixed_values = [True, False, None, 100, 0.01, date.today(), datetime.now(), 'Nice']
df = pd.DataFrame({'a_column': mixed_values}).astype({'a_column': 'string'})

df.info()  # the Dtype column now reads "string"
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   a_column  7 non-null      string
dtypes: string(1)
memory usage: 196.0 bytes


Unnamed: 0,a_column
0,True
1,False
2,
3,100
4,0.01
5,2025-11-27
6,2025-11-27 18:43:55.051385
7,Nice


<h1>With the 'string' dtype, we can use all the .str operations</h1>

In [86]:
# Normalise the text column through the .str accessor: lower-case first,
# then trim leading/trailing whitespace, and write the result back.
lowered = df['a_column'].str.lower()
df['a_column'] = lowered.str.strip()
df

Unnamed: 0,a_column
0,true
1,false
2,
3,100
4,0.01
5,2025-11-27
6,2025-11-27 18:43:55.051385
7,nice


<h1>strip()</h1>
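<h4> By default, the pandas.Series.str.strip() method removes the <b>non-breaking space character</b> (\\xa0 or \\u00A0) along with the other standard whitespace characters <b>(spaces, tabs, newlines)</b>. This is because it is equivalent to the Python built-in str.strip() method, which, when called without an argument, strips every character that Unicode classifies as whitespace (i.e. every character for which str.isspace() returns True) &mdash; not only the ASCII characters listed in the string.whitespace constant, which does not include the non-breaking space. </h4>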

In [119]:
# Two look-alike values: the second one ends with two chr(160) characters
# (non-breaking spaces).
df = pd.DataFrame({'a_column': ['a string', f'a string{chr(160)}{chr(160)}']})
# Without stripping, keep only the rows that compare equal to 'a string'.
exact_match = df['a_column'].eq('a string')
df.loc[exact_match, :]

Unnamed: 0,a_column
0,a string


In [120]:
# Same two look-alike values; the second one ends with two chr(160) characters.
df = pd.DataFrame({'a_column': ['a string', f'a string{chr(160)}{chr(160)}']})
# Strip the column first, then filter on equality with 'a string'.
df = df.assign(a_column=lambda frame: frame['a_column'].str.strip())
matches_after_strip = df['a_column'].eq('a string')
df.loc[matches_after_strip, :]

Unnamed: 0,a_column
0,a string
1,a string


<h1>Python's built-in strip() also strips the non-breaking space character (a special character)</h1>

In [122]:
# Plain-Python check: strip() on a value that ends with two chr(160) characters,
# compared against the bare text.
f'a string{chr(160)}{chr(160)}'.strip() == 'a string'

True

<h1>convert_dtypes() no longer converts a 'string' column</h1>

In [160]:
# Start from a plain boolean column.
df = pd.DataFrame({'a_column': [True, False, True, False]})
df.info()

# Cast the booleans to the 'string' dtype.
df['a_column'] = df['a_column'].astype('string')
df.info()

# convert_dtypes() returns a NEW DataFrame and does not modify `df`, so the
# result must be captured and inspected. Calling df.info() again here would
# only re-print the unchanged frame and say nothing about convert_dtypes().
df_converted = df.convert_dtypes()
df_converted.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   a_column  4 non-null      bool 
dtypes: bool(1)
memory usage: 136.0 bytes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   a_column  4 non-null      string
dtypes: string(1)
memory usage: 164.0 bytes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   a_column  4 non-null      string
dtypes: string(1)
memory usage: 164.0 bytes
