In [5]:
# Construction
import pandas as pd
import numpy as np

In [3]:
# Build a nullable integer array: the dtype is passed as a pd.Int64Dtype()
# instance and the missing entry is written as None. `arr` is reused further
# down when it is wrapped in a Series.
# NOTE(review): this cell's execution count (3) is lower than the import
# cell's (5); confirm the notebook survives Restart Kernel -> Run All.
arr = pd.array([1, 2, None], dtype=pd.Int64Dtype())

In [6]:
# Same kind of construction, but with the dtype given as the string alias
# "Int64" and np.nan as the missing marker; the recorded output shows it as <NA>.
pd.array([1, 2, np.nan], dtype="Int64")

<IntegerArray>
[1, 2, <NA>]
Length: 3, dtype: Int64

In [None]:
# All NA-like values are replaced with pandas.NA.

In [7]:
# np.nan, None and pd.NA are all passed as missing markers here; the recorded
# output shows every one of them as <NA>.
pd.array([1, 2, np.nan, None, pd.NA], dtype="Int64")

<IntegerArray>
[1, 2, <NA>, <NA>, <NA>]
Length: 5, dtype: Int64

In [8]:
# Wrap the previously built `arr` in a Series; the recorded output keeps
# dtype Int64 and shows the missing entry as <NA>.
pd.Series(arr)

0       1
1       2
2    <NA>
dtype: Int64

In [None]:
# Currently pandas.array() and pandas.Series() use different rules for dtype inference. pandas.array() will infer a nullable-integer dtype.

In [9]:
# No dtype argument: pd.array infers one. The recorded output is an
# IntegerArray with dtype Int64.
pd.array([1, None])

<IntegerArray>
[1, <NA>]
Length: 2, dtype: Int64

In [10]:
# Inference with no missing values at all: the recorded output is still an
# IntegerArray with the nullable Int64 dtype.
pd.array([1, 2])

<IntegerArray>
[1, 2]
Length: 2, dtype: Int64

In [12]:
# For backwards compatibility, Series infers these as either integer or float dtype:

In [13]:
# The input [1, None] passed to pd.Series without a dtype: the recorded output
# is float64, with the missing entry shown as NaN rather than <NA>.
pd.Series([1, None])

0    1.0
1    NaN
dtype: float64

In [None]:
# We recommend explicitly providing the dtype to avoid confusion

In [15]:
# Spelling out dtype="Int64" on pd.array; the recorded output is an
# IntegerArray with <NA> for the missing entry.
pd.array([1, None], dtype="Int64")

<IntegerArray>
[1, <NA>]
Length: 2, dtype: Int64

In [14]:
# Spelling out dtype="Int64" on pd.Series; the recorded output is Int64 with
# <NA>, not float64 with NaN.
# NOTE(review): this cell's execution count (14) is lower than the preceding
# cell's (15), so the cells were run out of order.
pd.Series([1, None], dtype="Int64")

0       1
1    <NA>
dtype: Int64

In [None]:
# Operations
# Operations involving an integer array will behave similarly to NumPy arrays. Missing values will be propagated, and the data will be coerced to another dtype if needed.



In [18]:
# Nullable-integer Series reused by the cells that follow.
s = pd.Series([1, 2, None], dtype="Int64")
# NOTE(review): this result is never shown -- a notebook cell renders only its
# last expression. Move it to its own cell (or display it) if it should be seen.
s + 1
# Element-wise comparison; the recorded output has dtype boolean and keeps
# <NA> in the missing slot.
s == 1

0     True
1    False
2     <NA>
dtype: boolean

In [None]:
# operate with other dtypes

In [19]:
# Add an Int8 copy of positions 1-2 of `s` back onto the Int64 Series. The
# recorded output has dtype Int64; label 0, which the slice does not contain,
# comes out as <NA>, as does label 2, whose value is missing in `s`.
s + s.iloc[1:3].astype("Int8")

0    <NA>
1       4
2    <NA>
dtype: Int64

In [None]:
# Coerce when needed: add a float scalar to the nullable-integer Series `s`.
# NOTE(review): this cell has no execution count and no recorded output, so
# the resulting dtype is not shown here -- run it to confirm.
s + 0.01

In [None]:
# These dtypes can operate as part of DataFrame.

In [20]:
# Put the nullable-integer Series `s` (column A) next to a plain list of ints
# (column B) and a list of single-character strings (column C).
df = pd.DataFrame({"A": s, "B": [1, 1, 3], "C": list("aab")})

# Last expression of the cell: renders the frame.
df

Unnamed: 0,A,B,C
0,1.0,1,a
1,2.0,1,a
2,,3,b


In [None]:
# These dtypes can be merged, reshaped, and cast.

In [21]:
# Split the frame by columns and glue it back together side by side.
# NOTE(review): the .dtypes result is never shown -- a notebook cell renders
# only its last expression. Move it to its own cell if it should be seen.
pd.concat([df[["A"]], df[["B", "C"]]], axis=1).dtypes
# Cast column A to float; the recorded output is float64 with the missing
# value shown as NaN.
df["A"].astype(float)

0    1.0
1    2.0
2    NaN
Name: A, dtype: float64

In [None]:
# Reduction and groupby operations such as ‘sum’ work as well.

In [22]:
# NOTE(review): the df.sum() result is never shown -- a notebook cell renders
# only its last expression. Move it to its own cell if it should be seen.
df.sum()
# Sum column A within each value of B. The recorded output keeps dtype Int64;
# group B == 3, whose only A value is missing, sums to 0.
df.groupby("B").A.sum()

B
1    3
3    0
Name: A, dtype: Int64

In [None]:
# Scalar NA Value

In [None]:
# arrays.IntegerArray uses pandas.NA as its scalar missing value. Slicing a single element that’s missing will return pandas.NA:

In [23]:
# Two-element nullable-integer array whose second entry is missing.
a = pd.array([1, None], dtype="Int64")