https://linus-mk.hatenablog.com/entry/pandas_convert_float_to_int

In [1]:
import pandas as pd
import numpy as np
# Show DataFrames as plain text instead of HTML tables inside the notebook.
# This only affects how output is rendered; the code works without it.
pd.set_option('display.notebook_repr_html', False)

In [2]:
# Check the runtime environment: pandas version first, then NumPy, one per line.
print(pd.__version__, np.__version__, sep='\n')

1.0.5
1.18.1


## 1列だけ変換する

In [3]:
# Sample data: four columns of floats, three rows each.
df = pd.DataFrame(dict(
    col_A=[1.2, 3.4, 5.6],
    col_B=[9.8, 7.6, 5.4],
    col_C=[11.1, 22.2, 33.3],
    col_D=[99.9, 88.8, 77.7],
))
df

   col_A  col_B  col_C  col_D
0    1.2    9.8   11.1   99.9
1    3.4    7.6   22.2   88.8
2    5.6    5.4   33.3   77.7

In [4]:
# Inspect the dtype of each column: all four start out as float64.
df.dtypes

col_A    float64
col_B    float64
col_C    float64
col_D    float64
dtype: object

In [5]:
# Preview the conversion of one column. The result is only displayed, not
# stored, so df is not changed by this cell.
# Note that the fractional part is dropped rather than rounded (9.8 -> 9).
df['col_B'].astype('int')

0    9
1    7
2    5
Name: col_B, dtype: int64

In [6]:
# Convert only col_B to integers and keep the result. Passing a
# {column: dtype} mapping to astype leaves every unlisted column as it is.
df = df.astype({'col_B': 'int'})
df

   col_A  col_B  col_C  col_D
0    1.2      9   11.1   99.9
1    3.4      7   22.2   88.8
2    5.6      5   33.3   77.7

In [7]:
# Verify the result: only col_B is int64 now, the other columns are still float64.
df.dtypes

col_A    float64
col_B      int64
col_C    float64
col_D    float64
dtype: object

## 複数列を変換する

In [8]:
# Rebuild the all-float sample frame so this section starts from unconverted data.
df = pd.DataFrame(dict(
    col_A=[1.2, 3.4, 5.6],
    col_B=[9.8, 7.6, 5.4],
    col_C=[11.1, 22.2, 33.3],
    col_D=[99.9, 88.8, 77.7],
))
df

   col_A  col_B  col_C  col_D
0    1.2    9.8   11.1   99.9
1    3.4    7.6   22.2   88.8
2    5.6    5.4   33.3   77.7

In [9]:
# Select several columns with a list of names and convert them in one call.
# The result is only displayed here; df itself is not changed.
df[['col_B', 'col_D']].astype('int')

   col_B  col_D
0      9     99
1      7     88
2      5     77

In [10]:
# Convert col_B and col_D to integers and keep the result. Columns that are
# not listed in the mapping (col_A, col_C) stay as they are.
df = df.astype({'col_B': 'int', 'col_D': 'int'})
df

   col_A  col_B  col_C  col_D
0    1.2      9   11.1     99
1    3.4      7   22.2     88
2    5.6      5   33.3     77

In [11]:
# Verify the result: col_B and col_D are int64, col_A and col_C remain float64.
df.dtypes

col_A    float64
col_B      int64
col_C    float64
col_D      int64
dtype: object

## 全部の列を変換する

In [12]:
# Rebuild the all-float sample frame so this section starts from unconverted data.
df = pd.DataFrame(dict(
    col_A=[1.2, 3.4, 5.6],
    col_B=[9.8, 7.6, 5.4],
    col_C=[11.1, 22.2, 33.3],
    col_D=[99.9, 88.8, 77.7],
))
df

   col_A  col_B  col_C  col_D
0    1.2    9.8   11.1   99.9
1    3.4    7.6   22.2   88.8
2    5.6    5.4   33.3   77.7

In [13]:
# Calling astype on the whole DataFrame converts every column at once.
# The result is only displayed here; df itself is not changed.
df.astype('int')

   col_A  col_B  col_C  col_D
0      1      9     11     99
1      3      7     22     88
2      5      5     33     77

In [14]:
# Rebind df to the converted frame so the integer version is kept.
df = df.astype('int')
df

   col_A  col_B  col_C  col_D
0      1      9     11     99
1      3      7     22     88
2      5      5     33     77

In [15]:
# Verify the result: every column is int64 now.
df.dtypes

col_A    int64
col_B    int64
col_C    int64
col_D    int64
dtype: object

## 文字列の列を含む場合

In [16]:
# Sample data with three float columns plus one column of plain strings.
df = pd.DataFrame(dict(
    col_A=[1.2, 3.4, 5.6],
    col_B=[9.8, 7.6, 5.4],
    col_C=[11.1, 22.2, 33.3],
    col_string=['hello', 'good_morning', 'good_night'],
))
df

   col_A  col_B  col_C    col_string
0    1.2    9.8   11.1         hello
1    3.4    7.6   22.2  good_morning
2    5.6    5.4   33.3    good_night

In [17]:
# Demonstrate the failure: the strings in col_string cannot be parsed as
# integers, so converting the whole frame raises ValueError.
# The exception is caught and its message printed so that the error is still
# shown while "Restart & Run All" can continue past this cell.
try:
    df.astype('int')
except ValueError as exc:
    print('ValueError:', exc)

ValueError: invalid literal for int() with base 10: 'hello'

In [18]:
# errors='ignore' suppresses the exception: the numeric columns are converted
# to integers and col_string is returned as it was.
# The result is only displayed here; df itself is not changed.
df.astype('int', errors='ignore')

   col_A  col_B  col_C    col_string
0      1      9     11         hello
1      3      7     22  good_morning
2      5      5     33    good_night

In [19]:
# Keep the partially converted frame, then check the dtypes:
# the numeric columns become int64 while col_string stays object.
df = df.astype('int', errors='ignore')
df.dtypes

col_A          int64
col_B          int64
col_C          int64
col_string    object
dtype: object

## NaNを含む場合

In [20]:
# Sample data where col_B and col_D each contain one missing value (NaN).
df = pd.DataFrame(dict(
    col_A=[1.2, 3.4, 5.6],
    col_B=[np.nan, 7.6, 5.4],
    col_C=[11.1, 22.2, 33.3],
    col_D=[99.9, np.nan, 77.7],
))
df

   col_A  col_B  col_C  col_D
0    1.2    NaN   11.1   99.9
1    3.4    7.6   22.2    NaN
2    5.6    5.4   33.3   77.7

In [21]:
# Demonstrate the failure: NaN has no integer representation, so converting
# a frame that contains NaN raises ValueError.
# The exception is caught and its message printed so that the error is still
# shown while "Restart & Run All" can continue past this cell.
try:
    df.astype('int')
except ValueError as exc:
    print('ValueError:', exc)

ValueError: Cannot convert non-finite values (NA or inf) to integer

In [22]:
# Gotcha: with NaN present, errors='ignore' hides the exception but converts
# nothing. The frame comes back unchanged, including col_A and col_C, which
# contain no NaN at all.
df.astype('int', errors='ignore')

   col_A  col_B  col_C  col_D
0    1.2    NaN   11.1   99.9
1    3.4    7.6   22.2    NaN
2    5.6    5.4   33.3   77.7

In [23]:
# Confirm via the dtypes: every column is still float64, so the cast with
# errors='ignore' had no effect on this frame.
df.astype('int', errors='ignore').dtypes

col_A    float64
col_B    float64
col_C    float64
col_D    float64
dtype: object

In [24]:
# Replace every NaN with 0 so that no missing values remain.
# The result is only displayed here; df itself is not changed.
df.fillna(0)

   col_A  col_B  col_C  col_D
0    1.2    0.0   11.1   99.9
1    3.4    7.6   22.2    0.0
2    5.6    5.4   33.3   77.7

In [25]:
# Fill the NaNs with 0 first, then cast every column to integers.
# errors='ignore' is intentionally not passed: once the NaNs are filled the
# cast succeeds, and with errors='ignore' a failed cast would silently return
# the float data unchanged instead of reporting the problem.
df.fillna(0).astype('int')

   col_A  col_B  col_C  col_D
0      1      0     11     99
1      3      7     22      0
2      5      5     33     77