# CONVERTING DATATYPES

In [1]:
import numpy as np
import pandas as pd

# Suppress scientific notation
np.set_printoptions(suppress=True)

## CONVERT STRINGS TO NUMBERS

In [28]:
df = pd.DataFrame({'Column A':['1.1', '2.2', '3.3'],
                   'Column B':['4.4', '5.5', '6.6'],
                   'Column C':['7.7', '8.8', '-']})

print('Datatypes:')
print(df.dtypes)

df

Datatypes:
Column A    object
Column B    object
Column C    object
dtype: object


Unnamed: 0,Column A,Column B,Column C
0,1.1,4.4,7.7
1,2.2,5.5,8.8
2,3.3,6.6,-


#### METHOD 1: CONVERT COLUMN A TO FLOAT

In [3]:
df.astype({'Column A':'float'}).dtypes

Column A    float64
Column B     object
Column C     object
dtype: object

#### METHOD 2: CONVERT COLUMN B TO FLOAT

In [4]:
df['Column B'].astype('float')

0    4.4
1    5.5
2    6.6
Name: Column B, dtype: float64

#### METHOD 3: CONVERT COLUMN C TO FLOAT (AND REPLACE NAN WITH ZERO)

In [5]:
pd.to_numeric(df['Column C'], errors='coerce').fillna(0).dtype

dtype('float64')

#### METHOD 4: CONVERT ALL COLUMNS TO FLOATS AND REPLACE NAN WITH ZERO

In [21]:
df_floats = df.apply(pd.to_numeric, errors='coerce').fillna(0)
df_floats

Unnamed: 0,Column A,Column B,Column C
0,1.1,4.4,7.7
1,2.2,5.5,8.8
2,3.3,6.6,0.0


### CONVERT FLOATS TO INTEGERS

#### METHOD 1: CONVERT COLUMN A TO INT

In [23]:
df_floats.astype({'Column A':'int'}).dtypes

Column A      int32
Column B    float64
Column C    float64
dtype: object

#### METHOD 2: CONVERT COLUMN B TO INT64

In [24]:
df_floats.astype({'Column B':np.int64}).dtypes

Column A    float64
Column B      int64
Column C    float64
dtype: object

#### METHOD 3 CONVERT COLUMN C TO INT

In [25]:
df_floats['Column C'].astype('int')

0    7
1    8
2    0
Name: Column C, dtype: int32

### CONVERT FROM STRINGS TO INTEGERS

Pandas does not allow to change data type from string to integer directly so we first have to change to a float and then we can change to an integer.

In [41]:
df['Column A'] = df['Column A'].astype('float').astype('int')
df.dtypes

Column A     int32
Column B    object
Column C     int64
dtype: object

In [40]:
df['Column C'] = pd.to_numeric(df['Column C'], errors='coerce').fillna(0).astype(np.int64)
df.dtypes

Column A    object
Column B    object
Column C     int64
dtype: object