# CONVERTING DATATYPES

In [1]:
import numpy as np
import pandas as pd

# Turn on the `greedy` option of IPython's completer (IPCompleter) for this session.
%config IPCompleter.greedy = True

# Suppress scientific notation in NumPy's printed output (a NumPy print option)
np.set_printoptions(suppress=True)

In [None]:
# Sample datasets, each fetched over HTTP (running this cell needs network access).
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
movies = pd.read_csv('http://bit.ly/imdbratings')
orders = pd.read_csv('http://bit.ly/chiporders', sep='\t')
# Strip the dollar sign before casting the price to float. regex=False is passed
# explicitly so '$' is always removed as a literal character: read as a regular
# expression, '$' is the end-of-string anchor, and the default for `regex` has
# differed between pandas versions.
orders['item_price'] = orders['item_price'].str.replace('$', '', regex=False).astype('float')
# parse_dates converts the named column to datetimes while reading.
stocks = pd.read_csv('http://bit.ly/smallstocks', parse_dates=['Date'])
titanic = pd.read_csv('http://bit.ly/kaggletrain')
ufo = pd.read_csv('http://bit.ly/uforeports', parse_dates=['Time'])

## CONVERT STRINGS TO NUMBERS

In [21]:
# Toy frame in which every value is a string. The '-' in Column C stands in for
# a missing value and cannot be parsed as a number.
df = pd.DataFrame(
    {
        'Column A': ['1.1', '2.2', '3.3'],
        'Column B': ['4.4', '5.5', '6.6'],
        'Column C': ['7.7', '8.8', '-'],
    }
)

# Label and dtypes on separate lines, then the frame itself as the cell output.
print('Datatypes:', df.dtypes, sep='\n')

df

Datatypes:
Column A    object
Column B    object
Column C    object
dtype: object


Unnamed: 0,Column A,Column B,Column C
0,1.1,4.4,7.7
1,2.2,5.5,8.8
2,3.3,6.6,-


#### METHOD 1: CONVERT COLUMN A TO FLOAT

In [59]:
# Non-mutating: astype returns a new frame in which only Column A is cast to
# float; df itself keeps its dtypes.
df.astype(dtype={'Column A': 'float'}).dtypes

Column A    float64
Column B     object
Column C     object
dtype: object

#### METHOD 2: CONVERT COLUMN B TO FLOAT

In [26]:
# Cast a single column to float and report the resulting dtype. The cast
# returns a new Series; df is not modified.
df['Column B'].astype('float').dtype

dtype('float64')

#### METHOD 3: CONVERT COLUMN C TO FLOAT (AND REPLACE NAN WITH ZERO)

In [33]:
# The '-' entry cannot be parsed as a number: errors='coerce' turns it into NaN,
# and fillna(0) then replaces that NaN with zero. Only the dtype is displayed.
(
    pd.to_numeric(df['Column C'], errors='coerce')
    .fillna(0)
    .dtype
)

dtype('float64')

#### METHOD 4: CONVERT ALL COLUMNS TO FLOATS AND REPLACE NAN WITH ZERO

In [57]:
# Rebinds df: each column is parsed as numbers, unparseable entries (the '-' in
# Column C) are coerced to NaN, and every NaN is then filled with 0.
df = (
    df
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)
df

Unnamed: 0,Column A,Column B,Column C
0,1.1,4.4,7.7
1,2.2,5.5,8.8
2,3.3,6.6,0.0


## CONVERT FLOATS TO INTEGERS

#### METHOD 1: CONVERT COLUMN A TO INT

In [63]:
# Non-mutating cast of Column A only. 'int' leaves the integer width to the
# platform default (int32 in the output recorded below).
df.astype(dtype={'Column A': 'int'}).dtypes

Column A      int32
Column B    float64
Column C    float64
dtype: object

#### METHOD 2: CONVERT COLUMN B TO INT64

In [66]:
# Passing np.int64 requests a 64-bit integer column explicitly instead of
# leaving the width to the 'int' default. df itself is not modified.
df.astype(dtype={'Column B': np.int64}).dtypes

Column A    float64
Column B      int64
Column C    float64
dtype: object

#### METHOD 3: CONVERT COLUMN C TO INT

In [67]:
# Returns a new integer Series; the fractional part is dropped (7.7 -> 7 in the
# output below). df is not modified.
df['Column C'].astype(dtype='int')

0    7
1    8
2    0
Name: Column C, dtype: int32