In [1]:
# Source: https://pbpython.com/pandas_dtypes.html
import pandas as pd
import numpy as np

In [2]:
url = 'https://raw.githubusercontent.com/chris1610/pbpython/master/data/sales_data_types.csv'
df = pd.read_csv(url)

In [3]:
df

Unnamed: 0,Customer Number,Customer Name,2016,2017,Percent Growth,Jan Units,Month,Day,Year,Active
0,10002.0,Quest Industries,"$125,000.00",$162500.00,30.00%,500,1,10,2015,Y
1,552278.0,Smith Plumbing,"$920,000.00","$101,2000.00",10.00%,700,6,15,2014,Y
2,23477.0,ACME Industrial,"$50,000.00",$62500.00,25.00%,125,3,29,2016,Y
3,24900.0,Brekke LTD,"$350,000.00",$490000.00,4.00%,75,10,27,2015,Y
4,651029.0,Harbor Co,"$15,000.00",$12750.00,-15.00%,Closed,2,2,2014,N


In [4]:
df.dtypes
# Customer Number -> int64
# 2016,2017 -> int or float
# Month, Day, Year -> date
# Active -> Bool

Customer Number    float64
Customer Name       object
2016                object
2017                object
Percent Growth      object
Jan Units           object
Month                int64
Day                  int64
Year                 int64
Active              object
dtype: object

In [5]:
# astype() function
df['Customer Number'] = df['Customer Number'].astype('int64')

In [6]:
# lambda
df['Percent Growth'] = df['Percent Growth'].apply(lambda x: x.replace('%', '')).astype('float') / 100

In [7]:
# custom conversion functions
def convert_currency(val):
    new_val = val.replace(',','').replace('$','')
    return float(new_val)

df['2016'] = df['2016'].apply(convert_currency)
df['2017'] = df['2017'].apply(convert_currency)

In [8]:
# np.where
df['Active'].astype('bool')
df["Active"] = np.where(df["Active"] == "Y", True, False)

In [9]:
# pandas helper functions
df['Jan Units'] = pd.to_numeric(df['Jan Units'], errors='coerce').fillna(0) #coerce convert "cloased" value to Nan
df["Start_Date"] = pd.to_datetime(df[['Month', 'Day', 'Year']])

In [12]:
df.dtypes

Customer Number             int64
Customer Name              object
2016                      float64
2017                      float64
Percent Growth            float64
Jan Units                 float64
Month                       int64
Day                         int64
Year                        int64
Active                       bool
Start_Date         datetime64[ns]
dtype: object

In [10]:
# bring it together
def convert_currency(val):
    """
    Convert the string number value to a float
     - Remove $
     - Remove commas
     - Convert to float type
    """
    new_val = val.replace(',','').replace('$', '')
    return float(new_val)

def convert_percent(val):
    """
    Convert the percentage string to an actual floating point percent
    - Remove %
    - Divide by 100 to make decimal
    """
    new_val = val.replace('%', '')
    return float(new_val) / 100


df_2 = pd.read_csv(url,
                   dtype={'Customer Number': 'int'},
                   converters={'2016': convert_currency,
                               '2017': convert_currency,
                               'Percent Growth': convert_percent,
                               'Jan Units': lambda x: pd.to_numeric(x, errors='coerce'),
                               'Active': lambda x: np.where(x == "Y", True, False)
                              })

df_2.dtypes

Customer Number      int64
Customer Name       object
2016               float64
2017               float64
Percent Growth     float64
Jan Units          float64
Month                int64
Day                  int64
Year                 int64
Active              object
dtype: object