In [23]:
import pandas as pd
import numpy as np

In [24]:
drinks = pd.read_csv('http://bit.ly/drinksbycountry')
movies = pd.read_csv('http://bit.ly/imdbratings')
orders = pd.read_csv('http://bit.ly/chiporders', sep='\t')
orders['item_price'] = orders.item_price.str.replace('$', '').astype('float')
stocks = pd.read_csv('http://bit.ly/smallstocks', parse_dates=['Date'])
titanic = pd.read_csv('http://bit.ly/kaggletrain')
ufo = pd.read_csv('http://bit.ly/uforeports', parse_dates=['Time'])

# 1. Show installed versions

In [8]:
pd.__version__

'0.25.1'

In [7]:
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : None
python           : 3.7.4.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
machine          : AMD64
processor        : Intel64 Family 6 Model 61 Stepping 4, GenuineIntel
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : None.None

pandas           : 0.25.1
numpy            : 1.16.5
pytz             : 2019.3
dateutil         : 2.8.0
pip              : 19.2.3
setuptools       : 41.4.0
Cython           : 0.29.13
pytest           : 5.2.1
hypothesis       : None
sphinx           : 2.2.0
blosc            : None
feather          : None
xlsxwriter       : 1.2.1
lxml.etree       : 4.4.1
html5lib         : 1.0.1
pymysql          : None
psycopg2         : None
jinja2           : 2.10.3
IPython          : 7.8.0
pandas_datareader: None
bs4              : 4.8.0
bottleneck       : 1.2.1
fastparquet      : None
gcsfs            : None
lxml.etree       : 4.4.1
matplotli

# 2. Create an example DataFrame

In [9]:
df = pd.DataFrame({'Col one':[100, 200], 'Col two': [300, 400]})
df 

Unnamed: 0,Col one,Col two
0,100,300
1,200,400


In [10]:
pd.DataFrame(np.random.rand(4,8))

Unnamed: 0,0,1,2,3,4,5,6,7
0,0.550376,0.436976,0.318827,0.562064,0.22947,0.017802,0.989227,0.634233
1,0.195634,0.560025,0.397166,0.280922,0.729104,0.043523,0.230704,0.987325
2,0.169166,0.494295,0.349498,0.285307,0.892403,0.693902,0.295198,0.3996
3,0.4454,0.758766,0.186139,0.401187,0.731082,0.027462,0.282595,0.631009


In [11]:
pd.DataFrame(np.random.rand(4,8), columns = list('abcdefgh'))

Unnamed: 0,a,b,c,d,e,f,g,h
0,0.915428,0.388564,0.745722,0.116652,0.904111,0.139651,0.619279,0.450344
1,0.620948,0.453676,0.921379,0.106852,0.38739,0.506816,0.994345,0.238395
2,0.067004,0.311172,0.682997,0.086783,0.59889,0.822578,0.534404,0.833114
3,0.741014,0.111653,0.562614,0.498308,0.341056,0.407018,0.426582,0.628663


# 3. Remane columns

In [12]:
df

Unnamed: 0,Col one,Col two
0,100,300
1,200,400


In [13]:
df = df.rename({'Col one': 'col_one', 'Col two ': 'col_two'}, axis = 'columns')

In [14]:
df

Unnamed: 0,col_one,Col two
0,100,300
1,200,400


In [15]:
df.columns = ['col__one', 'col__two']

In [16]:
df

Unnamed: 0,col__one,col__two
0,100,300
1,200,400


In [19]:
df.columns = df.columns.str.replace('__', '_')

In [20]:
df

Unnamed: 0,col_one,col_two
0,100,300
1,200,400


In [21]:
df.add_prefix('x_')

Unnamed: 0,x_col_one,x_col_two
0,100,300
1,200,400


In [22]:
df.add_suffix('_y')

Unnamed: 0,col_one_y,col_two_y
0,100,300
1,200,400


# 4. Reverse row order

In [25]:
drinks.head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [27]:
drinks.loc[::-1].head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
192,Zimbabwe,64,18,4,4.7,Africa
191,Zambia,32,19,4,2.5,Africa
190,Yemen,6,0,0,0.1,Asia
189,Vietnam,111,2,1,2.0,Asia
188,Venezuela,333,100,3,7.7,South America


In [28]:
drinks.loc[::-1].reset_index(drop = True).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Zimbabwe,64,18,4,4.7,Africa
1,Zambia,32,19,4,2.5,Africa
2,Yemen,6,0,0,0.1,Asia
3,Vietnam,111,2,1,2.0,Asia
4,Venezuela,333,100,3,7.7,South America


# 5. Reverse column order

In [30]:
drinks.loc[:,::-1].head()

Unnamed: 0,continent,total_litres_of_pure_alcohol,wine_servings,spirit_servings,beer_servings,country
0,Asia,0.0,0,0,0,Afghanistan
1,Europe,4.9,54,132,89,Albania
2,Africa,0.7,14,0,25,Algeria
3,Europe,12.4,312,138,245,Andorra
4,Africa,5.9,45,57,217,Angola


# 6. Select columns by data type

In [31]:
drinks.dtypes

country                          object
beer_servings                     int64
spirit_servings                   int64
wine_servings                     int64
total_litres_of_pure_alcohol    float64
continent                        object
dtype: object

In [32]:
drinks.select_dtypes(include = 'number').head()

Unnamed: 0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol
0,0,0,0,0.0
1,89,132,54,4.9
2,25,0,14,0.7
3,245,138,312,12.4
4,217,57,45,5.9


In [33]:
drinks.select_dtypes(include = 'object').head()

Unnamed: 0,country,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa


In [38]:
drinks.select_dtypes(include = ['number','object', 'category', 'datetime']).head()

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa
3,Andorra,245,138,312,12.4,Europe
4,Angola,217,57,45,5.9,Africa


In [40]:
drinks.select_dtypes(exclude = 'number').head()

Unnamed: 0,country,continent
0,Afghanistan,Asia
1,Albania,Europe
2,Algeria,Africa
3,Andorra,Europe
4,Angola,Africa


# 7. Convert strings to numbers 

In [41]:
df = pd.DataFrame({'col_one':['1.1','2.2','3.3'],
                  'col_two': ['4.4','5.5','6.6'],
                  'col_three': ['7,7','8,8','-']})
df 

Unnamed: 0,col_one,col_two,col_three
0,1.1,4.4,77
1,2.2,5.5,88
2,3.3,6.6,-


In [42]:
df.dtypes

col_one      object
col_two      object
col_three    object
dtype: object

In [44]:
df.astype({'col_one':'float', 'col_two':'float'}).dtypes

col_one      float64
col_two      float64
col_three     object
dtype: object

In [50]:
# pd.to_numeric(df.col_three, errors = 'coerce')
pd.to_numeric(df.col_three, errors='coerce')

0   NaN
1   NaN
2   NaN
Name: col_three, dtype: float64

In [51]:
pd.to_numeric(df.col_three, errors='coerce').fillna(0)

0    0.0
1    0.0
2    0.0
Name: col_three, dtype: float64

In [52]:
df = df.apply(pd.to_numeric, errors='coerce').fillna(0)
df

Unnamed: 0,col_one,col_two,col_three
0,1.1,4.4,0.0
1,2.2,5.5,0.0
2,3.3,6.6,0.0


In [53]:
df.dtypes


col_one      float64
col_two      float64
col_three    float64
dtype: object