## Pandas Snippets

Author: Sebastian Raschka

Source: https://sebastianraschka.com/notebooks/python-notebooks.html

In [None]:
# pandas is used by every cell below but no earlier cell imports it.
import pandas as pd

# Load the example soccer statistics straight from GitHub (needs network access).
df = pd.read_csv('https://raw.githubusercontent.com/rasbt/python_reference/master/Data/some_soccer_data.csv')
df.head()

In [None]:
# Normalise all column labels to lowercase.

df.columns = df.columns.str.lower()

# Alternative that returns a new frame instead of assigning the labels:
# df = df.rename(columns=str.lower)

df.tail(3)

In [None]:
# Replace the abbreviated column labels with descriptive names.
df = df.rename(columns=dict(
    p='points',
    gp='games',
    sot='shots_on_target',
    g='goals',
    ppg='points_per_game',
    a='assists',
))

df.tail(3)

In [None]:
# Processing `salary` column: strip any leading/trailing '$' and 'm'
# characters so that only the numeric text remains.
#
# The vectorised `.str` accessor is used instead of a per-element lambda:
# it passes missing values through unchanged, whereas calling `x.strip`
# on a NaN cell would raise AttributeError.

df['salary'] = df['salary'].str.strip('$m')
df.tail()

In [None]:
# Add an empty-string `position` column at column index 8.
#
# Assignment-based alternative (shown here for a `team` column):
# df['team'] = pd.Series('', index=df.index)
df.insert(8, 'position', '')

df.tail(3)

In [None]:
# Micro-benchmark: two ways of adding a column filled with empty strings.
# NOTE(review): each timed statement assigns into `df`, so the helper columns
# `added1` and `added3` presumably remain afterwards (the next cell drops them).
%timeit df['added1'] = pd.Series('', index=df.index)
# The `insert` variant is left disabled.
# NOTE(review): presumably because inserting an already existing column name
# a second time fails — confirm before re-enabling.
#%time df.insert(loc=len(df.columns), column='added2', value='') 
%timeit df['added3'] = ''


In [None]:
# Remove the two helper columns, modifying `df` in place.
df.drop(columns=['added1', 'added3'], inplace=True)

In [None]:
# Processing `player` column

def process_player_col(text):
    """Split one raw `player` cell into name, team and position.

    The cell is expected to hold the player name, a newline, and then
    the position and team separated by ' — ' (space, em dash, space).

    Parameters
    ----------
    text : str
        Raw content of a single `player` cell.

    Returns
    -------
    pandas.Series
        Three values in the order ``[name, team, position]``, each with
        surrounding whitespace removed.

    Raises
    ------
    ValueError
        If the newline or the ' — ' separator is missing.
    """
    # Split only on the first newline so that a trailing or additional
    # newline does not break the two-way unpacking.
    name, rest = text.split('\n', 1)
    # Split only on the first separator so a team name that itself contains
    # ' — ' stays intact.
    position, team = [part.strip() for part in rest.split(' — ', 1)]
    return pd.Series([name.strip(), team, position])

# Replace the raw `player` text by the parsed name and fill the `team` and
# `position` columns from the values returned by `process_player_col`.
df[['player', 'team', 'position']] = df['player'].apply(process_player_col)

# modified after tip from reddit.com/user/hharison
#
# Alternative (inferior) approach, iterating row by row:
#
# for idx, row in df.iterrows():
#     name, team, position = process_player_col(row['player'])
#     df.loc[idx, ['player', 'team', 'position']] = name, team, position

df.tail(3)

In [None]:
# Lowercase the three text columns.
#
# `Series.str.lower` is applied column by column instead of the element-wise
# `applymap`, which is deprecated in recent pandas releases; the `.str`
# accessor also leaves missing values untouched instead of raising.
cols = ['player', 'position', 'team']
df[cols] = df[cols].apply(lambda col: col.str.lower())
df.head()

In [None]:
import numpy as np

In [None]:
%%timeit

df.append(pd.Series(
                [np.nan]*len(df.columns), # Fill cells with NaNs
                index=df.columns),    
                ignore_index=True)

In [None]:
%%timeit
nans = [np.nan] * len(df.columns)
df.loc[-1] = nans

In [None]:
# Display the current state of the DataFrame as the cell output.
df