In [3]:
import pandas as pd 

In [4]:
# Sample records stored column-wise: one list per field, aligned by position.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=[
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
    ],
)

In [5]:
df = pd.DataFrame(people)

In [6]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [7]:
df.columns 

Index(['first', 'last', 'email'], dtype='object')

In [8]:
df.columns = ['first_name','last_name','email']

In [9]:
df 

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [10]:
df.columns = df.columns.str.upper() 

In [11]:
df 

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [12]:
# Lower-case every column label in one vectorised step via the Index .str accessor.
df.columns = df.columns.str.lower()

In [13]:
df 

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [16]:
# Rename only the listed columns; labels not in the mapping ('email') are kept.
# Rebinding the result instead of using inplace=True avoids mutating a frame that
# earlier cells have already displayed and keeps the step chainable.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [17]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [18]:
df.loc[2,:] 

first                 John
last                   Doe
email    JohnDoe@email.com
Name: 2, dtype: object

In [19]:
df.loc[2,:] = ['John','Smith','JohnSmith@email.com']

In [20]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnSmith@email.com


In [21]:
df.loc[2,['last','email']] 

last                   Smith
email    JohnSmith@email.com
Name: 2, dtype: object

In [22]:
df.loc[2,['last','email']] = ['Doe','JohnDoe@email.com']

In [23]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [24]:
df.loc[2,'last'] = 'Smith'

In [25]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [26]:
df.at[2,'last'] = 'Doe'

In [27]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [29]:
# Boolean mask that is True for rows whose email matches exactly.
target_email = 'JohnDoe@email.com'
filt = df['email'].eq(target_email)
# Show the last name(s) of the matching row(s) with a single .loc lookup.
df.loc[filt, 'last']

2    Doe
Name: last, dtype: object

In [30]:
# Deliberate anti-pattern (chained indexing): df[filt] is evaluated first and the
# assignment is then applied to that intermediate result. pandas warns that the
# value is being set on a copy, so df itself may not be updated.
# Use df.loc[filt, 'last'] = ... to assign through a single indexing operation.
df[filt]['last'] = 'Smith' 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [31]:
df.loc[filt,'last'] = 'Smith' 

In [32]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [34]:
df['email'].str.lower() 

0    coreymschafer@gmail.com
1          janedoe@email.com
2          johndoe@email.com
Name: email, dtype: object

In [35]:
df['email'] = df['email'].str.lower()

In [36]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [None]:
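# Methods for updating values, demonstrated in the cells below:
# apply    - call a function on each value of a Series, or on each column/row of a DataFrame
# map      - Series only: substitute each value via a dict; values missing from the dict become NaN
# applymap - DataFrame only: call a function on every individual element
# replace  - substitute only the listed values and leave all other values unchanged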

In [None]:
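# apply: on a Series the function receives each value; on a DataFrame it receives
# each column (or each row when axis='columns')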

In [37]:
df['email'].apply(len) 

0    23
1    17
2    17
Name: email, dtype: int64

In [38]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [39]:
def update_email(email):
    """Return ``email`` converted to upper case.

    Intended to be passed to ``Series.apply`` so it is called once per value.
    """
    uppercased = email.upper()
    return uppercased

In [40]:
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [41]:
df['email'] = df['email'].apply(update_email)

In [42]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [43]:
# Lower-case each address again, this time with an inline lambda passed to apply;
# for string values this gives the same result as df['email'].str.lower().
df['email'] = df['email'].apply(lambda address: address.lower())

In [44]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [45]:
df.apply(len) 

first    3
last     3
email    3
dtype: int64

In [46]:
df.apply(len, axis='columns')  

0    3
1    3
2    3
dtype: int64

In [47]:
df.apply(pd.Series.min) 

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [48]:
df.apply(lambda x:x.min() )

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [49]:
# applymap calls len on every individual cell, so the result holds the string
# length of each element (contrast with df.apply(len), which measures each column).
# NOTE(review): newer pandas releases (2.1+) deprecate applymap in favour of
# DataFrame.map — confirm the target pandas version before switching.
df.applymap(len) 

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [52]:
df.applymap(str.lower) 

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [53]:
df['first'].map({'Corey':'Chris','Jane':'Mary'}) 

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [55]:
df['first'].replace({'Corey':'Chris','Jane':'Mary'}) 

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [56]:
df 

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [57]:
# Load the survey responses and the accompanying schema file, using paths
# relative to the notebook's working directory.
survey_path = 'data/survey_results_public.csv'
schema_path = 'data/survey_results_schema.csv'
survey_df = pd.read_csv(survey_path)
schema_df = pd.read_csv(schema_path)

In [58]:
# Raise both display limits to the same value so wide/long frames are not truncated.
display_limit = 85
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, display_limit)

In [65]:
# Give the converted-compensation column a clearer name. Rebinding the result
# instead of using inplace=True keeps the step chainable and avoids mutating the
# frame behind earlier cells; all other columns are left as they are.
survey_df = survey_df.rename(columns={'ConvertedComp': 'SalaryUSD'})

In [67]:
survey_df.columns 

Index(['Respondent', 'MainBranch', 'Hobbyist', 'OpenSourcer', 'OpenSource',
       'Employment', 'Country', 'Student', 'EdLevel', 'UndergradMajor',
       'EduOther', 'OrgSize', 'DevType', 'YearsCode', 'Age1stCode',
       'YearsCodePro', 'CareerSat', 'JobSat', 'MgrIdiot', 'MgrMoney',
       'MgrWant', 'JobSeek', 'LastHireDate', 'LastInt', 'FizzBuzz',
       'JobFactors', 'ResumeUpdate', 'CurrencySymbol', 'CurrencyDesc',
       'CompTotal', 'CompFreq', 'SalaryUSD', 'WorkWeekHrs', 'WorkPlan',
       'WorkChallenge', 'WorkRemote', 'WorkLoc', 'ImpSyn', 'CodeRev',
       'CodeRevHrs', 'UnitTests', 'PurchaseHow', 'PurchaseWhat',
       'LanguageWorkedWith', 'LanguageDesireNextYear', 'DatabaseWorkedWith',
       'DatabaseDesireNextYear', 'PlatformWorkedWith',
       'PlatformDesireNextYear', 'WebFrameWorkedWith',
       'WebFrameDesireNextYear', 'MiscTechWorkedWith',
       'MiscTechDesireNextYear', 'DevEnviron', 'OpSys', 'Containers',
       'BlockchainOrg', 'BlockchainIs', 'BetterLife', 'I

In [74]:
# Convert the 'Yes'/'No' answers to booleans.
# Series.map turns any value missing from the dict into NaN, so with only
# 'Yes'/'No' as keys a second run of this cell (when the column already holds
# True/False) would wipe the whole column. The identity entries for True/False
# make re-running the cell a no-op.
hobbyist_to_bool = {'Yes': True, 'No': False, True: True, False: False}
survey_df['Hobbyist'] = survey_df['Hobbyist'].map(hobbyist_to_bool)

In [75]:
survey_df['Hobbyist']

0         True
1        False
2         True
3        False
4         True
         ...  
88878     True
88879    False
88880    False
88881    False
88882     True
Name: Hobbyist, Length: 88883, dtype: bool