In [1]:
# Sample records: each key is a column name and each list holds that column's values.
people = {
    'first': ['Corey', 'Jane', 'John'],
    'last': ['Schafer', 'Doe', 'Doe'],
    'email': ['Corey@gmail.com', 'Jane@gmail.com', 'John@gmail.com'],
}

In [2]:
import pandas as pd

In [3]:
# Build a DataFrame from the dict: keys become column labels, lists become the column values.
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,John@gmail.com


In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

If we would like to rename all of the columns at once, we can assign a new list of names to `df.columns`.

In [6]:
# The list supplies a new label for every column, in order; 'email' is repeated unchanged.
df.columns = ['first_name', 'last_name', 'email']

In [8]:
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [9]:
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,John@gmail.com


In [10]:
# A list comprehension over the existing labels upper-cases every column name.
df.columns = [label.upper() for label in df.columns]

In [56]:
df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [17]:
# The .str accessor on df.columns applies a string method to every label at once.
# Round trip for demonstration: underscores -> spaces, then spaces -> underscores,
# and finally lower-case the labels again.
df.columns = df.columns.str.replace('_', ' ')
df.columns = df.columns.str.replace(' ', '_')
df.columns = df.columns.str.lower()
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,John@gmail.com


In [25]:
# rename() takes an {old_name: new_name} dictionary, so only the listed columns change.
# It returns a new DataFrame and leaves the original untouched, so assign the result
# back to keep it (preferred over inplace=True, which returns None and prevents chaining).

df = df.rename(columns={
    'first_name': 'first',
    'last_name': 'last',
})

df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,John@gmail.com


In [26]:
df.iloc[2]

first              John
last                Doe
email    John@gmail.com
Name: 2, dtype: object

In [29]:
# Select the same record by value instead of by position. Unlike df.iloc[2], which
# returns a Series, a boolean filter returns a DataFrame (here with a single row).
filt = (df['first'] == 'John') & (df['last'] == 'Doe')
df[filt]

Unnamed: 0,first,last,email
2,John,Doe,John@gmail.com


In [30]:
# We can change John's identity by overwriting the whole row at position 2;
# the list supplies a new value for each column, in column order.
df.iloc[2] = ['John', 'Smith', 'Smith@gmail.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Smith,Smith@gmail.com


In [33]:
# If we do not want to change every value in the row, pass the columns to update as the
# second argument to loc and supply one new value per listed column.
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@gmail.com']
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [36]:
# loc with one row label and one column label sets a single value.
df.loc[2, 'last'] = 'Smith'
# We can do the exact same thing using df.at; here it sets the last name back to 'Doe'.
df.at[2, 'last'] = 'Doe'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [40]:
# Boolean mask that is True only for the row whose email is JohnDoe@gmail.com.
filt = (df['email'] == 'JohnDoe@gmail.com')

# Recall that indexing with the mask gives a DataFrame containing the matching rows.
df[filt]

Unnamed: 0,first,last,email
2,John,Doe,JohnDoe@gmail.com


In [41]:
# Chaining a column selection onto the filtered frame returns that column as a Series.
df[filt]['last']

2    Doe
Name: last, dtype: object

In [42]:
# Anti-pattern, shown on purpose: do NOT use chained indexing to change the Doe into a Smith.
# df[filt] produces a temporary copy, so 'Smith' is written to that copy and df itself is
# left unchanged; pandas reports this with the warning shown below.
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [46]:
# The change was not made because the chained form performs two separate indexing
# operations in the background, and the assignment lands on the intermediate copy.
# To fix this, use a single loc call with the filter as the rows and the column name.
df.loc[filt, 'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,Corey@gmail.com
1,Jane,Doe,Jane@gmail.com
2,John,Smith,JohnDoe@gmail.com


In [49]:
# Lower-case every email with the .str accessor and assign the result back to the column.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,corey@gmail.com
1,Jane,Doe,jane@gmail.com
2,John,Smith,johndoe@gmail.com


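We will be looking at four methods: `apply`, `map`, `applymap`, and `replace`. Note that none of them changes the data in place: each returns a new object, so the result must be assigned back if we want to keep it. `apply` is used for calling a function on our values. It can be used on both DataFrame and Series objects, but it behaves differently on each. Let us first look at how `apply` works on a Series.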

In [50]:
df['email'].apply(len)

0    15
1    14
2    17
Name: email, dtype: int64

In [53]:
# We can also pass our own function to apply and assign the result back to update the values.
def update_email(email: str) -> str:
    """Return the given email address converted to upper case."""
    return email.upper()

df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREY@GMAIL.COM
1,Jane,Doe,JANE@GMAIL.COM
2,John,Smith,JOHNDOE@GMAIL.COM


In [54]:
# We can also use a lambda function. The result is not assigned back here,
# so df['email'] itself keeps its upper-case values.
df['email'].apply(lambda x: x.lower())

0      corey@gmail.com
1       jane@gmail.com
2    johndoe@gmail.com
Name: email, dtype: object

In [58]:
# Let us now look at how apply works on a DataFrame.
# On a DataFrame, apply calls the function once per column (each column is a Series),
# so len returns the number of rows in each column.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [59]:
# By default apply works column by column, so each value above is simply the length
# of that column. For example, the 'email' column on its own:
len(df['email'])

3

In [60]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREY@GMAIL.COM
1,Jane,Doe,JANE@GMAIL.COM
2,John,Smith,JOHNDOE@GMAIL.COM


In [61]:
df['email']

0      COREY@GMAIL.COM
1       JANE@GMAIL.COM
2    JOHNDOE@GMAIL.COM
Name: email, dtype: object

In [63]:
df.apply(len, axis = 'columns')
# With axis='columns' the function is called once per row instead of once per column.
# For the 0th row it receives Corey, Schafer, and his email, so len returns 3.

0    3
1    3
2    3
dtype: int64

In [64]:
# Column-wise (the default): the smallest value in each column, using string comparison.
df.apply(pd.Series.min)

first              Corey
last                 Doe
email    COREY@GMAIL.COM
dtype: object

In [65]:
 df.apply(pd.Series.min, axis = 'columns')

0      COREY@GMAIL.COM
1                  Doe
2    JOHNDOE@GMAIL.COM
dtype: object

In [66]:
# Same result as df.apply(pd.Series.min): the lambda receives each column as a Series x.
df.apply(lambda x : x.min())

first              Corey
last                 Doe
email    COREY@GMAIL.COM
dtype: object

In summary, running ```apply``` on a Series object applies the function to every value in the Series. Running ```apply``` on a DataFrame applies the function to every Series (by default, every column) in the DataFrame.

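If instead you would like to apply a function to each individual element of the DataFrame, then you would use ```applymap```. (In pandas 2.1 and later, ```applymap``` is deprecated in favour of the equivalent ```DataFrame.map```.)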

In [68]:
# Element-wise: call len on every individual cell of the DataFrame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 (applymap is deprecated
# and emits a FutureWarning), so the new name is used here.
df.map(len)

  df.applymap(len)


Unnamed: 0,first,last,email
0,5,7,15
1,4,3,14
2,4,5,17


In [70]:
# Element-wise: lower-case every cell (this works because every cell here is a string).
# DataFrame.map is the pandas 2.1+ name for the deprecated DataFrame.applymap.
df.map(str.lower)

  df.applymap(str.lower)


Unnamed: 0,first,last,email
0,corey,schafer,corey@gmail.com
1,jane,doe,jane@gmail.com
2,john,smith,johndoe@gmail.com


We will now look at the ```map``` method of a Series. Mapping substitutes each value in the Series with another value; the dictionary-based substitution shown here only works on a Series object.

In [72]:
# This returns a new Series. Note that any value missing from the dictionary becomes NaN.
df['first'].map({'Corey':'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

If you do not want to replace *ALL* the values (that is, if values missing from the dictionary should be kept as they are), use the ```replace``` method instead.

In [73]:
df['first'].replace({'Corey':'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [74]:
# To make this permanent, assign the Series returned by replace back to the column.
df['first'] = df['first'].replace({'Corey':'Chris', 'Jane': 'Mary'})
df

Unnamed: 0,first,last,email
0,Chris,Schafer,COREY@GMAIL.COM
1,Mary,Doe,JANE@GMAIL.COM
2,John,Smith,JOHNDOE@GMAIL.COM


# We will now use the big survey data.

In [75]:
# Load the survey results and the survey schema from CSV files under the relative data/ folder.
# Note that this rebinds df, so the small example DataFrame used above is no longer
# available under that name.
df = pd.read_csv('data/survey_results_public.csv')
schema_df = pd.read_csv('data/survey_results_schema.csv')

In [76]:
# Raise the display limits so that all 85 columns of the survey data are shown;
# the same limit of 85 is applied to the number of rows.
pd.options.display.max_columns = 85
pd.options.display.max_rows = 85

In [77]:
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,,,,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88878,88377,,Yes,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Canada,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;JavaScript;Other(s):,C++;HTML/CSS;JavaScript;SQL;WebAssembly;Other(s):,Firebase;SQLite,Firebase;MySQL;SQLite,Linux,Google Cloud Platform;Linux,jQuery,jQuery;Vue.js,Node.js,React Native;Unity 3D;Unreal Engine,Atom;Visual Studio;Visual Studio Code,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,What?,YouTube,,Username,I don't remember,A few times per week,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not at all",,Tech articles written by other developers;Tech...,,Man,No,,,No,Appropriate in length,Easy
88879,88601,,No,Never,The quality of OSS and closed source software ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88880,88802,,No,Never,,Employed full-time,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88881,88816,,No,Never,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
