# Pandas

In [2]:
import pandas as pd
import numpy as np

## Loading data

In [3]:
df = pd.read_csv('data/survey_results_public.csv')

In [4]:
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88878,88377,,Yes,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Canada,No,Primary/elementary school,,...,,Tech articles written by other developers;Tech...,,Man,No,,,No,Appropriate in length,Easy
88879,88601,,No,Never,The quality of OSS and closed source software ...,,,,,,...,,,,,,,,,,
88880,88802,,No,Never,,Employed full-time,,,,,...,,,,,,,,,,
88881,88816,,No,Never,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",,,,,...,,,,,,,,,,


In [5]:
df.shape

(88883, 85)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88883 entries, 0 to 88882
Data columns (total 85 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Respondent              88883 non-null  int64  
 1   MainBranch              88331 non-null  object 
 2   Hobbyist                88883 non-null  object 
 3   OpenSourcer             88883 non-null  object 
 4   OpenSource              86842 non-null  object 
 5   Employment              87181 non-null  object 
 6   Country                 88751 non-null  object 
 7   Student                 87014 non-null  object 
 8   EdLevel                 86390 non-null  object 
 9   UndergradMajor          75614 non-null  object 
 10  EduOther                84260 non-null  object 
 11  OrgSize                 71791 non-null  object 
 12  DevType                 81335 non-null  object 
 13  YearsCode               87938 non-null  object 
 14  Age1stCode              87634 non-null

In [7]:
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)

In [8]:
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='Column')

In [9]:
schema_df

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
Respondent,Randomized respondent ID number (not in order ...
MainBranch,Which of the following options best describes ...
Hobbyist,Do you code as a hobby?
OpenSourcer,How often do you contribute to open source?
OpenSource,How do you feel about the quality of open sour...
Employment,Which of the following best describes your cur...
Country,In which country do you currently reside?
Student,"Are you currently enrolled in a formal, degree..."
EdLevel,Which of the following best describes the high...
UndergradMajor,What was your main or most important field of ...


## DataFrame basics

In [10]:
people_dict = {
    'first': ['Corey', 'John', 'Nick'],
    'last': ['Kowalski', 'Doe', 'Doe'],
    'email': ['corey@gmail.com', 'j.doe@yahoo.com', 'nick_deo@gmail.com']
}

In [11]:
people_df = pd.DataFrame(people_dict)    # DataFrame from dictionary
people_df

Unnamed: 0,first,last,email
0,Corey,Kowalski,corey@gmail.com
1,John,Doe,j.doe@yahoo.com
2,Nick,Doe,nick_deo@gmail.com


In [12]:
people_df['email']  # Return series (pandas list - 1D vector)

0       corey@gmail.com
1       j.doe@yahoo.com
2    nick_deo@gmail.com
Name: email, dtype: object

In [13]:
people_df.email

0       corey@gmail.com
1       j.doe@yahoo.com
2    nick_deo@gmail.com
Name: email, dtype: object

In [14]:
print(type(people_df['email']), type(people_df.email))

<class 'pandas.core.series.Series'> <class 'pandas.core.series.Series'>


In [15]:
people_df[['email', 'first']]   # we can change display order of columns

Unnamed: 0,email,first
0,corey@gmail.com,Corey
1,j.doe@yahoo.com,John
2,nick_deo@gmail.com,Nick


In [16]:
type(people_df[['first', 'email']])

pandas.core.frame.DataFrame

In [17]:
people_df.columns

Index(['first', 'last', 'email'], dtype='object')

iloc -> intager location (we can onlu use intagers)

loc -> location labels and indexes if no labels

In [18]:
people_df.iloc[0]

first              Corey
last            Kowalski
email    corey@gmail.com
Name: 0, dtype: object

In [19]:
people_df.iloc[[0, 1]]

Unnamed: 0,first,last,email
0,Corey,Kowalski,corey@gmail.com
1,John,Doe,j.doe@yahoo.com


In [20]:
people_df.iloc[[0, 1], 2]   # get 0,1 rows and 2 column

0    corey@gmail.com
1    j.doe@yahoo.com
Name: email, dtype: object

In [21]:
people_df.loc[[1, 2], 'email']

1       j.doe@yahoo.com
2    nick_deo@gmail.com
Name: email, dtype: object

In [22]:
people_df.loc[[1, 2], ['email', 'last']]

Unnamed: 0,email,last
1,j.doe@yahoo.com,Doe
2,nick_deo@gmail.com,Doe


In [23]:
df.columns

Index(['Respondent', 'MainBranch', 'Hobbyist', 'OpenSourcer', 'OpenSource',
       'Employment', 'Country', 'Student', 'EdLevel', 'UndergradMajor',
       'EduOther', 'OrgSize', 'DevType', 'YearsCode', 'Age1stCode',
       'YearsCodePro', 'CareerSat', 'JobSat', 'MgrIdiot', 'MgrMoney',
       'MgrWant', 'JobSeek', 'LastHireDate', 'LastInt', 'FizzBuzz',
       'JobFactors', 'ResumeUpdate', 'CurrencySymbol', 'CurrencyDesc',
       'CompTotal', 'CompFreq', 'ConvertedComp', 'WorkWeekHrs', 'WorkPlan',
       'WorkChallenge', 'WorkRemote', 'WorkLoc', 'ImpSyn', 'CodeRev',
       'CodeRevHrs', 'UnitTests', 'PurchaseHow', 'PurchaseWhat',
       'LanguageWorkedWith', 'LanguageDesireNextYear', 'DatabaseWorkedWith',
       'DatabaseDesireNextYear', 'PlatformWorkedWith',
       'PlatformDesireNextYear', 'WebFrameWorkedWith',
       'WebFrameDesireNextYear', 'MiscTechWorkedWith',
       'MiscTechDesireNextYear', 'DevEnviron', 'OpSys', 'Containers',
       'BlockchainOrg', 'BlockchainIs', 'BetterLife'

In [24]:
df['Country']

0                United Kingdom
1        Bosnia and Herzegovina
2                      Thailand
3                 United States
4                       Ukraine
                  ...          
88878                    Canada
88879                       NaN
88880                       NaN
88881                       NaN
88882                     Spain
Name: Country, Length: 88883, dtype: object

In [25]:
df['Country'].value_counts()

Country
United States        20949
India                 9061
Germany               5866
United Kingdom        5737
Canada                3395
                     ...  
Tonga                    1
Timor-Leste              1
North Korea              1
Brunei Darussalam        1
Chad                     1
Name: count, Length: 179, dtype: int64

In [26]:
df.loc[[10, 11, 12], 'Country']

10    Antigua and Barbuda
11                 Canada
12          United States
Name: Country, dtype: object

In [27]:
df.loc[10:12, 'Country':'EdLevel']  # [10, 12] both inclusive not [10. 12)

Unnamed: 0,Country,Student,EdLevel
10,Antigua and Barbuda,"Yes, full-time",Primary/elementary school
11,Canada,"Yes, full-time",Some college/university study without earning ...
12,United States,No,"Master’s degree (MA, MS, M.Eng., MBA, etc.)"


## Custom indexes

In [28]:
people_df['email']

0       corey@gmail.com
1       j.doe@yahoo.com
2    nick_deo@gmail.com
Name: email, dtype: object

In [29]:
people_df.set_index('email')    # doesn't modify DataFrame

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
corey@gmail.com,Corey,Kowalski
j.doe@yahoo.com,John,Doe
nick_deo@gmail.com,Nick,Doe


In [30]:
people_df   # set_index didn't change the DataFrame

Unnamed: 0,first,last,email
0,Corey,Kowalski,corey@gmail.com
1,John,Doe,j.doe@yahoo.com
2,Nick,Doe,nick_deo@gmail.com


In [31]:
people_df.set_index('email', inplace=True)  # changes DataFrame

In [32]:
people_df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
corey@gmail.com,Corey,Kowalski
j.doe@yahoo.com,John,Doe
nick_deo@gmail.com,Nick,Doe


In [33]:
people_df.index

Index(['corey@gmail.com', 'j.doe@yahoo.com', 'nick_deo@gmail.com'], dtype='object', name='email')

In [34]:
people_df.loc['j.doe@yahoo.com', 'last']

'Doe'

In [35]:
people_df.iloc[1, 1]

'Doe'

In [36]:
people_df.reset_index(inplace=True)
people_df

Unnamed: 0,email,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick_deo@gmail.com,Nick,Doe


In [37]:
df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')
df

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,,,,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88377,,Yes,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Canada,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;JavaScript;Other(s):,C++;HTML/CSS;JavaScript;SQL;WebAssembly;Other(s):,Firebase;SQLite,Firebase;MySQL;SQLite,Linux,Google Cloud Platform;Linux,jQuery,jQuery;Vue.js,Node.js,React Native;Unity 3D;Unreal Engine,Atom;Visual Studio;Visual Studio Code,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,What?,YouTube,,Username,I don't remember,A few times per week,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not at all",,Tech articles written by other developers;Tech...,,Man,No,,,No,Appropriate in length,Easy
88601,,No,Never,The quality of OSS and closed source software ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88802,,No,Never,,Employed full-time,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88816,,No,Never,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [38]:
df.loc[123]

MainBranch                                   I am a developer by profession
Hobbyist                                                                Yes
OpenSourcer               Less than once a month but more than once per ...
OpenSource                The quality of OSS and closed source software ...
Employment                                               Employed full-time
Country                                                              Turkey
Student                                                                  No
EdLevel                            Bachelor’s degree (BA, BS, B.Eng., etc.)
UndergradMajor            Another engineering discipline (ex. civil, ele...
EduOther                  Taken an online course in programming or softw...
OrgSize                                            1,000 to 4,999 employees
DevType                   Developer, back-end;Developer, front-end;Devel...
YearsCode                                                                 6
Age1stCode  

In [39]:
schema_df.loc['Hobbyist']

QuestionText    Do you code as a hobby?
Name: Hobbyist, dtype: object

In [40]:
schema_df.loc['MgrIdiot', 'QuestionText']

'How confident are you that your manager knows what they’re doing?'

In [41]:
schema_df.sort_index(ascending=False, inplace=True)  # Sorts indexes alphabeticly in reverse order
schema_df

Unnamed: 0_level_0,QuestionText
Column,Unnamed: 1_level_1
YearsCodePro,How many years have you coded professionally (...
YearsCode,"Including any education, how many years have y..."
WorkWeekHrs,"On average, how many hours per week do you work?"
WorkRemote,How often do you work remotely?
WorkPlan,How structured or planned is your work?
WorkLoc,Where would you prefer to work?
WorkChallenge,"Of these options, what are your greatest chall..."
WelcomeChange,"Compared to last year, how welcome do you feel..."
WebFrameWorkedWith,Which of the following web frameworks have you...
WebFrameDesireNextYear,Which of the following web frameworks have you...


## Filtering data

In [42]:
# Filter people who don't have last name 'Doe'
df_filter = ~(people_df['last'] == 'Doe')
df_filter

0     True
1    False
2    False
Name: last, dtype: bool

**All of those methods give the same result**

Using `loc` is the best option, because it additionaly allows for getting columns 

In [43]:
people_df[df_filter]

Unnamed: 0,email,first,last
0,corey@gmail.com,Corey,Kowalski


In [44]:
people_df[people_df['last'] == 'Doe']

Unnamed: 0,email,first,last
1,j.doe@yahoo.com,John,Doe
2,nick_deo@gmail.com,Nick,Doe


In [45]:
# Getting emails of respondents whose last name is 'Doe;
people_df.loc[df_filter, 'email']

0    corey@gmail.com
Name: email, dtype: object

In [46]:
people_df.loc[df_filter]

Unnamed: 0,email,first,last
0,corey@gmail.com,Corey,Kowalski


In [47]:
df_filter1 = (people_df['last'] == 'Doe') & (people_df['first'] == 'Nick')
df_filter2 = ~(people_df['last'] == 'Doe') | (people_df['first'] == 'Nick')

In [48]:
people_df.loc[df_filter1]

Unnamed: 0,email,first,last
2,nick_deo@gmail.com,Nick,Doe


In [49]:
people_df.loc[df_filter2]

Unnamed: 0,email,first,last
0,corey@gmail.com,Corey,Kowalski
2,nick_deo@gmail.com,Nick,Doe


In [50]:
# Filtering respondents with high salary
high_salary = (df['ConvertedComp'] > 70000)

df.loc[high_salary, ['Country', 'LanguageWorkedWith', 'ConvertedComp']]

Unnamed: 0_level_0,Country,LanguageWorkedWith,ConvertedComp
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
6,Canada,Java;R;SQL,366420.0
9,New Zealand,Bash/Shell/PowerShell;C#;HTML/CSS;JavaScript;P...,95179.0
13,United States,Bash/Shell/PowerShell;HTML/CSS;JavaScript;PHP;...,90000.0
16,United Kingdom,Bash/Shell/PowerShell;C#;HTML/CSS;JavaScript;T...,455352.0
22,United States,Bash/Shell/PowerShell;C++;HTML/CSS;JavaScript;...,103000.0
...,...,...,...
88876,United States,Bash/Shell/PowerShell;C#;HTML/CSS;Java;Python;...,180000.0
88877,United States,Bash/Shell/PowerShell;C;Clojure;HTML/CSS;Java;...,2000000.0
88878,United States,HTML/CSS;JavaScript;Scala;TypeScript,130000.0
88879,Finland,Bash/Shell/PowerShell;C++;Python,82488.0


In [51]:
countries = ['United States', 'Indina', 'United Kingdom', 'Germany', 'Canada']
country_filter = df['Country'].isin(countries)

df.loc[country_filter, 'Country']

Respondent
1        United Kingdom
4         United States
6                Canada
12               Canada
13        United States
              ...      
85182            Canada
85642     United States
85961    United Kingdom
88282     United States
88377            Canada
Name: Country, Length: 35947, dtype: object

In [52]:
# Gettnig respondends who know Python
lang_filter = df['LanguageWorkedWith'].str.contains('Python', na=False) # NaN = False

df.loc[lang_filter, 'LanguageWorkedWith']

Respondent
1                          HTML/CSS;Java;JavaScript;Python
2                                      C++;HTML/CSS;Python
4                                      C;C++;C#;Python;SQL
5              C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA
8        Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
                               ...                        
84539    Bash/Shell/PowerShell;C;C++;HTML/CSS;Java;Java...
85738      Bash/Shell/PowerShell;C++;Python;Ruby;Other(s):
86566      Bash/Shell/PowerShell;HTML/CSS;Python;Other(s):
87739             C;C++;HTML/CSS;JavaScript;PHP;Python;SQL
88212                           HTML/CSS;JavaScript;Python
Name: LanguageWorkedWith, Length: 36443, dtype: object

## Updating row and columns

In [53]:
people_df.columns

Index(['email', 'first', 'last'], dtype='object')

In [54]:
people_df.columns = ['email', 'first_name', 'last_name']
people_df

Unnamed: 0,email,first_name,last_name
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick_deo@gmail.com,Nick,Doe


In [55]:
people_df.columns = [col.upper() for col in people_df.columns]
people_df

Unnamed: 0,EMAIL,FIRST_NAME,LAST_NAME
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick_deo@gmail.com,Nick,Doe


In [56]:
people_df.columns = people_df.columns.str.replace('_', ' ')
people_df

Unnamed: 0,EMAIL,FIRST NAME,LAST NAME
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick_deo@gmail.com,Nick,Doe


In [57]:
people_df.rename(columns={'FIRST NAME': 'first', 'LAST NAME': 'last'}, inplace=True)
people_df

Unnamed: 0,EMAIL,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick_deo@gmail.com,Nick,Doe


In [58]:
people_df.loc[2] = ['fizykana6@op.pl','Nickolas', 'Pasieka']
people_df

Unnamed: 0,EMAIL,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,fizykana6@op.pl,Nickolas,Pasieka


In [59]:
people_df.loc[2, ['last', 'EMAIL']] = ['Doe', 'nick@gmail.com']
people_df

Unnamed: 0,EMAIL,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick@gmail.com,Nickolas,Doe


In [60]:
people_df.loc[2, 'last'] = 'Pasieka'
people_df

Unnamed: 0,EMAIL,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick@gmail.com,Nickolas,Pasieka


In [61]:
# Same as loc
people_df.at[2, 'last'] = 'Doe'
people_df

Unnamed: 0,EMAIL,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick@gmail.com,Nickolas,Doe


In [62]:
filt = (people_df['EMAIL'] == 'nick@gmail.com')
people_df[filt]['last'] = 'Pasieka'

people_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  people_df[filt]['last'] = 'Pasieka'


Unnamed: 0,EMAIL,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick@gmail.com,Nickolas,Doe


**When setting values it is best to use `.loc` or `.at`**

In [63]:
people_df['EMAIL'].str.upper()
print(people_df)

people_df['EMAIL'] = people_df['EMAIL'].str.upper()
print(people_df)

             EMAIL     first      last
0  corey@gmail.com     Corey  Kowalski
1  j.doe@yahoo.com      John       Doe
2   nick@gmail.com  Nickolas       Doe
             EMAIL     first      last
0  COREY@GMAIL.COM     Corey  Kowalski
1  J.DOE@YAHOO.COM      John       Doe
2   NICK@GMAIL.COM  Nickolas       Doe


In [64]:
people_df.rename(columns={'EMAIL':'email'}, inplace=True)

In [65]:
# Get length of email adresses
people_df['email'].apply(len)

0    15
1    15
2    14
Name: email, dtype: int64

In [66]:
def updateEmail(email: str):
    return email.upper()

people_df['email'] = people_df['email'].apply(updateEmail)
people_df

Unnamed: 0,email,first,last
0,COREY@GMAIL.COM,Corey,Kowalski
1,J.DOE@YAHOO.COM,John,Doe
2,NICK@GMAIL.COM,Nickolas,Doe


In [67]:
people_df['email'] = people_df['email'].apply(lambda email: email.lower())
people_df

Unnamed: 0,email,first,last
0,corey@gmail.com,Corey,Kowalski
1,j.doe@yahoo.com,John,Doe
2,nick@gmail.com,Nickolas,Doe


In [68]:
people_df.apply(len)

email    3
first    3
last     3
dtype: int64

In [69]:
people_df.apply(pd.Series.min)

email    corey@gmail.com
first              Corey
last                 Doe
dtype: object

In [70]:
people_df.apply(lambda x: x.min())

email    corey@gmail.com
first              Corey
last                 Doe
dtype: object

In [71]:
people_df.apply(pd.Series.min, axis='columns')

0    Corey
1      Doe
2      Doe
dtype: object

In [72]:
people_df.applymap(len)

Unnamed: 0,email,first,last
0,15,5,8
1,15,4,3
2,14,8,3


In [73]:
people_df['first'].map({'Corey': 'Manny', 'Nickolas': 'Makumba'})

0      Manny
1        NaN
2    Makumba
Name: first, dtype: object

In [74]:
people_df['first'].replace({'Corey': 'Manny', 'Nickolas': 'Makumba'})

0      Manny
1       John
2    Makumba
Name: first, dtype: object

In [75]:
df['Hobbyist'] = df['Hobbyist'].map({'Yes': True, 'No': False})

In [76]:
df.loc[:10, 'Hobbyist']

Respondent
1      True
2     False
3      True
4     False
5      True
6      True
7     False
8      True
9      True
10     True
Name: Hobbyist, dtype: bool

## Add/Remove Rows and Columns

In [77]:
# Only ['col'] notation works, '.' doesn't
people_df['full_name'] = people_df['first'] + ' ' + people_df['last']
people_df

Unnamed: 0,email,first,last,full_name
0,corey@gmail.com,Corey,Kowalski,Corey Kowalski
1,j.doe@yahoo.com,John,Doe,John Doe
2,nick@gmail.com,Nickolas,Doe,Nickolas Doe


In [78]:
people_df.drop(columns=['first', 'last'], inplace=True)
people_df

Unnamed: 0,email,full_name
0,corey@gmail.com,Corey Kowalski
1,j.doe@yahoo.com,John Doe
2,nick@gmail.com,Nickolas Doe


In [79]:
people_df['full_name'].str.split(' ')

0    [Corey, Kowalski]
1          [John, Doe]
2      [Nickolas, Doe]
Name: full_name, dtype: object

In [80]:
people_df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Corey,Kowalski
1,John,Doe
2,Nickolas,Doe


In [81]:
people_df[['first', 'last']] = people_df['full_name'].str.split(' ', expand=True)
people_df

Unnamed: 0,email,full_name,first,last
0,corey@gmail.com,Corey Kowalski,Corey,Kowalski
1,j.doe@yahoo.com,John Doe,John,Doe
2,nick@gmail.com,Nickolas Doe,Nickolas,Doe


In [82]:
new_rows = pd.DataFrame({'first': ['John', 'Emmy'], 'last': ['Block','Winehosue'], 'email': ['johnny@gmail.com', 'em@gg.pl']})

people_df = pd.concat([people_df, new_rows], ignore_index=True)
people_df

Unnamed: 0,email,full_name,first,last
0,corey@gmail.com,Corey Kowalski,Corey,Kowalski
1,j.doe@yahoo.com,John Doe,John,Doe
2,nick@gmail.com,Nickolas Doe,Nickolas,Doe
3,johnny@gmail.com,,John,Block
4,em@gg.pl,,Emmy,Winehosue


In [83]:
people_df.drop(index=[4,3], inplace=True)
people_df

Unnamed: 0,email,full_name,first,last
0,corey@gmail.com,Corey Kowalski,Corey,Kowalski
1,j.doe@yahoo.com,John Doe,John,Doe
2,nick@gmail.com,Nickolas Doe,Nickolas,Doe


In [84]:
filtr = people_df['last'] == 'Doe'
people_df.drop(index=people_df[filtr].index ,inplace=False)

Unnamed: 0,email,full_name,first,last
0,corey@gmail.com,Corey Kowalski,Corey,Kowalski


## Sorting data

In [None]:
people_df.sort_values(by=['last', 'first'], ascending=[True, False
], inplace=True)
people_df

Unnamed: 0,email,full_name,first,last
2,nick@gmail.com,Nickolas Doe,Nickolas,Doe
1,j.doe@yahoo.com,John Doe,John,Doe
0,corey@gmail.com,Corey Kowalski,Corey,Kowalski


In [None]:
people_df.sort_index(inplace=True)

In [None]:
people_df['email'].sort_values()

0    corey@gmail.com
1    j.doe@yahoo.com
2     nick@gmail.com
Name: email, dtype: object

In [None]:
df['ConvertedComp'].nlargest()

Respondent
58     2000000.0
102    2000000.0
166    2000000.0
436    2000000.0
452    2000000.0
Name: ConvertedComp, dtype: float64

In [None]:
df.nlargest(10, 'ConvertedComp')

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
58,I am a developer by profession,True,Once a month or more often,"OSS is, on average, of LOWER quality than prop...",Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Received on-the-job training in software devel...,,"Developer, back-end;Developer, desktop or ente...",28,19,23,Very satisfied,Very satisfied,Very confident,Yes,No,I am not interested in new job opportunities,1-2 years ago,,Yes,Office environment or company culture;Remote w...,Re-entry into the workforce,USD,United States dollar,113000.0,Weekly,2000000.0,40.0,There's no schedule or spec; I work on what se...,Being tasked with non-development work;Non-wor...,"Less than half the time, but at least one day ...",Home,A little above average,"Yes, because I see value in code review",1.0,"No, but I think we should",Developers and management have nearly equal in...,I have a great deal of influence,C#;Java;SQL,C#;F#;Java;Kotlin;SQL,Microsoft SQL Server;Oracle;SQLite,Microsoft SQL Server;Oracle;SQLite,Android;Windows,Android;Raspberry Pi;Windows,ASP.NET;jQuery,Angular/Angular.js;ASP.NET;jQuery,.NET,Hadoop;.NET;.NET Core;Node.js;Puppet;Xamarin,Android Studio;Visual Studio,Windows,I do not use containers,,,Yes,Yes,Yes,I don't use social media,In real life (in person),Login,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,,47.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,,Easy
102,I am a developer by profession,False,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...","Taught yourself a new language, framework, or ...","5,000 to 9,999 employees","Developer, full-stack",8,29,5,Slightly satisfied,Slightly satisfied,Somewhat confident,No,No,"I’m not actively looking, but I am open to new...",1-2 years ago,"Write any code;Write code by hand (e.g., on a ...",Yes,Office environment or company culture;Opportun...,"Something else changed (education, award, medi...",USD,United States dollar,67800.0,Weekly,2000000.0,40.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Distrac...,Less than once per month / Never,Office,Average,No,,"No, but I think we should",Not sure,I have some influence,C#;HTML/CSS;JavaScript;SQL;TypeScript,C;C++;Elixir;Go;Ruby;WebAssembly,Microsoft SQL Server,MongoDB;PostgreSQL;SQLite,Microsoft Azure,AWS;Kubernetes;Microsoft Azure,ASP.NET;jQuery;React.js;Other(s):,Angular/Angular.js;Vue.js,.NET;.NET Core,Node.js,Notepad++;Visual Studio;Visual Studio Code,Windows,I do not use containers,Non-currency applications of blockchain,Useful for immutable record keeping outside of...,No,Yes,Yes,I don't use social media,In real life (in person),Username,2012,Daily or almost daily,Find answers to specific questions;Learn how t...,1-2 times per week,Stack Overflow was much faster,60+ minutes,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,37.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Too long,Easy
166,I am a developer by profession,True,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","A social science (ex. anthropology, psychology...",Participated in a full-time developer training...,20 to 99 employees,"Developer, back-end;Developer, front-end;Devel...",7,15,6,Slightly satisfied,Slightly satisfied,Very confident,No,Not sure,"I’m not actively looking, but I am open to new...",3-4 years ago,Write any code;Complete a take-home project;So...,No,Financial performance or funding status of the...,I had a negative experience or interaction at ...,USD,United States dollar,137000.0,Weekly,2000000.0,45.0,There is a schedule and/or spec (made by me or...,Distracting work environment;Not enough people...,Less than once per month / Never,Home,A little above average,"Yes, because I see value in code review",8.0,"Yes, it's part of our process","The CTO, CIO, or other management purchase new...",I have some influence,Bash/Shell/PowerShell;Go;HTML/CSS;Java;JavaScr...,Bash/Shell/PowerShell;HTML/CSS;Java;JavaScript...,DynamoDB;Elasticsearch;MongoDB;PostgreSQL;Redi...,PostgreSQL;Redis;SQLite,AWS;Docker;Linux,AWS;Docker;iOS;Kubernetes;Linux,jQuery;React.js;Ruby on Rails,React.js;Ruby on Rails,,,Vim,Linux-based,Development;Testing;Production,Not at all,A passing fad,Yes,SIGH,Yes,Twitter,Online,Username,2011,A few times per month or weekly,Find answers to specific questions,Less than once per week,Stack Overflow was slightly faster,11-30 minutes,Yes,Less than once per month or monthly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not at all",Just as welcome now as I felt last year,,30.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Neither easy nor difficult
436,I am a developer by profession,True,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,20 to 99 employees,"Database administrator;Developer, back-end;Dev...",20,18,17,Slightly satisfied,Slightly satisfied,Somewhat confident,Yes,I am already a manager,"I’m not actively looking, but I am open to new...",3-4 years ago,"Write any code;Write code by hand (e.g., on a ...",No,Specific department or team I'd be working on;...,"My job status changed (promotion, new job, etc.)",USD,United States dollar,85000.0,Weekly,2000000.0,45.0,There is a schedule and/or spec (made by me or...,Lack of support from management;Meetings;Not e...,A few days each month,Office,Far above average,"Yes, because I see value in code review",,"No, but I think we should","The CTO, CIO, or other management purchase new...",I have some influence,Bash/Shell/PowerShell;HTML/CSS;Java;JavaScript...,Bash/Shell/PowerShell;Go;HTML/CSS;JavaScript;P...,Microsoft SQL Server;MySQL;Redis;SQLite,Couchbase;MySQL;Oracle;Redis,Android;AWS;Docker;Google Cloud Platform;Linux...,Android;Arduino;Docker;Google Cloud Platform;L...,jQuery;Laravel;React.js;Vue.js,Laravel;React.js;Vue.js,Node.js,Node.js;React Native;TensorFlow,Android Studio;Atom;Sublime Text;Visual Studio...,Windows,Development;Testing;Production,Not at all,Useful across many domains and could change ma...,Yes,SIGH,Yes,Reddit,Neither,Username,2010,Daily or almost daily,Find answers to specific questions;Contribute ...,3-5 times per week,They were about the same,,Yes,A few times per month or weekly,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Tech...,38.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Too long,Easy
452,I am a developer by profession,True,Never,The quality of OSS and closed source software ...,Employed full-time,United States,"Yes, full-time",I never completed any formal education,,Taken an online course in programming or softw...,100 to 499 employees,"Database administrator;Developer, back-end",7,28,7,Very satisfied,Very satisfied,Very confident,No,No,"I’m not actively looking, but I am open to new...",Less than a year ago,Solve a brain-teaser style puzzle;Interview wi...,No,Specific department or team I'd be working on;...,"Something else changed (education, award, medi...",USD,United States dollar,75000.0,Weekly,2000000.0,40.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Meeting...,A few days each month,Home,Average,"Yes, because I see value in code review",5.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have little or no influence,SQL;VBA,Python;SQL,Microsoft SQL Server,MongoDB;Microsoft SQL Server;Oracle;PostgreSQL,Windows,AWS;Linux;Microsoft Azure;Windows,,ASP.NET,.NET,.NET,Notepad++;Visual Studio,Windows,I do not use containers,Not at all,,Yes,"Fortunately, someone else has that title",What?,Facebook,In real life (in person),Screen Name,2013,Daily or almost daily,Find answers to specific questions;Learn how t...,1-2 times per week,Stack Overflow was slightly faster,0-10 minutes,Yes,Multiple times per day,Yes,"No, I've heard of them, but I am not part of a...","Yes, definitely",Somewhat more welcome now than last year,Tech articles written by other developers;Cour...,35.0,Man,No,,White or of European descent,No,Appropriate in length,Easy
491,I am a developer by profession,True,Less than once per year,,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Developer, full-stack;Developer, mobile",4,18,Less than 1 year,Very satisfied,Very satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,Specific department or team I'd be working on;...,I was preparing for a job search,USD,United States dollar,160000.0,Weekly,2000000.0,45.0,There is a schedule and/or spec (made by me or...,Distracting work environment;Inadequate access...,Less than once per month / Never,Office,Average,"Yes, because I see value in code review",2.0,"Yes, it's part of our process",Not sure,I have little or no influence,Java;Objective-C,Java;Objective-C;Swift,,,Android;iOS,Android;iOS,,,,React Native;Unity 3D,Android Studio;IntelliJ;Vim;Xcode,MacOS,"Outside of work, for personal projects",,A passing fad,Yes,SIGH,What?,YouTube,In real life (in person),Username,2014,A few times per week,Find answers to specific questions;Learn how t...,1-2 times per week,They were about the same,,No,,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Tech...,22.0,Man,No,Straight / Heterosexual,Hispanic or Latino/Latina;White or of European...,No,Appropriate in length,Neither easy nor difficult
539,I am a developer by profession,False,Less than once a month but more than once per ...,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,United States,No,Some college/university study without earning ...,Mathematics or statistics,Taken an online course in programming or softw...,"10,000 or more employees",Data scientist or machine learning specialist;...,22,14,20,Very satisfied,Slightly satisfied,Very confident,Yes,No,"I’m not actively looking, but I am open to new...",1-2 years ago,Write any code;Complete a take-home project;In...,Yes,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,140000.0,Weekly,2000000.0,50.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Non-wor...,A few days each month,Office,A little above average,"Yes, because I see value in code review",10.0,"Yes, it's not part of our process but the deve...","The CTO, CIO, or other management purchase new...",I have little or no influence,Bash/Shell/PowerShell;JavaScript;Python;SQL,Go;JavaScript;Python;TypeScript,PostgreSQL;SQLite,Couchbase;MongoDB;PostgreSQL;Redis;SQLite,AWS;Docker;Linux;MacOS;Slack,AWS;Docker;Google Cloud Platform;Linux;MacOS,Express;Flask;React.js,Flask;React.js,Node.js;Pandas;TensorFlow;Torch/PyTorch,Node.js;Torch/PyTorch,PyCharm;Vim;Visual Studio Code,MacOS,Development;Testing;Production,Not at all,A passing fad,No,Also Yes,Yes,Reddit,In real life (in person),Username,I don't remember,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,0-10 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Courses on technologies you're interested in,40.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Appropriate in length,Easy
770,I am a developer by profession,True,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","A humanities discipline (ex. literature, histo...",Taken an online course in programming or softw...,"1,000 to 4,999 employees","Developer, full-stack",5,17,3,Very satisfied,Very satisfied,Somewhat confident,No,Not sure,I am not interested in new job opportunities,1-2 years ago,Interview with people in peer roles;Interview ...,No,Office environment or company culture;Remote w...,"Something else changed (education, award, medi...",USD,United States dollar,68000.0,Weekly,2000000.0,38.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Non-wor...,All or almost all the time (I'm full-time remote),Home,Average,"Yes, because I see value in code review",6.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have little or no influence,Go;HTML/CSS;JavaScript;Python;Ruby;SQL,Go;HTML/CSS;Python;Ruby;TypeScript;WebAssembly,MariaDB;MySQL;SQLite,Elasticsearch;MariaDB;PostgreSQL,AWS;Docker;Linux;Raspberry Pi;Slack,Arduino;AWS;Docker;Linux;Raspberry Pi;Slack,Angular/Angular.js;jQuery;Ruby on Rails,Angular/Angular.js;Django;Ruby on Rails,Node.js,Apache Spark;TensorFlow;Torch/PyTorch,PyCharm;RubyMine;Sublime Text,Linux-based,Development;Testing,Non-currency applications of blockchain,,No,SIGH,Yes,Reddit,Online,Username,2011,Daily or almost daily,Find answers to specific questions;Learn how t...,1-2 times per week,Stack Overflow was slightly faster,31-60 minutes,Yes,I have never participated in Q&A on Stack Over...,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Tech...,29.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
789,I am a developer by profession,True,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,"Yes, full-time","Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Developer, back-end;Engineer, site reliability",12,9,6,Slightly dissatisfied,Neither satisfied nor dissatisfied,Not at all confident,Yes,Not sure,"I’m not actively looking, but I am open to new...",3-4 years ago,"Write any code;Write code by hand (e.g., on a ...",No,Specific department or team I'd be working on;...,I had a negative experience or interaction at ...,USD,United States dollar,180000.0,Monthly,2000000.0,40.0,There is a schedule and/or spec (made by me or...,Lack of support from management;Toxic work env...,A few days each month,Office,A little above average,"Yes, because I see value in code review",2.0,"Yes, it's not part of our process but the deve...",Developers and management have nearly equal in...,I have little or no influence,C#;F#;Python,F#;Go;Python;Rust,Elasticsearch;Other(s):,Other(s):,Docker;Microsoft Azure,Docker,ASP.NET,,.NET;.NET Core,.NET Core;TensorFlow,IntelliJ;Notepad++;Visual Studio;Visual Studio...,MacOS,Development;Testing;Production,Non-currency applications of blockchain,An irresponsible use of resources,No,"Fortunately, someone else has that title",Yes,Facebook,Online,Handle,2009,Less than once per month or monthly,Find answers to specific questions,Less than once per week,Stack Overflow was slightly faster,31-60 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Cour...,31.0,Woman,No,Bisexual,South Asian,No,Appropriate in length,Neither easy nor difficult
1232,I am a developer by profession,False,Never,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,2-9 employees,"Developer, front-end;Developer, game or graphi...",4,15,2,Slightly satisfied,Slightly dissatisfied,Somewhat confident,Yes,Yes,"I’m not actively looking, but I am open to new...",1-2 years ago,"Write code by hand (e.g., on a whiteboard);Sol...",No,Industry that I'd be working in;Specific depar...,"Something else changed (education, award, medi...",USD,United States dollar,800000.0,Weekly,2000000.0,40.0,There is a schedule and/or spec (made by me or...,Distracting work environment;Non-work commitme...,A few days each month,Office,A little below average,"Yes, because I see value in code review",5.0,"No, but I think we should",Developers and management have nearly equal in...,I have little or no influence,HTML/CSS;JavaScript;SQL,Java;Kotlin;Objective-C;Python;Swift;TypeScrip...,MySQL,MongoDB,,Android;AWS;iOS;MacOS;Windows,React.js,React.js,Node.js;Other(s):,Node.js;Other(s):,Visual Studio Code,MacOS,I do not use containers,Non-currency applications of blockchain,Useful across many domains and could change ma...,Yes,Yes,What?,WeChat 微信,In real life (in person),Username,2014,A few times per week,Find answers to specific questions,1-2 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers,25.0,Man,No,Straight / Heterosexual,East Asian,No,Appropriate in length,Neither easy nor difficult


## Grouping and Aggregating

In [None]:
df['ConvertedComp'].median()

57287.0

In [None]:
df.describe()

Unnamed: 0,CompTotal,ConvertedComp,WorkWeekHrs,CodeRevHrs,Age
count,55945.0,55823.0,64503.0,49790.0,79210.0
mean,551901400000.0,127110.7,42.127197,5.084308,30.336699
std,73319260000000.0,284152.3,37.28761,5.513931,9.17839
min,0.0,0.0,1.0,0.0,1.0
25%,20000.0,25777.5,40.0,2.0,24.0
50%,62000.0,57287.0,40.0,4.0,29.0
75%,120000.0,100000.0,44.75,6.0,35.0
max,1e+16,2000000.0,4850.0,99.0,99.0


In [None]:
df.count().nlargest()

Hobbyist       88883
OpenSourcer    88883
Country        88751
MainBranch     88331
SOVisitFreq    88263
dtype: int64

In [None]:
df['Hobbyist'].value_counts()

Hobbyist
True     71257
False    17626
Name: count, dtype: int64

In [None]:
df['SocialMedia'].value_counts()

SocialMedia
Reddit                      14374
YouTube                     13830
WhatsApp                    13347
Facebook                    13178
Twitter                     11398
Instagram                    6261
I don't use social media     5554
LinkedIn                     4501
WeChat 微信                     667
Snapchat                      628
VK ВКонта́кте                 603
Weibo 新浪微博                     56
Youku Tudou 优酷                 21
Hello                          19
Name: count, dtype: int64

In [None]:
df['SocialMedia'].value_counts(normalize=True)

SocialMedia
Reddit                      0.170233
YouTube                     0.163791
WhatsApp                    0.158071
Facebook                    0.156069
Twitter                     0.134988
Instagram                   0.074150
I don't use social media    0.065777
LinkedIn                    0.053306
WeChat 微信                   0.007899
Snapchat                    0.007437
VK ВКонта́кте               0.007141
Weibo 新浪微博                  0.000663
Youku Tudou 优酷              0.000249
Hello                       0.000225
Name: proportion, dtype: float64

In [None]:
# How many people answered question (NaN -> skiped answer)
df.count().nlargest()

Hobbyist       88883
OpenSourcer    88883
Country        88751
MainBranch     88331
SOVisitFreq    88263
dtype: int64

In [None]:
df['Country'].value_counts()

Country
United States        20949
India                 9061
Germany               5866
United Kingdom        5737
Canada                3395
                     ...  
Tonga                    1
Timor-Leste              1
North Korea              1
Brunei Darussalam        1
Chad                     1
Name: count, Length: 179, dtype: int64

In [None]:
country_group = df.groupby(['Country'])

In [None]:
country_group.get_group('United States')

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
4,I am a developer by profession,False,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
13,I am a developer by profession,True,Less than once a month but more than once per ...,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,United States,No,"Master’s degree (MA, MS, M.Eng., MBA, etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,10 to 19 employees,Data or business analyst;Database administrato...,17,11,8,Very satisfied,Very satisfied,,,,I am not interested in new job opportunities,3-4 years ago,Complete a take-home project;Interview with pe...,Yes,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,90000.0,Yearly,90000.0,40.0,There is a schedule and/or spec (made by me or...,"Meetings;Non-work commitments (parenting, scho...",All or almost all the time (I'm full-time remote),Home,A little above average,"Yes, because I see value in code review",5.0,"No, but I think we should",Developers and management have nearly equal in...,I have a great deal of influence,Bash/Shell/PowerShell;HTML/CSS;JavaScript;PHP;...,Bash/Shell/PowerShell;HTML/CSS;JavaScript;Rust...,Couchbase;DynamoDB;Firebase;MySQL,Firebase;MySQL;Redis,Android;AWS;Docker;IBM Cloud or Watson;iOS;Lin...,Android;AWS;Docker;IBM Cloud or Watson;Linux;S...,Angular/Angular.js;ASP.NET;Express;jQuery;Vue.js,Express;Vue.js,Node.js;Xamarin,Node.js;TensorFlow,Vim;Visual Studio;Visual Studio Code;Xcode,Windows,Development;Testing;Production,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,Yes,Yes,Twitter,In real life (in person),Username,2011,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,11-30 minutes,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Somewhat more welcome now than last year,Tech articles written by other developers;Cour...,28.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Appropriate in length,Easy
22,I am a developer by profession,True,Less than once per year,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,United States,No,Some college/university study without earning ...,,Taken an online course in programming or softw...,"10,000 or more employees","Data or business analyst;Designer;Developer, b...",35,12,18,Slightly satisfied,Very dissatisfied,Somewhat confident,No,No,"I’m not actively looking, but I am open to new...",More than 4 years ago,Interview with people in senior / management r...,No,Industry that I'd be working in;Financial perf...,I had a negative experience or interaction at ...,USD,United States dollar,103000.0,Yearly,103000.0,40.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Meeting...,"Less than half the time, but at least one day ...",Home,Average,No,,"No, but I think we should","The CTO, CIO, or other management purchase new...",I have little or no influence,Bash/Shell/PowerShell;C++;HTML/CSS;JavaScript;...,Bash/Shell/PowerShell;C++;HTML/CSS;JavaScript;...,Elasticsearch;MySQL;Oracle;Redis,Elasticsearch;MySQL;Oracle;Redis,Docker;Linux;Raspberry Pi;Windows,Docker;Linux;Raspberry Pi;Windows,Angular/Angular.js;Ruby on Rails,Angular/Angular.js;Ruby on Rails,Node.js,Node.js,Sublime Text;Visual Studio;Visual Studio Code,Windows,"Outside of work, for personal projects",Not at all,,Yes,Yes,Yes,Instagram,Online,Username,I don't remember,Daily or almost daily,Find answers to specific questions,3-5 times per week,Stack Overflow was much faster,0-10 minutes,Yes,A few times per week,Yes,"No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,47.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Appropriate in length,Easy
23,I am a developer by profession,True,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Information systems, information technology, o...",Taken an online course in programming or softw...,"10,000 or more employees","Developer, full-stack",3,19,1,Slightly satisfied,Slightly satisfied,Very confident,No,Not sure,"I’m not actively looking, but I am open to new...",Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,Opportunities for professional development;How...,I was preparing for a job search,USD,United States dollar,69000.0,Yearly,69000.0,40.0,There is a schedule and/or spec (made by me or...,Distracting work environment;Meetings;Non-work...,A few days each month,Office,Average,"Yes, because I see value in code review",8.0,"Yes, it's part of our process",Developers and management have nearly equal in...,I have little or no influence,Bash/Shell/PowerShell;HTML/CSS;JavaScript;Pyth...,Bash/Shell/PowerShell;Go;HTML/CSS;Java;JavaScr...,Oracle;SQLite,Couchbase;DynamoDB;Elasticsearch;Firebase;Oracle,Docker;Google Cloud Platform,Docker;iOS;Slack,React.js;Ruby on Rails,Express;React.js;Ruby on Rails;Vue.js,,React Native;TensorFlow,Visual Studio Code,MacOS,Development;Testing;Production,,Useful for immutable record keeping outside of...,Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Multiple times per day,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,I have never participated in Q&A on Stack Over...,Yes,"No, I've heard of them, but I am not part of a...","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Tech...,22.0,Man,No,Straight / Heterosexual,Black or of African descent,No,Appropriate in length,Easy
26,I am a developer by profession,True,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,United States,No,Some college/university study without earning ...,"Computer science, computer engineering, or sof...","Taught yourself a new language, framework, or ...","10,000 or more employees","Designer;Developer, back-end;Developer, deskto...",12,8,8,Very satisfied,Very satisfied,,,,"I’m not actively looking, but I am open to new...",Less than a year ago,Interview with people in peer roles;Interview ...,No,Remote work options;Diversity of the company o...,I was preparing for a job search,USD,United States dollar,114000.0,Yearly,114000.0,40.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Meeting...,"Less than half the time, but at least one day ...",Home,Far above average,"Yes, because I see value in code review",2.0,"Yes, it's not part of our process but the deve...",Developers typically have the most influence o...,I have a great deal of influence,Bash/Shell/PowerShell;C++;C#;HTML/CSS;JavaScri...,C#;HTML/CSS;JavaScript;Objective-C;Ruby;SQL;Sw...,Microsoft SQL Server;MySQL;Redis;SQLite,Microsoft SQL Server;MySQL;Redis;SQLite,AWS;Docker;Linux;MacOS;Microsoft Azure;Windows...,Android;Docker;iOS;Linux;MacOS;Microsoft Azure...,Angular/Angular.js;ASP.NET;Drupal;Express;jQue...,Angular/Angular.js;ASP.NET,.NET;.NET Core;Node.js;Xamarin,.NET;.NET Core;Node.js,Notepad++;Sublime Text;Vim;Visual Studio;Xcode,MacOS,Development;Testing,Not at all,A passing fad,Yes,SIGH,Yes,I don't use social media,In real life (in person),Username,2008,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,,34.0,Man,No,Gay or Lesbian,,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78292,,False,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",United States,No,"Other doctoral degree (Ph.D, Ed.D., etc.)","A health science (ex. nursing, pharmacy, radio...",Completed an industry certification program (e...,"Just me - I am a freelancer, sole proprietor, ...",Academic researcher,42,14,31,,,,,,,,,,,,,,,,,,,,,,,,,,,,Bash/Shell/PowerShell;C;Python,Bash/Shell/PowerShell;C;Python,SQLite,SQLite,Linux;Raspberry Pi;Other(s):,Linux;Raspberry Pi;Other(s):,,,Chef,,Emacs;IPython / Jupyter,Linux-based,I do not use containers,,Useful for immutable record keeping outside of...,No,Yes,Yes,I don't use social media,In real life (in person),,2013,A few times per week,Find answers to specific questions,Less than once per week,The other resource was slightly faster,11-30 minutes,Not sure / can't remember,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are","No, not really",Somewhat less welcome now than last year,,60.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Too long,Neither easy nor difficult
82717,,False,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",United States,No,"Secondary school (e.g. American high school, G...",,,,,Less than 1 year,,Less than 1 year,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Android;Windows,Android;Microsoft Azure;Windows,,,,,,MacOS,Testing,,,No,SIGH,Yes,Facebook,In real life (in person),Username,2018,Less than once per month or monthly,Find answers to specific questions,Less than once per week,,60+ minutes,No,,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...",Not sure,,Industry news about technologies you're intere...,44.0,Man,No,Straight / Heterosexual,White or of European descent,Yes,Appropriate in length,Neither easy nor difficult
83397,,True,Less than once per year,,"Not employed, but looking for work",United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,,,12,9,Less than 1 year,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;JavaScript;Python;SQL,C;C++;C#;Go;Java;JavaScript;Python;R;Ruby;SQL;...,,,Android;Arduino;Slack,Android;Arduino;Docker;iOS;Raspberry Pi;Slack,Flask,Django;Drupal;Flask;jQuery;React.js,,Chef;Torch/PyTorch,Eclipse;IPython / Jupyter;Sublime Text,MacOS,I do not use containers,,,,SIGH,Yes,,,Handle,I don't remember,A few times per week,Find answers to specific questions;Learn how t...,3-5 times per week,They were about the same,,Not sure / can't remember,,Yes,"No, and I don't know what those are","No, not at all",Just as welcome now as I felt last year,,27.0,Woman,No,Bisexual,White or of European descent,No,Appropriate in length,Easy
85642,,False,Less than once per year,"OSS is, on average, of LOWER quality than prop...","Independent contractor, freelancer, or self-em...",United States,No,Associate degree,"Information systems, information technology, o...",Taken an online course in programming or softw...,"Just me - I am a freelancer, sole proprietor, ...",Designer;Marketing or sales professional,20,7,Less than 1 year,,,,,,,,,,,,,,,,,,,,,,,,,,,,Go;HTML/CSS,,,,,,,,,,Visual Studio Code,Windows,I do not use containers,,Useful for immutable record keeping outside of...,No,SIGH,Yes,,In real life (in person),Handle,2008,Less than once per month or monthly,Find answers to specific questions,Less than once per week,Stack Overflow was slightly faster,60+ minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","No, not at all",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,34.0,"Non-binary, genderqueer, or gender non-conforming",,Bisexual;Gay or Lesbian,White or of European descent,No,Appropriate in length,Easy


In [None]:
filtr = df['Country'] == 'United States'
df.loc[filtr]['SocialMedia'].value_counts()

SocialMedia
Reddit                      5700
Twitter                     3468
Facebook                    2844
YouTube                     2463
I don't use social media    1851
Instagram                   1652
LinkedIn                    1020
WhatsApp                     609
Snapchat                     326
WeChat 微信                     93
VK ВКонта́кте                  9
Weibo 新浪微博                     8
Hello                          2
Youku Tudou 优酷                 1
Name: count, dtype: int64

In [None]:
# Without filter
country_group['SocialMedia'].value_counts(normalize=True).loc['India']

SocialMedia
WhatsApp                    0.342379
YouTube                     0.208405
LinkedIn                    0.109355
Facebook                    0.096301
Instagram                   0.094126
Twitter                     0.062063
Reddit                      0.054162
I don't use social media    0.028627
Snapchat                    0.002634
Hello                       0.000573
WeChat 微信                   0.000573
VK ВКонта́кте               0.000458
Youku Tudou 优酷              0.000229
Weibo 新浪微博                  0.000115
Name: proportion, dtype: float64

In [None]:
country_group['ConvertedComp'].median().loc['Germany']

63016.0

In [None]:
country_group['ConvertedComp'].agg(['median', 'mean']).loc['Canada']

median     68705.000000
mean      134018.564909
Name: Canada, dtype: float64

In [None]:
country_group['LanguageWorkedWith'].apply(lambda x: x.str.contains('Python').sum())

Country
Afghanistan                              8
Albania                                 23
Algeria                                 40
Andorra                                  0
Angola                                   2
                                        ..
Venezuela, Bolivarian Republic of...    28
Viet Nam                                78
Yemen                                    3
Zambia                                   4
Zimbabwe                                14
Name: LanguageWorkedWith, Length: 179, dtype: int64

In [None]:
country_group['LanguageWorkedWith'].apply(lambda x: x.str.contains('Python').sum() * 100 / x.shape[0])

Country
Afghanistan                             18.181818
Albania                                 26.744186
Algeria                                 29.850746
Andorra                                  0.000000
Angola                                  40.000000
                                          ...    
Venezuela, Bolivarian Republic of...    31.818182
Viet Nam                                33.766234
Yemen                                   15.789474
Zambia                                  33.333333
Zimbabwe                                35.897436
Name: LanguageWorkedWith, Length: 179, dtype: float64

In [None]:
df[df['Country'] == 'India']['LanguageWorkedWith'].str.contains('Python').sum()

3105

In [None]:
country_uses_python = country_group['LanguageWorkedWith'].apply(lambda x: x.str.contains('Python').sum())
country_corespondents = df['Country'].value_counts()

In [None]:
python_df = pd.concat([country_corespondents, country_uses_python], axis='columns')

In [None]:
python_df.rename(columns={'count': 'Respondents', 'LanguageWorkedWith': 'Know python'}, inplace=True)
python_df

Unnamed: 0_level_0,Respondents,Know python
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
United States,20949,10083
India,9061,3105
Germany,5866,2451
United Kingdom,5737,2384
Canada,3395,1558
...,...,...
Tonga,1,0
Timor-Leste,1,1
North Korea,1,0
Brunei Darussalam,1,0


In [None]:
python_df['Percent'] = python_df['Know python'] * 100 / python_df['Respondents']

In [None]:
python_df.sort_values(by='Percent', ascending=False)

Unnamed: 0_level_0,Respondents,Know python,Percent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sao Tome and Principe,1,1,100.000000
Timor-Leste,1,1,100.000000
Dominica,1,1,100.000000
Niger,1,1,100.000000
Turkmenistan,7,6,85.714286
...,...,...,...
Cape Verde,3,0,0.000000
Lao People's Democratic Republic,3,0,0.000000
Malawi,2,0,0.000000
Liberia,2,0,0.000000


## Cleaning Data

In [None]:
# Deletes rows with NaN values (Python - None, Numpy - np.na) values
# 'rows' or 0 drops rows, 'columns' or 1 drops entire columns
# 'any' drops rows if at least 1 value in row is empty (is defalut), 'all' only drops rows with all empty values
# subset=[list of columns to consider]
df.dropna(axis='rows', how='any', subset=[]) 

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,True,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,False,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
3,"I am not primarily a developer, but I write co...",True,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
4,I am a developer by profession,False,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,,Less than once per month / Never,Home,A little below average,No,,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,,,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
5,I am a developer by profession,True,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,,,,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88377,,True,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Canada,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;JavaScript;Other(s):,C++;HTML/CSS;JavaScript;SQL;WebAssembly;Other(s):,Firebase;SQLite,Firebase;MySQL;SQLite,Linux,Google Cloud Platform;Linux,jQuery,jQuery;Vue.js,Node.js,React Native;Unity 3D;Unreal Engine,Atom;Visual Studio;Visual Studio Code,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,What?,YouTube,,Username,I don't remember,A few times per week,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not at all",,Tech articles written by other developers;Tech...,,Man,No,,,No,Appropriate in length,Easy
88601,,False,Never,The quality of OSS and closed source software ...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88802,,False,Never,,Employed full-time,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88816,,False,Never,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
df.dropna(axis='columns', how='any')

Unnamed: 0_level_0,Hobbyist,OpenSourcer
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1
1,True,Never
2,False,Less than once per year
3,True,Never
4,False,Never
5,True,Once a month or more often
...,...,...
88377,True,Less than once a month but more than once per ...
88601,False,Never
88802,False,Never
88816,False,Never


In [None]:
# Changes all occurences of 'Missing' to 'None' or 'np.nan'
df.replace('Missing', None, inplace=True)
df.replace('Missing', np.nan, inplace=True)

In [None]:
df.isna()

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,False,False,False,False,False,False,False,False,True,False,True,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False
2,False,False,False,False,False,False,False,False,True,False,True,False,True,False,True,True,True,True,True,True,False,False,True,True,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,True,True,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,True,True,False,True,True,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88377,True,False,False,False,False,False,False,False,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,True,True,False,False,False
88601,True,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
88802,True,False,False,True,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
88816,True,False,False,False,False,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True


In [None]:
df.fillna(0)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,ConvertedComp,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1
1,I am a student who is learning to code,True,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,0,"Taught yourself a new language, framework, or ...",0,0,4,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0,0,0,0,0.0,0,0,0,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,0,0,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,0,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,0,No,Appropriate in length,Neither easy nor difficult
2,I am a student who is learning to code,False,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",0,Taken an online course in programming or softw...,0,"Developer, desktop or enterprise applications;...",0,17,0,0,0,0,0,0,I am actively looking for a job,I've never had a job,0,0,Financial performance or funding status of the...,"Something else changed (education, award, medi...",0,0,0.0,0,0.0,0.0,0,0,0,0,0,0,0.0,0,0,0,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,0,MySQL,Windows,Windows,Django,Django,0,0,Atom;PyCharm,Windows,I do not use containers,0,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,0,No,Appropriate in length,Neither easy nor difficult
3,"I am not primarily a developer, but I write co...",True,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3,22,1,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,0.0,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,0,0,0,Other(s):,0,0,Vim;Visual Studio Code,Linux-based,I do not use containers,0,0,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,0,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,0,Yes,Appropriate in length,Neither easy nor difficult
4,I am a developer by profession,False,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,100 to 499 employees,"Developer, full-stack",3,16,Less than 1 year,Very satisfied,Slightly satisfied,Very confident,No,Not sure,I am not interested in new job opportunities,Less than a year ago,"Write code by hand (e.g., on a whiteboard);Int...",No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,USD,United States dollar,61000.0,Yearly,61000.0,80.0,There's no schedule or spec; I work on what se...,0,Less than once per month / Never,Home,A little below average,No,0.0,"No, but I think we should",Developers typically have the most influence o...,I have little or no influence,C;C++;C#;Python;SQL,C;C#;JavaScript;SQL,MySQL;SQLite,MySQL;SQLite,Linux;Windows,Linux;Windows,0,0,.NET,.NET,Eclipse;Vim;Visual Studio;Visual Studio Code,Windows,I do not use containers,Not at all,"Useful for decentralized currency (i.e., Bitcoin)",Yes,SIGH,Yes,Reddit,In real life (in person),Username,2014,Daily or almost daily,Find answers to specific questions;Pass the ti...,1-2 times per week,Stack Overflow was much faster,31-60 minutes,Yes,Less than once per month or monthly,Yes,"No, and I don't know what those are","No, not really",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
5,I am a developer by profession,True,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",Taken an online course in programming or softw...,"10,000 or more employees","Academic researcher;Developer, desktop or ente...",16,14,9,Very dissatisfied,Slightly dissatisfied,Somewhat confident,Yes,No,I am not interested in new job opportunities,Less than a year ago,"Write any code;Write code by hand (e.g., on a ...",No,"Industry that I'd be working in;Languages, fra...",I was preparing for a job search,UAH,Ukrainian hryvnia,0.0,0,0.0,55.0,There is a schedule and/or spec (made by me or...,Being tasked with non-development work;Inadequ...,A few days each month,Office,A little above average,"Yes, because I see value in code review",0.0,"Yes, it's part of our process",Not sure,I have little or no influence,C++;HTML/CSS;Java;JavaScript;Python;SQL;VBA,HTML/CSS;Java;JavaScript;SQL;WebAssembly,Couchbase;MongoDB;MySQL;Oracle;PostgreSQL;SQLite,Couchbase;Firebase;MongoDB;MySQL;Oracle;Postgr...,Android;Linux;MacOS;Slack;Windows,Android;Docker;Kubernetes;Linux;Slack,Django;Express;Flask;jQuery;React.js;Spring,Flask;jQuery;React.js;Spring,Cordova;Node.js,Apache Spark;Hadoop;Node.js;React Native,IntelliJ;Notepad++;Vim,Linux-based,"Outside of work, for personal projects",Not at all,0,Yes,Also Yes,Yes,Facebook,In real life (in person),Username,I don't remember,Multiple times per day,Find answers to specific questions,More than 10 times per week,Stack Overflow was much faster,0,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","Yes, definitely",Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88377,0,True,Less than once a month but more than once per ...,The quality of OSS and closed source software ...,"Not employed, and not looking for work",Canada,No,Primary/elementary school,0,"Taught yourself a new language, framework, or ...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0,0,0,0,0.0,0,0,0,HTML/CSS;JavaScript;Other(s):,C++;HTML/CSS;JavaScript;SQL;WebAssembly;Other(s):,Firebase;SQLite,Firebase;MySQL;SQLite,Linux,Google Cloud Platform;Linux,jQuery,jQuery;Vue.js,Node.js,React Native;Unity 3D;Unreal Engine,Atom;Visual Studio;Visual Studio Code,Windows,I do not use containers,0,Useful across many domains and could change ma...,Yes,Yes,What?,YouTube,0,Username,I don't remember,A few times per week,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was slightly faster,11-30 minutes,Yes,I have never participated in Q&A on Stack Over...,"No, I knew that Stack Overflow had a job board...","No, I've heard of them, but I am not part of a...","No, not at all",0,Tech articles written by other developers;Tech...,0.0,Man,No,0,0,No,Appropriate in length,Easy
88601,0,False,Never,The quality of OSS and closed source software ...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0
88802,0,False,Never,0,Employed full-time,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0
88816,0,False,Never,"OSS is, on average, of HIGHER quality than pro...","Independent contractor, freelancer, or self-em...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0


In [None]:
df.dtypes['ConvertedComp']

dtype('float64')

In [None]:
# cast every cell o float
# df.astype(float)

In [None]:
na_vals = ['Missing', 'NA']
df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent', na_values=na_vals)

In [None]:
# same outcomes
df.dtypes['YearsCode']
df['YearsCode'].dtypes

dtype('O')

In [None]:
df['YearsCode'].unique()

array(['4', nan, '3', '16', '13', '6', '8', '12', '2', '5', '17', '10',
       '14', '35', '7', 'Less than 1 year', '30', '9', '26', '40', '19',
       '15', '20', '28', '25', '1', '22', '11', '33', '50', '41', '18',
       '34', '24', '23', '42', '27', '21', '36', '32', '39', '38', '31',
       '37', 'More than 50 years', '29', '44', '45', '48', '46', '43',
       '47', '49'], dtype=object)

In [None]:
df['YearsCode'].replace('Less than 1 year', 0, inplace=True)
df['YearsCode'].replace('More than 50 years', 51, inplace=True)