In [190]:
import os

import numpy as np
import pandas as pd

# Small hand-made sample frame used for the quick experiments in this notebook.
people = {
    "first": ['Corey', 'Jane', 'John'],
    "last": ['Schafer', 'Doe', 'Smith'],
    "email": ['CoreySchafer@gmail.com', 'JaneDoe@hotmail.com', 'JohnSmith@outlook.com'],
}

# Display options: allow up to 100 rows/columns before truncating, never cut
# cell text, and render floats with four decimal places.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.float_format', lambda x: '%.4f' % x)

# Data location. The default is the original local checkout; set the
# SURVEY_DATA_DIR environment variable to run the notebook on another machine
# without editing this cell.
DATA_DIR = os.environ.get('SURVEY_DATA_DIR', '/Users/josephyu/Documents/GitHub/data')
survey_source = os.path.join(DATA_DIR, 'survey_results_public.csv')
schema_source = os.path.join(DATA_DIR, 'survey_results_schema.csv')

# df: survey answers indexed by respondent id; schema_df: column descriptions
# indexed by column name; pp_df: the toy frame built from `people`.
df = pd.read_csv(survey_source, index_col='Respondent')
schema_df = pd.read_csv(schema_source, index_col='Column')
pp_df = pd.DataFrame(people)

# Columns noted as having perfect data quality (no missing values):
# ['Hobbyist', 'OpenSourcer', 'Region']
# NOTE(review): 'Region' does not appear among the object-dtype columns listed
# in this notebook's outputs - confirm the column name.

In [8]:
# Work on an independent copy: a plain `df = pp_df` only aliases the frame, so
# adding or renaming columns through `df` would silently modify pp_df as well.
df = pp_df.copy()

df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,JaneDoe@hotmail.com
2,John,Smith,JohnSmith@outlook.com


In [10]:
# apply method: compute every address length with Series.apply, then print each one
for address_length in df['email'].apply(len):
    print(address_length)

22
19
21


In [35]:
# Negative ages on purpose, so abs() has something to fix later on.
df['Age'] = [-31, -32, -33]

In [36]:
df

Unnamed: 0,first,last,email,Age
0,Corey,Schafer,CoreySchafer@gmail.com,-31
1,Jane,Doe,JaneDoe@hotmail.com,-32
2,John,Smith,JohnSmith@outlook.com,-33


In [39]:
# Element-wise abs(); returns a new Series and leaves df['Age'] unchanged.
df['Age'].map(abs)

0    31
1    32
2    33
Name: Age, dtype: int64

In [43]:
# Unsorted sample list for trying out the built-in sorted().
li = [1, 9, 2, 8, 3, 7, 4, 6, 5]

In [45]:
# sorted() returns a new ascending list; li itself keeps its original order.
sorted(li)

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [49]:
# Ascending sort on Age; returns a new frame, df keeps its row order.
df.sort_values(by=['Age'], ascending=True)

Unnamed: 0,first,last,email,Age
2,John,Smith,JohnSmith@outlook.com,-33
1,Jane,Doe,JaneDoe@hotmail.com,-32
0,Corey,Schafer,CoreySchafer@gmail.com,-31


In [61]:
# ascii() returns the repr of each value, so the result carries its own quotes.
df['email'].apply(ascii).loc[0]

"'CoreySchafer@gmail.com'"

In [63]:
# First e-mail as a plain Python str. .iloc[0] reads the element positionally
# without first copying the whole column into a list.
df['email'].iloc[0]

'CoreySchafer@gmail.com'

In [70]:
# Title-case every column label (first -> First, ...).
df.columns = [label.title() for label in df.columns]

In [73]:
# Positional relabel: one new name per existing column, in the existing order.
df.columns = [
    'First_Name',
    'Last_Name',
    'Email_Address',
    'Age',
]

In [74]:
df

Unnamed: 0,First_Name,Last_Name,Email_Address,Age
0,Corey,Schafer,CoreySchafer@gmail.com,-31
1,Jane,Doe,JaneDoe@hotmail.com,-32
2,John,Smith,JohnSmith@outlook.com,-33


In [77]:
# Make the ages positive. Series.abs() is the vectorised built-in, so there is
# no need for a Python-level call per element via apply(abs).
df['Age'] = df['Age'].abs()

In [78]:
df

Unnamed: 0,First_Name,Last_Name,Email_Address,Age
0,Corey,Schafer,CoreySchafer@gmail.com,31
1,Jane,Doe,JaneDoe@hotmail.com,32
2,John,Smith,JohnSmith@outlook.com,33


In [79]:
# bin() yields strings such as '0b11111', so the result has object dtype.
df['Age'].map(bin)

0     0b11111
1    0b100000
2    0b100001
Name: Age, dtype: object

In [86]:
# Rename exactly one column. rename() matches whole labels only, whereas
# columns.str.replace() rewrites the text inside *every* label that contains
# the pattern (and treats it as a regex on older pandas versions).
df = df.rename(columns={'Email_Address': 'Email'})

In [87]:
df

Unnamed: 0,First_Name,Last_Name,Email,Age
0,Corey,Schafer,CoreySchafer@gmail.com,31
1,Jane,Doe,JaneDoe@hotmail.com,32
2,John,Smith,JohnSmith@outlook.com,33


In [93]:
# Pass the function object itself (str.lower, without parentheses):
# apply calls it once for every element of the Series.

df['Email'].apply(str.lower)

0    coreyschafer@gmail.com
1       janedoe@hotmail.com
2     johnsmith@outlook.com
Name: Email, dtype: object

In [104]:
# Same result through the vectorised .str accessor; unlike apply(str.lower),
# missing values are passed through as NaN instead of raising.
df['Email'].str.lower()

0    coreyschafer@gmail.com
1       janedoe@hotmail.com
2     johnsmith@outlook.com
Name: Email, dtype: object

In [99]:
# Positional selection of every row and every column (the whole frame).
df.iloc[:, :]

Unnamed: 0,First_Name,Last_Name,Email,Age
0,Corey,Schafer,CoreySchafer@gmail.com,31
1,Jane,Doe,JaneDoe@hotmail.com,32
2,John,Smith,JohnSmith@outlook.com,33


In [106]:
# Upper-case each address by handing the unbound str method straight to apply.
df['Email'].apply(str.upper)

0    COREYSCHAFER@GMAIL.COM
1       JANEDOE@HOTMAIL.COM
2     JOHNSMITH@OUTLOOK.COM
Name: Email, dtype: object

In [107]:
# Vectorised equivalent of apply(str.upper) via the .str accessor.
df['Email'].str.upper()

0    COREYSCHAFER@GMAIL.COM
1       JANEDOE@HOTMAIL.COM
2     JOHNSMITH@OUTLOOK.COM
Name: Email, dtype: object

In [113]:
# .loc label slices include both endpoints: First_Name through Email.
# NOTE(review): a trailing .apply(str.upper) was left disabled here, presumably
# because DataFrame.apply hands str.upper a whole column rather than a single
# string; applymap is the element-wise tool for a frame.
df.loc[:, 'First_Name':'Email']

Unnamed: 0,First_Name,Last_Name,Email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,JaneDoe@hotmail.com
2,John,Smith,JohnSmith@outlook.com


In [115]:
# Upper-case the first names element by element.
df['First_Name'].map(str.upper)

0    COREY
1     JANE
2     JOHN
Name: First_Name, dtype: object

In [117]:
# Upper-case the last names element by element.
df['Last_Name'].map(str.upper)

0    SCHAFER
1        DOE
2      SMITH
Name: Last_Name, dtype: object

In [122]:
# applymap calls the function on every individual cell of the frame, so it
# only works here because every column of pp_df holds strings.
pp_df.applymap(str.upper)

Unnamed: 0,first,last,email
0,COREY,SCHAFER,COREYSCHAFER@GMAIL.COM
1,JANE,DOE,JANEDOE@HOTMAIL.COM
2,JOHN,SMITH,JOHNSMITH@OUTLOOK.COM


In [132]:
# Per-column non-null counts and dtypes: a quick view of where data is missing.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 88883 entries, 1 to 88863
Data columns (total 84 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   MainBranch              88331 non-null  object 
 1   Hobbyist                88883 non-null  object 
 2   OpenSourcer             88883 non-null  object 
 3   OpenSource              86842 non-null  object 
 4   Employment              87181 non-null  object 
 5   Country                 88751 non-null  object 
 6   Student                 87014 non-null  object 
 7   EdLevel                 86390 non-null  object 
 8   UndergradMajor          75614 non-null  object 
 9   EduOther                84260 non-null  object 
 10  OrgSize                 71791 non-null  object 
 11  DevType                 81335 non-null  object 
 12  YearsCode               87938 non-null  object 
 13  Age1stCode              87634 non-null  object 
 14  YearsCodePro            74331 non-null

In [139]:
# Number of object-dtype (text) columns.
df.select_dtypes(include=object).shape[1]

79

In [140]:
# Number of numeric columns.
df.select_dtypes(include=np.number).shape[1]

5

In [141]:
# Total number of columns.
df.shape[1]

84

In [148]:
# 🧠 ⭐️ fillna("") first: str.upper would raise on the float NaN that marks a
# missing answer, so every gap is turned into an empty string beforehand.
(
    df.select_dtypes(include=object)
    .fillna('')
    .applymap(str.upper)
)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompFreq,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1
1,I AM A STUDENT WHO IS LEARNING TO CODE,YES,NEVER,THE QUALITY OF OSS AND CLOSED SOURCE SOFTWARE IS ABOUT THE SAME,"NOT EMPLOYED, AND NOT LOOKING FOR WORK",UNITED KINGDOM,NO,PRIMARY/ELEMENTARY SCHOOL,,"TAUGHT YOURSELF A NEW LANGUAGE, FRAMEWORK, OR TOOL WITHOUT TAKING A FORMAL COURSE",,,4,10,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;JAVA;JAVASCRIPT;PYTHON,C;C++;C#;GO;HTML/CSS;JAVA;JAVASCRIPT;PYTHON;SQL,SQLITE,MYSQL,MACOS;WINDOWS,ANDROID;ARDUINO;WINDOWS,DJANGO;FLASK,FLASK;JQUERY,NODE.JS,NODE.JS,INTELLIJ;NOTEPAD++;PYCHARM,WINDOWS,I DO NOT USE CONTAINERS,,,YES,"FORTUNATELY, SOMEONE ELSE HAS THAT TITLE",YES,TWITTER,ONLINE,USERNAME,2017,A FEW TIMES PER MONTH OR WEEKLY,FIND ANSWERS TO SPECIFIC QUESTIONS;LEARN HOW TO DO THINGS I DIDN’T NECESSARILY LOOK FOR,3-5 TIMES PER WEEK,STACK OVERFLOW WAS MUCH FASTER,31-60 MINUTES,NO,,"NO, I DIDN'T KNOW THAT STACK OVERFLOW HAD A JOB BOARD","NO, AND I DON'T KNOW WHAT THOSE ARE",NEUTRAL,JUST AS WELCOME NOW AS I FELT LAST YEAR,TECH ARTICLES WRITTEN BY OTHER DEVELOPERS;INDUSTRY NEWS ABOUT TECHNOLOGIES YOU'RE INTERESTED IN;COURSES ON TECHNOLOGIES YOU'RE INTERESTED IN,MAN,NO,STRAIGHT / HETEROSEXUAL,,NO,APPROPRIATE IN LENGTH,NEITHER EASY NOR DIFFICULT
2,I AM A STUDENT WHO IS LEARNING TO CODE,NO,LESS THAN ONCE PER YEAR,THE QUALITY OF OSS AND CLOSED SOURCE SOFTWARE IS ABOUT THE SAME,"NOT EMPLOYED, BUT LOOKING FOR WORK",BOSNIA AND HERZEGOVINA,"YES, FULL-TIME","SECONDARY SCHOOL (E.G. AMERICAN HIGH SCHOOL, GERMAN REALSCHULE OR GYMNASIUM, ETC.)",,TAKEN AN ONLINE COURSE IN PROGRAMMING OR SOFTWARE DEVELOPMENT (E.G. A MOOC),,"DEVELOPER, DESKTOP OR ENTERPRISE APPLICATIONS;DEVELOPER, FRONT-END",,17,,,,,,,I AM ACTIVELY LOOKING FOR A JOB,I'VE NEVER HAD A JOB,,,"FINANCIAL PERFORMANCE OR FUNDING STATUS OF THE COMPANY OR ORGANIZATION;SPECIFIC DEPARTMENT OR TEAM I'D BE WORKING ON;LANGUAGES, FRAMEWORKS, AND OTHER TECHNOLOGIES I'D BE WORKING WITH","SOMETHING ELSE CHANGED (EDUCATION, AWARD, MEDIA, ETC.)",,,,,,,,,,,,,C++;HTML/CSS;PYTHON,C++;HTML/CSS;JAVASCRIPT;SQL,,MYSQL,WINDOWS,WINDOWS,DJANGO,DJANGO,,,ATOM;PYCHARM,WINDOWS,I DO NOT USE CONTAINERS,,USEFUL ACROSS MANY DOMAINS AND COULD CHANGE MANY ASPECTS OF OUR LIVES,YES,YES,YES,INSTAGRAM,ONLINE,USERNAME,2017,DAILY OR ALMOST DAILY,FIND ANSWERS TO SPECIFIC QUESTIONS;LEARN HOW TO DO THINGS I DIDN’T NECESSARILY LOOK FOR,3-5 TIMES PER WEEK,STACK OVERFLOW WAS MUCH FASTER,11-30 MINUTES,YES,A FEW TIMES PER MONTH OR WEEKLY,"NO, I KNEW THAT STACK OVERFLOW HAD A JOB BOARD BUT HAVE NEVER USED OR VISITED IT","NO, AND I DON'T KNOW WHAT THOSE ARE","YES, SOMEWHAT",JUST AS WELCOME NOW AS I FELT LAST YEAR,TECH ARTICLES WRITTEN BY OTHER DEVELOPERS;INDUSTRY NEWS ABOUT TECHNOLOGIES YOU'RE INTERESTED IN;TECH MEETUPS OR EVENTS IN YOUR AREA;COURSES ON TECHNOLOGIES YOU'RE INTERESTED IN,MAN,NO,STRAIGHT / HETEROSEXUAL,,NO,APPROPRIATE IN LENGTH,NEITHER EASY NOR DIFFICULT
3,"I AM NOT PRIMARILY A DEVELOPER, BUT I WRITE CODE SOMETIMES AS PART OF MY WORK",YES,NEVER,THE QUALITY OF OSS AND CLOSED SOURCE SOFTWARE IS ABOUT THE SAME,EMPLOYED FULL-TIME,THAILAND,NO,"BACHELOR’S DEGREE (BA, BS, B.ENG., ETC.)",WEB DEVELOPMENT OR WEB DESIGN,"TAUGHT YOURSELF A NEW LANGUAGE, FRAMEWORK, OR TOOL WITHOUT TAKING A FORMAL COURSE",100 TO 499 EMPLOYEES,"DESIGNER;DEVELOPER, BACK-END;DEVELOPER, FRONT-END;DEVELOPER, FULL-STACK",3,22,1,SLIGHTLY SATISFIED,SLIGHTLY SATISFIED,NOT AT ALL CONFIDENT,NOT SURE,NOT SURE,"I’M NOT ACTIVELY LOOKING, BUT I AM OPEN TO NEW OPPORTUNITIES",1-2 YEARS AGO,INTERVIEW WITH PEOPLE IN PEER ROLES,NO,"LANGUAGES, FRAMEWORKS, AND OTHER TECHNOLOGIES I'D BE WORKING WITH;REMOTE WORK OPTIONS;FLEX TIME OR A FLEXIBLE SCHEDULE",I WAS PREPARING FOR A JOB SEARCH,THB,THAI BAHT,MONTHLY,THERE'S NO SCHEDULE OR SPEC; I WORK ON WHAT SEEMS MOST IMPORTANT OR URGENT,DISTRACTING WORK ENVIRONMENT;INADEQUATE ACCESS TO NECESSARY TOOLS;LACK OF SUPPORT FROM MANAGEMENT,LESS THAN ONCE PER MONTH / NEVER,HOME,AVERAGE,NO,"NO, BUT I THINK WE SHOULD",NOT SURE,I HAVE LITTLE OR NO INFLUENCE,HTML/CSS,ELIXIR;HTML/CSS,POSTGRESQL,POSTGRESQL,,,,OTHER(S):,,,VIM;VISUAL STUDIO CODE,LINUX-BASED,I DO NOT USE CONTAINERS,,,YES,YES,YES,REDDIT,IN REAL LIFE (IN PERSON),USERNAME,2011,A FEW TIMES PER WEEK,FIND ANSWERS TO SPECIFIC QUESTIONS;LEARN HOW TO DO THINGS I DIDN’T NECESSARILY LOOK FOR,6-10 TIMES PER WEEK,THEY WERE ABOUT THE SAME,,YES,LESS THAN ONCE PER MONTH OR MONTHLY,YES,"NO, I'VE HEARD OF THEM, BUT I AM NOT PART OF A PRIVATE Q&A INSTANCE",NEUTRAL,JUST AS WELCOME NOW AS I FELT LAST YEAR,TECH MEETUPS OR EVENTS IN YOUR AREA;COURSES ON TECHNOLOGIES YOU'RE INTERESTED IN,MAN,NO,STRAIGHT / HETEROSEXUAL,,YES,APPROPRIATE IN LENGTH,NEITHER EASY NOR DIFFICULT
4,I AM A DEVELOPER BY PROFESSION,NO,NEVER,THE QUALITY OF OSS AND CLOSED SOURCE SOFTWARE IS ABOUT THE SAME,EMPLOYED FULL-TIME,UNITED STATES,NO,"BACHELOR’S DEGREE (BA, BS, B.ENG., ETC.)","COMPUTER SCIENCE, COMPUTER ENGINEERING, OR SOFTWARE ENGINEERING","TAKEN AN ONLINE COURSE IN PROGRAMMING OR SOFTWARE DEVELOPMENT (E.G. A MOOC);RECEIVED ON-THE-JOB TRAINING IN SOFTWARE DEVELOPMENT;TAUGHT YOURSELF A NEW LANGUAGE, FRAMEWORK, OR TOOL WITHOUT TAKING A FORMAL COURSE",100 TO 499 EMPLOYEES,"DEVELOPER, FULL-STACK",3,16,LESS THAN 1 YEAR,VERY SATISFIED,SLIGHTLY SATISFIED,VERY CONFIDENT,NO,NOT SURE,I AM NOT INTERESTED IN NEW JOB OPPORTUNITIES,LESS THAN A YEAR AGO,"WRITE CODE BY HAND (E.G., ON A WHITEBOARD);INTERVIEW WITH PEOPLE IN SENIOR / MANAGEMENT ROLES",NO,"LANGUAGES, FRAMEWORKS, AND OTHER TECHNOLOGIES I'D BE WORKING WITH;OFFICE ENVIRONMENT OR COMPANY CULTURE;OPPORTUNITIES FOR PROFESSIONAL DEVELOPMENT",I WAS PREPARING FOR A JOB SEARCH,USD,UNITED STATES DOLLAR,YEARLY,THERE'S NO SCHEDULE OR SPEC; I WORK ON WHAT SEEMS MOST IMPORTANT OR URGENT,,LESS THAN ONCE PER MONTH / NEVER,HOME,A LITTLE BELOW AVERAGE,NO,"NO, BUT I THINK WE SHOULD",DEVELOPERS TYPICALLY HAVE THE MOST INFLUENCE ON PURCHASING NEW TECHNOLOGY,I HAVE LITTLE OR NO INFLUENCE,C;C++;C#;PYTHON;SQL,C;C#;JAVASCRIPT;SQL,MYSQL;SQLITE,MYSQL;SQLITE,LINUX;WINDOWS,LINUX;WINDOWS,,,.NET,.NET,ECLIPSE;VIM;VISUAL STUDIO;VISUAL STUDIO CODE,WINDOWS,I DO NOT USE CONTAINERS,NOT AT ALL,"USEFUL FOR DECENTRALIZED CURRENCY (I.E., BITCOIN)",YES,SIGH,YES,REDDIT,IN REAL LIFE (IN PERSON),USERNAME,2014,DAILY OR ALMOST DAILY,FIND ANSWERS TO SPECIFIC QUESTIONS;PASS THE TIME / RELAX,1-2 TIMES PER WEEK,STACK OVERFLOW WAS MUCH FASTER,31-60 MINUTES,YES,LESS THAN ONCE PER MONTH OR MONTHLY,YES,"NO, AND I DON'T KNOW WHAT THOSE ARE","NO, NOT REALLY",JUST AS WELCOME NOW AS I FELT LAST YEAR,TECH ARTICLES WRITTEN BY OTHER DEVELOPERS;INDUSTRY NEWS ABOUT TECHNOLOGIES YOU'RE INTERESTED IN;TECH MEETUPS OR EVENTS IN YOUR AREA;COURSES ON TECHNOLOGIES YOU'RE 
INTERESTED IN,MAN,NO,STRAIGHT / HETEROSEXUAL,WHITE OR OF EUROPEAN DESCENT,NO,APPROPRIATE IN LENGTH,EASY
5,I AM A DEVELOPER BY PROFESSION,YES,ONCE A MONTH OR MORE OFTEN,"OSS IS, ON AVERAGE, OF HIGHER QUALITY THAN PROPRIETARY / CLOSED SOURCE SOFTWARE",EMPLOYED FULL-TIME,UKRAINE,NO,"BACHELOR’S DEGREE (BA, BS, B.ENG., ETC.)","COMPUTER SCIENCE, COMPUTER ENGINEERING, OR SOFTWARE ENGINEERING","TAKEN AN ONLINE COURSE IN PROGRAMMING OR SOFTWARE DEVELOPMENT (E.G. A MOOC);RECEIVED ON-THE-JOB TRAINING IN SOFTWARE DEVELOPMENT;TAUGHT YOURSELF A NEW LANGUAGE, FRAMEWORK, OR TOOL WITHOUT TAKING A FORMAL COURSE;CONTRIBUTED TO OPEN SOURCE SOFTWARE","10,000 OR MORE EMPLOYEES","ACADEMIC RESEARCHER;DEVELOPER, DESKTOP OR ENTERPRISE APPLICATIONS;DEVELOPER, FULL-STACK;DEVELOPER, MOBILE",16,14,9,VERY DISSATISFIED,SLIGHTLY DISSATISFIED,SOMEWHAT CONFIDENT,YES,NO,I AM NOT INTERESTED IN NEW JOB OPPORTUNITIES,LESS THAN A YEAR AGO,"WRITE ANY CODE;WRITE CODE BY HAND (E.G., ON A WHITEBOARD);SOLVE A BRAIN-TEASER STYLE PUZZLE;INTERVIEW WITH PEOPLE IN SENIOR / MANAGEMENT ROLES",NO,"INDUSTRY THAT I'D BE WORKING IN;LANGUAGES, FRAMEWORKS, AND OTHER TECHNOLOGIES I'D BE WORKING WITH;FLEX TIME OR A FLEXIBLE SCHEDULE",I WAS PREPARING FOR A JOB SEARCH,UAH,UKRAINIAN HRYVNIA,,"THERE IS A SCHEDULE AND/OR SPEC (MADE BY ME OR BY A COLLEAGUE), AND I FOLLOW IT VERY CLOSELY","BEING TASKED WITH NON-DEVELOPMENT WORK;INADEQUATE ACCESS TO NECESSARY TOOLS;NON-WORK COMMITMENTS (PARENTING, SCHOOL WORK, HOBBIES, ETC.)",A FEW DAYS EACH MONTH,OFFICE,A LITTLE ABOVE AVERAGE,"YES, BECAUSE I SEE VALUE IN CODE REVIEW","YES, IT'S PART OF OUR PROCESS",NOT SURE,I HAVE LITTLE OR NO INFLUENCE,C++;HTML/CSS;JAVA;JAVASCRIPT;PYTHON;SQL;VBA,HTML/CSS;JAVA;JAVASCRIPT;SQL;WEBASSEMBLY,COUCHBASE;MONGODB;MYSQL;ORACLE;POSTGRESQL;SQLITE,COUCHBASE;FIREBASE;MONGODB;MYSQL;ORACLE;POSTGRESQL;SQLITE,ANDROID;LINUX;MACOS;SLACK;WINDOWS,ANDROID;DOCKER;KUBERNETES;LINUX;SLACK,DJANGO;EXPRESS;FLASK;JQUERY;REACT.JS;SPRING,FLASK;JQUERY;REACT.JS;SPRING,CORDOVA;NODE.JS,APACHE SPARK;HADOOP;NODE.JS;REACT NATIVE,INTELLIJ;NOTEPAD++;VIM,LINUX-BASED,"OUTSIDE OF WORK, FOR 
PERSONAL PROJECTS",NOT AT ALL,,YES,ALSO YES,YES,FACEBOOK,IN REAL LIFE (IN PERSON),USERNAME,I DON'T REMEMBER,MULTIPLE TIMES PER DAY,FIND ANSWERS TO SPECIFIC QUESTIONS,MORE THAN 10 TIMES PER WEEK,STACK OVERFLOW WAS MUCH FASTER,,YES,A FEW TIMES PER MONTH OR WEEKLY,"NO, I KNEW THAT STACK OVERFLOW HAD A JOB BOARD BUT HAVE NEVER USED OR VISITED IT","NO, I'VE HEARD OF THEM, BUT I AM NOT PART OF A PRIVATE Q&A INSTANCE","YES, DEFINITELY",JUST AS WELCOME NOW AS I FELT LAST YEAR,TECH MEETUPS OR EVENTS IN YOUR AREA;COURSES ON TECHNOLOGIES YOU'RE INTERESTED IN,MAN,NO,STRAIGHT / HETEROSEXUAL,WHITE OR OF EUROPEAN DESCENT;MULTIRACIAL,NO,APPROPRIATE IN LENGTH,EASY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88377,,YES,LESS THAN ONCE A MONTH BUT MORE THAN ONCE PER YEAR,THE QUALITY OF OSS AND CLOSED SOURCE SOFTWARE IS ABOUT THE SAME,"NOT EMPLOYED, AND NOT LOOKING FOR WORK",CANADA,NO,PRIMARY/ELEMENTARY SCHOOL,,"TAUGHT YOURSELF A NEW LANGUAGE, FRAMEWORK, OR TOOL WITHOUT TAKING A FORMAL COURSE;CONTRIBUTED TO OPEN SOURCE SOFTWARE",,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;JAVASCRIPT;OTHER(S):,C++;HTML/CSS;JAVASCRIPT;SQL;WEBASSEMBLY;OTHER(S):,FIREBASE;SQLITE,FIREBASE;MYSQL;SQLITE,LINUX,GOOGLE CLOUD PLATFORM;LINUX,JQUERY,JQUERY;VUE.JS,NODE.JS,REACT NATIVE;UNITY 3D;UNREAL ENGINE,ATOM;VISUAL STUDIO;VISUAL STUDIO CODE,WINDOWS,I DO NOT USE CONTAINERS,,USEFUL ACROSS MANY DOMAINS AND COULD CHANGE MANY ASPECTS OF OUR LIVES,YES,YES,WHAT?,YOUTUBE,,USERNAME,I DON'T REMEMBER,A FEW TIMES PER WEEK,FIND ANSWERS TO SPECIFIC QUESTIONS;LEARN HOW TO DO THINGS I DIDN’T NECESSARILY LOOK FOR;PASS THE TIME / RELAX,3-5 TIMES PER WEEK,STACK OVERFLOW WAS SLIGHTLY FASTER,11-30 MINUTES,YES,I HAVE NEVER PARTICIPATED IN Q&A ON STACK OVERFLOW,"NO, I KNEW THAT STACK OVERFLOW HAD A JOB BOARD BUT HAVE NEVER USED OR VISITED IT","NO, I'VE HEARD OF THEM, BUT I AM NOT PART OF A PRIVATE Q&A INSTANCE","NO, NOT AT ALL",,TECH ARTICLES WRITTEN BY OTHER DEVELOPERS;TECH MEETUPS OR EVENTS IN YOUR AREA;COURSES ON TECHNOLOGIES YOU'RE INTERESTED IN,MAN,NO,,,NO,APPROPRIATE IN LENGTH,EASY
88601,,NO,NEVER,THE QUALITY OF OSS AND CLOSED SOURCE SOFTWARE IS ABOUT THE SAME,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88802,,NO,NEVER,,EMPLOYED FULL-TIME,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
88816,,NO,NEVER,"OSS IS, ON AVERAGE, OF HIGHER QUALITY THAN PROPRIETARY / CLOSED SOURCE SOFTWARE","INDEPENDENT CONTRACTOR, FREELANCER, OR SELF-EMPLOYED",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [164]:
# Character count of every cell in the toy frame.
pp_df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,22
1,4,3,19
2,4,5,21


In [167]:
# Character count of every text answer; missing answers become '' and count as 0.
(
    df.select_dtypes(include=object)
    .fillna('')
    .applymap(len)
)

Unnamed: 0_level_0,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompFreq,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
Respondent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1
1,38,3,5,63,38,14,2,25,0,81,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,47,6,5,13,23,12,12,7,7,26,7,23,0,0,3,40,3,7,6,8,4,31,87,18,30,13,2,0,53,35,7,39,140,3,2,23,0,2,21,26
2,38,2,23,63,34,22,14,82,0,75,0,66,0,2,0,0,0,0,0,0,31,20,0,0,182,54,0,0,0,0,0,0,0,0,0,0,0,0,19,27,0,5,7,7,6,6,0,0,12,7,23,0,69,3,3,3,9,6,8,4,21,87,18,30,13,3,31,80,35,13,39,176,3,2,23,0,2,21,26
3,77,3,5,63,18,8,2,40,29,81,20,71,1,2,1,18,18,20,8,8,60,13,35,2,118,32,3,9,7,74,97,32,4,7,2,25,8,29,8,15,10,10,0,0,0,9,0,0,22,11,23,0,0,3,3,3,6,24,8,4,20,87,19,24,0,3,35,3,67,7,39,80,3,2,23,0,3,21,26
4,30,2,5,63,18,13,2,40,63,210,20,21,1,2,16,14,18,14,2,8,44,20,93,2,146,32,3,20,6,74,0,32,4,22,2,25,73,29,19,19,12,12,13,13,0,0,4,4,44,7,23,10,49,3,4,3,6,24,8,4,21,56,18,30,13,3,35,3,35,14,39,176,3,2,23,28,2,21,4
5,30,3,26,79,18,7,2,40,63,246,24,105,2,2,1,17,21,18,3,2,44,20,142,2,130,32,3,17,0,92,136,21,6,22,39,29,8,29,43,40,48,57,33,37,43,28,15,40,22,11,38,10,0,3,8,3,8,24,8,16,22,34,27,30,0,3,31,80,67,15,39,80,3,2,23,40,2,21,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88377,0,3,50,63,38,6,2,25,0,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,29,49,15,21,5,27,6,13,7,35,37,7,23,0,69,3,3,5,7,0,8,16,20,109,18,34,13,3,50,80,67,14,0,122,3,2,0,0,2,21,4
88601,0,2,5,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
88802,0,2,5,0,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
88816,0,2,5,79,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [181]:
# Take an independent copy: with a plain `df = pp_df` alias, modifying df
# (for example overwriting the `first` column) would also modify pp_df.
df = pp_df.copy()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,JaneDoe@hotmail.com
2,John,Smith,JohnSmith@outlook.com


In [182]:
# Swap a single value; returns a new Series, df is left untouched.
df['first'].replace(to_replace='Jane', value='Adam')

0    Corey
1     Adam
2     John
Name: first, dtype: object

In [188]:
# Assign the result back instead of calling replace(..., inplace=True) on the
# df['first'] selection: that in-place call acts on an intermediate object and
# is not guaranteed to write through to df (it does not under Copy-on-Write).
df['first'] = df['first'].replace({
    'Corey': 'Mark_1',
    'Jane': 'Mark_2',
    'John': 'Mark_3',
})

In [189]:
df

Unnamed: 0,first,last,email
0,Mark_1,Schafer,CoreySchafer@gmail.com
1,Mark_2,Doe,JaneDoe@hotmail.com
2,Mark_3,Smith,JohnSmith@outlook.com


In [178]:
# 🧭 One value to swap       -> replace(old, new)
# 🧭 Two or more values      -> replace({old: new, ...})
# Values that are not keys of the mapping are left as they are.
df['first'].replace(to_replace={
    'John': 'Joseph',
    'Corey': 'Random',
})

0    Random
1      Jane
2    Joseph
Name: first, dtype: object

In [173]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,JaneDoe@hotmail.com
2,John,Smith,JohnSmith@outlook.com


In [203]:
# isnull() and isna() are aliases: the sorted per-column null counts must agree
# position by position.
(
    df.isnull().sum().sort_values().to_numpy()
    == df.isna().sum().sort_values().to_numpy()
)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [207]:
# Recode the Yes/No answers as booleans; returns a new Series, df is unchanged.
df['Hobbyist'].replace(to_replace={'Yes': True, 'No': False})

Respondent
1         True
2        False
3         True
4        False
5         True
         ...  
88377     True
88601    False
88802    False
88816    False
88863     True
Name: Hobbyist, Length: 88883, dtype: bool