In [56]:
import pandas as pd

In [57]:
# Only the schema is loaded at this point. The survey results CSV is read
# further down, in the cell that actually works with it; loading it here as
# well was wasted I/O because the next cell rebinds `df` to a small demo frame
# before the survey data is ever used.
schema_df = pd.read_csv('data2020/survey_results_schema.csv')

In [58]:
# Small three-row demo frame: each dict key becomes a column and each list
# supplies that column's values in row order.
people = {
    "first": ["first1", "first2", "first3"],
    "last": ["last1", "last2", "last3"],
    "email": ["email1", "email2", "email3"],
}
df = pd.DataFrame(people)

In [59]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [60]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [61]:
# Relabel all three columns at once; the new names are applied by position.
df.columns = ['first_name', 'last_name', 'email']

In [62]:
df

Unnamed: 0,first_name,last_name,email
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [63]:
# Upper-case every column label through the vectorised string accessor.
df.columns = df.columns.str.upper()

In [64]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [65]:
# Swap every underscore in the column labels for a space.
df.columns = df.columns.str.replace('_', ' ')

In [66]:
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [67]:
# Lower-case every column label through the vectorised string accessor.
df.columns = df.columns.str.lower()

In [68]:
df

Unnamed: 0,first name,last name,email
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [69]:
# Swap the spaces back to underscores, restoring the snake_case labels.
df.columns = df.columns.str.replace(' ', '_')

In [70]:
df

Unnamed: 0,first_name,last_name,email
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [71]:
# Preview only: rename() returns a new frame with the mapped labels and the
# result is not assigned, so `df` keeps its current column names.
df.rename(columns = {'first_name':'first', 'last_name':'last'})

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [72]:
# Persist the rename by rebinding `df` to the returned frame rather than
# passing inplace=True: the end result is the same, but the cell no longer
# mutates an object that earlier cells have already displayed.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [73]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,first3,last3,email3


In [74]:
df.loc[2]

first    first3
last      last3
email    email3
Name: 2, dtype: object

In [75]:
# Overwrite the whole row labelled 2; the list supplies one value per column,
# in column order.
df.loc[2] = ['FIRst3', 'LASt3', 'EMAil3']

In [76]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,LASt3,EMAil3


In [77]:
df.loc[2, ['last', 'email']]

last      LASt3
email    EMAil3
Name: 2, dtype: object

In [78]:
# Passing a list of column labels limits the update to those cells of row 2.
df.loc[2, ['last', 'email']] = ['3Last', '3Email']

In [79]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,3Last,3Email


In [80]:
# One row label plus one column label targets a single cell.
df.loc[2, 'last'] = 'newlast2'

In [81]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,newlast2,3Email


In [82]:
# .at addresses exactly one cell by row and column label; here it overwrites
# the same cell that the preceding .loc assignment wrote.
df.at[2, 'last'] = 'atlast2'

In [83]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,atlast2,3Email


In [84]:
# Boolean mask marking the rows whose email is exactly 'email2'.
filt = df['email'].eq('email2')

In [85]:
# Select rows and column in a single .loc lookup instead of chaining
# df[filt]['last']. The result is the same Series, without building an
# intermediate filtered frame, and it matches the form that must be used when
# assigning through a filter.
df.loc[filt, 'last']

1    last2
Name: last, dtype: object

In [86]:
# Boolean mask marking the rows whose email is exactly '3Email'.
filt = df['email'].eq('3Email')

In [87]:
# Use the boolean mask as the row selector and name the column in the same
# .loc call, so the assignment is written into `df` itself.
df.loc[filt, 'last'] = 'newlast3'

In [88]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,newlast3,3Email


In [89]:
df['email'].str.lower()

0    email1
1    email2
2    3email
Name: email, dtype: object

In [90]:
# .str.lower() returns a new Series, so it is assigned back to store the
# lower-cased values in the column.
df['email'] = df['email'].str.lower()

In [91]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,newlast3,3email


In [92]:
df['email'].apply(len)

0    6
1    6
2    6
Name: email, dtype: int64

In [93]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [94]:
df['email'].apply(update_email)

0    EMAIL1
1    EMAIL2
2    3EMAIL
Name: email, dtype: object

In [95]:
# Series.apply calls update_email once per value; assigning the result back
# stores the upper-cased emails in the column.
df['email'] = df['email'].apply(update_email)

In [96]:
df

Unnamed: 0,first,last,email
0,first1,last1,EMAIL1
1,first2,last2,EMAIL2
2,FIRst3,newlast3,3EMAIL


In [97]:
# Same pattern with an inline lambda instead of a named function; this puts
# the emails back into lower case.
df['email'] = df['email'].apply(lambda x:x.lower())

In [98]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,newlast3,3email


In [99]:
df['email'].apply(len)

0    6
1    6
2    6
Name: email, dtype: int64

In [100]:
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [101]:
len(df['email'])

3

In [102]:
df.apply(len, axis = 'rows')

first    3
last     3
email    3
dtype: int64

In [103]:
df.apply(len, axis = 'columns')

0    3
1    3
2    3
dtype: int64

In [104]:
df.apply(pd.Series.min)

first    FIRst3
last      last1
email    3email
dtype: object

In [105]:
df.apply(lambda x : x.min())

first    FIRst3
last      last1
email    3email
dtype: object

In [106]:
# Apply len to every individual cell. DataFrame.map is the current name for
# the element-wise operation; DataFrame.applymap is deprecated and emits a
# FutureWarning on the pandas version this notebook runs under.
df.map(len)

  df.applymap(len)


Unnamed: 0,first,last,email
0,6,5,6
1,6,5,6
2,6,8,6


In [107]:
df

Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,FIRst3,newlast3,3email


In [108]:
# Lower-case every individual cell (all columns hold strings here).
# DataFrame.map replaces the deprecated DataFrame.applymap, which emits a
# FutureWarning on the pandas version this notebook runs under.
df.map(str.lower)

  df.applymap(str.lower)


Unnamed: 0,first,last,email
0,first1,last1,email1
1,first2,last2,email2
2,first3,newlast3,3email


In [109]:
df['first'].map({'first1':'newfirst1', 'last1':'newlast1'})

0    newfirst1
1          NaN
2          NaN
Name: first, dtype: object

In [111]:
df['first'].map({'first1':'newfirst1', 'first2':'newfirst2'})

0    newfirst1
1    newfirst2
2          NaN
Name: first, dtype: object

In [112]:
df['first'].replace({'first1':'newfirst1', 'last1':'newlast1'})

0    newfirst1
1       first2
2       FIRst3
Name: first, dtype: object

In [113]:
df['first'].replace({'first1':'newfirst1', 'first2':'newfirst2'})

0    newfirst1
1    newfirst2
2       FIRst3
Name: first, dtype: object

In [114]:
# replace() is used rather than map() because values absent from the mapping
# are kept as they are (row 2 stays 'FIRst3'), whereas map() turned the
# unmatched values into NaN in the cells above.
df['first'] = df['first'].replace({'first1':'newfirst1', 'first2':'newfirst2'})

In [115]:
df

Unnamed: 0,first,last,email
0,newfirst1,last1,email1
1,newfirst2,last2,email2
2,FIRst3,newlast3,3email


In [116]:
# Switch from the demo frame to the survey results.
df = pd.read_csv('data2020/survey_results_public.csv')
# Preview only: the renamed frame is displayed but not assigned, so `df`
# still carries the 'ConvertedComp' label after this cell.
df.rename(columns = {'ConvertedComp' : 'SalaryUSD'})

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,SalaryUSD,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,Yes,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
1,2,I am a developer by profession,No,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
2,3,I code primarily as a hobby,Yes,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,Yes,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
4,5,"I used to be a developer by profession, but no...",Yes,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64456,64858,,Yes,,16,,,,United States,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64457,64867,,Yes,,,,,,Morocco,,...,,,,,,,,,,
64458,64898,,Yes,,,,,,Viet Nam,,...,,,,,,,,,,
64459,64925,,Yes,,,,,,Poland,,...,,,,,Angular;Angular.js;React.js,,,,,


In [117]:
# Persist the rename by rebinding `df` to the returned frame rather than
# passing inplace=True; the resulting columns are the same and the cell stays
# free of in-place mutation.
df = df.rename(columns={'ConvertedComp': 'SalaryUSD'})

In [118]:
df['SalaryUSD']

0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
         ..
64456   NaN
64457   NaN
64458   NaN
64459   NaN
64460   NaN
Name: SalaryUSD, Length: 64461, dtype: float64

In [119]:
df['Hobbyist']

0        Yes
1         No
2        Yes
3        Yes
4        Yes
        ... 
64456    Yes
64457    Yes
64458    Yes
64459    Yes
64460    Yes
Name: Hobbyist, Length: 64461, dtype: object

In [120]:
df['Hobbyist'].map({'Yes':True, 'No':False})

0         True
1        False
2         True
3         True
4         True
         ...  
64456     True
64457     True
64458     True
64459     True
64460     True
Name: Hobbyist, Length: 64461, dtype: object

In [121]:
# Convert the 'Yes'/'No' answers to True/False. map() yields NaN for any value
# that is not a key of the mapping.
# NOTE(review): the mapped Series above is shown with dtype object rather than
# bool, which suggests the column contains such values - confirm.
df['Hobbyist'] = df['Hobbyist'].map({'Yes':True, 'No':False})

In [122]:
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,SalaryUSD,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,True,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
1,2,I am a developer by profession,False,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
2,3,I code primarily as a hobby,True,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,True,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
4,5,"I used to be a developer by profession, but no...",True,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64456,64858,,True,,16,,,,United States,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64457,64867,,True,,,,,,Morocco,,...,,,,,,,,,,
64458,64898,,True,,,,,,Viet Nam,,...,,,,,,,,,,
64459,64925,,True,,,,,,Poland,,...,,,,,Angular;Angular.js;React.js,,,,,
