In [12]:
import pandas as pd

In [13]:
# Read the survey responses and the schema file from the local
# ./developer_survey_2020 directory (paths are relative to the working directory).
df = pd.read_csv('./developer_survey_2020/survey_results_public.csv')
schema_df = pd.read_csv('./developer_survey_2020/survey_results_schema.csv')

In [14]:
# Toy data set: one list per column, aligned by position
# (row i is made of the i-th entry of every list).
people = dict(
    name=['vasu', 'parth', 'nidhi'],
    last=['gami', 'gami', 'merja'],
    email=[
        'vatsalgami86@gmail.com',
        'gamiparth1@gmail.com',
        'merjanidhi@gmail.com',
    ],
)

# Each dict key becomes a column label; rows get the default 0..2 integer index.
pdf = pd.DataFrame(data=people)

In [15]:
# Show the frame built from the people dict.
pdf

Unnamed: 0,name,last,email
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,nidhi,merja,merjanidhi@gmail.com


In [16]:
# Walk the raw dict: print each key followed by its list of values.
for column_label in people:
    print(column_label, people[column_label])

name ['vasu', 'parth', 'nidhi']
last ['gami', 'gami', 'merja']
email ['vatsalgami86@gmail.com', 'gamiparth1@gmail.com', 'merjanidhi@gmail.com']


In [17]:
# Assigning a list to .columns relabels all columns at once, matched by position.
pdf.columns = ['first_name','last_name','email']
pdf

Unnamed: 0,first_name,last_name,email
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,nidhi,merja,merjanidhi@gmail.com


In [21]:
# Upper-case every column label through the Index's vectorised string accessor.
pdf.columns = pdf.columns.str.upper()
pdf

Unnamed: 0,FIRST,LAST,EMAIL
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,nidhi,merja,merjanidhi@gmail.com


In [22]:
# Swap underscores for spaces in each column label.
pdf.columns = [label.replace('_', ' ') for label in pdf.columns]
pdf

Unnamed: 0,FIRST,LAST,EMAIL
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,nidhi,merja,merjanidhi@gmail.com


In [23]:
# Lower-case every column label.
pdf.columns = [x.lower() for x in pdf.columns]

# Every later cell addresses the columns as 'first' / 'last'. On a fresh
# top-to-bottom run the labels reaching this point still derive from
# 'first_name' / 'last_name', so pdf['last'] would not exist without this step.
# rename() ignores keys that are absent, so it is a no-op when the labels are
# already short.
pdf = pdf.rename(columns={
    'first name': 'first', 'last name': 'last',
    'first_name': 'first', 'last_name': 'last',
})
pdf

Unnamed: 0,first,last,email
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,nidhi,merja,merjanidhi@gmail.com


In [24]:
# .loc[row_label, [columns]] assigns several cells of one row in a single statement:
# row 2 gets a new 'last' and 'email' (values are matched to the listed columns by position).
pdf.loc[2,['last','email']] = ['patel','patelnidhi@gmail.com']
pdf

Unnamed: 0,first,last,email
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,nidhi,patel,patelnidhi@gmail.com


In [25]:
# Anti-pattern, kept only to show the warning: pdf[filt] yields an intermediate
# object, and the ['first'] assignment is applied to that object instead of pdf
# (pandas reports "A value is trying to be set on a copy of a slice").
filt = pdf['last'] == 'patel'  # boolean mask: True for rows whose 'last' equals 'patel'
pdf[filt]['first'] = 'Drashti'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pdf[filt]['first'] = 'Drashti'


In [27]:
# Correct way to update through a filter: one .loc[mask, column] assignment writes
# into pdf itself. Every row where the mask is True receives the new value.

pdf.loc[filt,'first'] = 'Drashti'
pdf

Unnamed: 0,first,last,email
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,Drashti,patel,patelnidhi@gmail.com


In [31]:
def function(email):
    """Return the upper-cased form of the given value (calls its ``.upper()``).

    Despite the parameter name, the call below passes values from the
    'last' column, not e-mail addresses.
    """
    return email.upper()
# Series.apply calls function once per element of pdf['last'].
pdf['last'] = pdf['last'].apply(function)

In [32]:
# Show the frame after upper-casing the 'last' column.
pdf

Unnamed: 0,first,last,email
0,vasu,GAMI,vatsalgami86@gmail.com
1,parth,GAMI,gamiparth1@gmail.com
2,Drashti,PATEL,patelnidhi@gmail.com


In [33]:
# Lower-case each last name again, handing str.lower straight to apply
# rather than wrapping it in a lambda.
pdf['last'] = pdf['last'].apply(str.lower)
pdf

Unnamed: 0,first,last,email
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,Drashti,patel,patelnidhi@gmail.com


In [34]:
# On a Series, apply() runs len on each element: the character count of every last name.
pdf['last'].apply(len)

0    4
1    4
2    5
Name: last, dtype: int64

In [35]:
# On a DataFrame, apply() hands each column (a Series) to len, so the result is the
# number of rows in each column -- not the length of the strings.
pdf.apply(len) # pdf.apply(len, axis='columns') would instead pass each row, giving the number of columns per row

first    3
last     3
email    3
dtype: int64

In [37]:
# Column-wise minimum. For these string columns that is the first value in sort order,
# where upper-case letters sort before lower-case ones (hence 'Drashti' for 'first').
pdf.apply(pd.Series.min)  # equivalent lambda form: pdf.apply(lambda x: x.min()), where x is each column as a Series

first                 Drashti
last                     gami
email    gamiparth1@gmail.com
dtype: object

In [38]:
# applymap() calls the function on every individual cell of the DataFrame.
# NOTE(review): pandas 2.1+ deprecates DataFrame.applymap in favour of DataFrame.map -- confirm the pandas version in use.
pdf.applymap(len) # result: the character count of each cell

Unnamed: 0,first,last,email
0,4,4,22
1,5,4,20
2,7,5,20


In [39]:
# Upper-case every cell. The result is only displayed, not assigned, so pdf keeps its values.
pdf.applymap(str.upper)

Unnamed: 0,first,last,email
0,VASU,GAMI,VATSALGAMI86@GMAIL.COM
1,PARTH,GAMI,GAMIPARTH1@GMAIL.COM
2,DRASHTI,PATEL,PATELNIDHI@GMAIL.COM


In [45]:
# Series.map with a dict substitutes each value by its dict entry; values with no
# matching key become NaN.
# NOTE(review): the key 'DRASHTI' does not match the stored value 'Drashti' (the lookup
# is case-sensitive), so row 2 comes out as NaN -- confirm whether that is intended.
pdf['first'].map({'vasu':'vatsal','parth':'parthu','DRASHTI':'DAKLO'})

0    vatsal
1    parthu
2       NaN
Name: first, dtype: object

In [44]:
# replace() leaves values without a matching key untouched ('Drashti' stays as it is),
# whereas map() would turn them into NaN.
pdf['first'].replace({'vasu':'vatsal','parth':'parthu'})

0     vatsal
1     parthu
2    Drashti
Name: first, dtype: object

In [43]:
# The map() and replace() results were only displayed, never assigned back,
# so the frame still holds its previous values.
pdf

Unnamed: 0,first,last,email
0,vasu,gami,vatsalgami86@gmail.com
1,parth,gami,gamiparth1@gmail.com
2,Drashti,patel,patelnidhi@gmail.com


In [46]:
# List the column labels of the survey frame.
df.columns

Index(['Respondent', 'MainBranch', 'Hobbyist', 'Age', 'Age1stCode', 'CompFreq',
       'CompTotal', 'ConvertedComp', 'Country', 'CurrencyDesc',
       'CurrencySymbol', 'DatabaseDesireNextYear', 'DatabaseWorkedWith',
       'DevType', 'EdLevel', 'Employment', 'Ethnicity', 'Gender', 'JobFactors',
       'JobSat', 'JobSeek', 'LanguageDesireNextYear', 'LanguageWorkedWith',
       'MiscTechDesireNextYear', 'MiscTechWorkedWith',
       'NEWCollabToolsDesireNextYear', 'NEWCollabToolsWorkedWith', 'NEWDevOps',
       'NEWDevOpsImpt', 'NEWEdImpt', 'NEWJobHunt', 'NEWJobHuntResearch',
       'NEWLearn', 'NEWOffTopic', 'NEWOnboardGood', 'NEWOtherComms',
       'NEWOvertime', 'NEWPurchaseResearch', 'NEWPurpleLink', 'NEWSOSites',
       'NEWStuck', 'OpSys', 'OrgSize', 'PlatformDesireNextYear',
       'PlatformWorkedWith', 'PurchaseWhat', 'Sexuality', 'SOAccount',
       'SOComm', 'SOPartFreq', 'SOVisitFreq', 'SurveyEase', 'SurveyLength',
       'Trans', 'UndergradMajor', 'WebframeDesireNextYear',
  

In [47]:
# Rename 'ConvertedComp' to 'SalaryUSD'. The result of rename() is bound back to df
# instead of using inplace=True, which keeps the statement chainable; rename()
# ignores labels that are absent, so re-running the cell is harmless.
df = df.rename(columns={'ConvertedComp': 'SalaryUSD'})

In [48]:
# Check the rename: 'SalaryUSD' should now be listed where 'ConvertedComp' was.
df.columns

Index(['Respondent', 'MainBranch', 'Hobbyist', 'Age', 'Age1stCode', 'CompFreq',
       'CompTotal', 'SalaryUSD', 'Country', 'CurrencyDesc', 'CurrencySymbol',
       'DatabaseDesireNextYear', 'DatabaseWorkedWith', 'DevType', 'EdLevel',
       'Employment', 'Ethnicity', 'Gender', 'JobFactors', 'JobSat', 'JobSeek',
       'LanguageDesireNextYear', 'LanguageWorkedWith',
       'MiscTechDesireNextYear', 'MiscTechWorkedWith',
       'NEWCollabToolsDesireNextYear', 'NEWCollabToolsWorkedWith', 'NEWDevOps',
       'NEWDevOpsImpt', 'NEWEdImpt', 'NEWJobHunt', 'NEWJobHuntResearch',
       'NEWLearn', 'NEWOffTopic', 'NEWOnboardGood', 'NEWOtherComms',
       'NEWOvertime', 'NEWPurchaseResearch', 'NEWPurpleLink', 'NEWSOSites',
       'NEWStuck', 'OpSys', 'OrgSize', 'PlatformDesireNextYear',
       'PlatformWorkedWith', 'PurchaseWhat', 'Sexuality', 'SOAccount',
       'SOComm', 'SOPartFreq', 'SOVisitFreq', 'SurveyEase', 'SurveyLength',
       'Trans', 'UndergradMajor', 'WebframeDesireNextYear',
      

In [49]:
# Convert the 'Yes'/'No' answers to booleans; any other value (including a missing
# answer) becomes NaN. The True/False keys let already-converted values pass
# through unchanged, so re-running this cell does not wipe the column to NaN.
df['Hobbyist'] = df['Hobbyist'].map({'Yes': True, 'No': False, True: True, False: False})

In [50]:
# Display the frame to check the changes; pandas abbreviates the output to the
# first and last rows and elides the middle columns.
df

Unnamed: 0,Respondent,MainBranch,Hobbyist,Age,Age1stCode,CompFreq,CompTotal,SalaryUSD,Country,CurrencyDesc,...,SurveyEase,SurveyLength,Trans,UndergradMajor,WebframeDesireNextYear,WebframeWorkedWith,WelcomeChange,WorkWeekHrs,YearsCode,YearsCodePro
0,1,I am a developer by profession,True,,13,Monthly,,,Germany,European Euro,...,Neither easy nor difficult,Appropriate in length,No,"Computer science, computer engineering, or sof...",ASP.NET Core,ASP.NET;ASP.NET Core,Just as welcome now as I felt last year,50.0,36,27
1,2,I am a developer by profession,False,,19,,,,United Kingdom,Pound sterling,...,,,,"Computer science, computer engineering, or sof...",,,Somewhat more welcome now than last year,,7,4
2,3,I code primarily as a hobby,True,,15,,,,Russian Federation,,...,Neither easy nor difficult,Appropriate in length,,,,,Somewhat more welcome now than last year,,4,
3,4,I am a developer by profession,True,25.0,18,,,,Albania,Albanian lek,...,,,No,"Computer science, computer engineering, or sof...",,,Somewhat less welcome now than last year,40.0,7,4
4,5,"I used to be a developer by profession, but no...",True,31.0,16,,,,United States,,...,Easy,Too short,No,"Computer science, computer engineering, or sof...",Django;Ruby on Rails,Ruby on Rails,Just as welcome now as I felt last year,,15,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64456,64858,,True,,16,,,,United States,,...,,,,"Computer science, computer engineering, or sof...",,,,,10,Less than 1 year
64457,64867,,True,,,,,,Morocco,,...,,,,,,,,,,
64458,64898,,True,,,,,,Viet Nam,,...,,,,,,,,,,
64459,64925,,True,,,,,,Poland,,...,,,,,Angular;Angular.js;React.js,,,,,
