Updating Rows and Columns: Modifying a DataFrame

In [1]:
import pandas as pd

In [2]:
# Example data: a dict mapping each column name to its list of values,
# turned into a DataFrame that the rest of the notebook modifies.
people = dict(
    first=["Anish", "Ramish", "Samish", "Bamish", "Bamish"],
    last=["Khadka", "Mainali", "Shrestha", "Karki", "Mainali"],
    email=[
        "anishramish56@gmail.com",
        "mainaliramish89@gmail.com",
        "shresthasamish28@gmail.com",
        "bamishkarki819@gmail.com",
        "bamishmainali78@gmail.com",
    ],
)
mydf = pd.DataFrame(data=people)
mydf

Unnamed: 0,first,last,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Samish,Shrestha,shresthasamish28@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [3]:
# Updating columns first: start by inspecting the current column labels
mydf.columns

Index(['first', 'last', 'email'], dtype='object')

In [4]:
# Rename all columns at once by assigning a list with one name per existing column
# (to rename only specific columns there is another way: DataFrame.rename)
mydf.columns = ['first_name', 'last_name', 'email']
mydf

Unnamed: 0,first_name,last_name,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Samish,Shrestha,shresthasamish28@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [5]:
# Upper-case every column label with a list comprehension
mydf.columns = [label.upper() for label in mydf.columns]
mydf

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Samish,Shrestha,shresthasamish28@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [None]:
# Better to use _ in place of spaces in column names: replace each ' ' in the labels with '_'
# (the current labels contain no spaces, so this leaves them unchanged for this frame)
mydf.columns = mydf.columns.str.replace(' ', '_')
mydf

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Samish,Shrestha,shresthasamish28@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [7]:
# Back to lower-case column labels, again via a list comprehension
mydf.columns = [label.lower() for label in mydf.columns]
mydf

Unnamed: 0,first_name,last_name,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Samish,Shrestha,shresthasamish28@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [8]:
# Renaming only a few columns: pass an {old_name: new_name} mapping to rename().
# rename() returns a new DataFrame; it is assigned back to mydf rather than
# using inplace=True, which keeps the statement chainable and the update explicit.
mydf = mydf.rename(columns={'first_name' : 'firstName', 'last_name' : 'lastName'})
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Samish,Shrestha,shresthasamish28@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


Updating data in rows

In [9]:
# updating an entire row: assign one value per column to the row labelled 2
mydf.loc[2] = ['Harish', 'Kansakar', 'kansakarharish23@gmail.com']
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Harish,Kansakar,kansakarharish23@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [10]:
# updating only a subset of columns: .loc[row_label, [column_labels]] takes one value per listed column
mydf.loc[2, ['lastName', 'email']] = ['Shankhar', 'harishshankar78@gmail.com']
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Harish,Shankhar,harishshankar78@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [11]:
# using .at (an indexer used with square brackets, not a method call)
# .at reads or writes exactly one scalar cell identified by (row label, column label),
# so each column gets its own assignment; use .loc to set several columns in one statement.
mydf.at[2, 'lastName'] = 'Ravindra'
mydf.at[2, 'email'] = 'harishravindra89@gmail.com'
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Harish,Ravindra,harishravindra89@gmail.com
3,Bamish,Karki,bamishkarki819@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [12]:
# Update through a boolean mask: flag the rows whose lastName is 'Karki',
# then overwrite lastName and email on exactly those rows
filt = mydf['lastName'].eq('Karki')
mydf.loc[filt, ['lastName', 'email']] = ['Basnet', 'bamishbasnet12@gmail.com']
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Harish,Ravindra,harishravindra89@gmail.com
3,Bamish,Basnet,bamishbasnet12@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [13]:
# updating multiple rows: upper-case every value of the email column via the .str accessor
mydf['email'] = mydf['email'].str.upper()
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,ANISHRAMISH56@GMAIL.COM
1,Ramish,Mainali,MAINALIRAMISH89@GMAIL.COM
2,Harish,Ravindra,HARISHRAVINDRA89@GMAIL.COM
3,Bamish,Basnet,BAMISHBASNET12@GMAIL.COM
4,Bamish,Mainali,BAMISHMAINALI78@GMAIL.COM


In [14]:
# Revert the emails to lower case using the same .str accessor approach
mydf['email'] = mydf['email'].str.lower()
# this is a simple and basic idea
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Harish,Ravindra,harishravindra89@gmail.com
3,Bamish,Basnet,bamishbasnet12@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [19]:
# Four methods for such work are: apply(), map(), applymap(), replace()

# 1. apply() calls a function on the data; it works on both DataFrames and Series
mydf['email'].apply(len) # on a Series, len is applied to each value (the length of every email)

0    23
1    25
2    26
3    24
4    25
Name: email, dtype: int64

In [23]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

# Run update_email on every value of the email column and store the result back in the column
mydf['email'] = mydf['email'].apply(update_email)
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,ANISHRAMISH56@GMAIL.COM
1,Ramish,Mainali,MAINALIRAMISH89@GMAIL.COM
2,Harish,Ravindra,HARISHRAVINDRA89@GMAIL.COM
3,Bamish,Basnet,BAMISHBASNET12@GMAIL.COM
4,Bamish,Mainali,BAMISHMAINALI78@GMAIL.COM


In [29]:
# Restore lower-case emails, this time with an anonymous (lambda) function
mydf['email'] = mydf['email'].apply(lambda address: address.lower())
mydf

Unnamed: 0,firstName,lastName,email
0,Anish,Khadka,anishramish56@gmail.com
1,Ramish,Mainali,mainaliramish89@gmail.com
2,Harish,Ravindra,harishravindra89@gmail.com
3,Bamish,Basnet,bamishbasnet12@gmail.com
4,Bamish,Mainali,bamishmainali78@gmail.com


In [31]:
# Apply with dataframes
# For a Series, apply runs once per value, whereas with a DataFrame it runs once per row or column
# Only the last expression of a cell is displayed automatically, so the first
# result is passed to display() explicitly; otherwise it would be silently discarded.
display(mydf.apply(len)) # number of rows for each column
mydf.apply(len, axis='columns') # no of columns for each row


0    3
1    3
2    3
3    3
4    3
dtype: int64

In [32]:
mydf.apply(pd.Series.min) # DataFrame.apply passes each column (a Series) to the function, giving the minimum of every column

firstName                      Anish
lastName                      Basnet
email        anishramish56@gmail.com
dtype: object

In [33]:
mydf.apply(lambda x : x.min()) # same result with a lambda: x is one whole column (a Series) at a time, not a single value

firstName                      Anish
lastName                      Basnet
email        anishramish56@gmail.com
dtype: object

In [34]:
# 2. applymap: applies a function to each individual value of a DataFrame. Works only with DataFrames
# NOTE: pandas 2.1+ deprecates DataFrame.applymap in favour of DataFrame.map
mydf.applymap(len)

Unnamed: 0,firstName,lastName,email
0,5,6,23
1,6,7,25
2,6,8,26
3,6,6,24
4,6,7,25


In [40]:
# make every value lowercase
# applymap returns a new DataFrame and leaves mydf untouched, so the result is
# assigned back: the map()/replace() examples that follow look up the lower-case
# key 'anish' and would match nothing on a fresh run if mydf kept its capitalised values.
mydf = mydf.applymap(str.lower)
mydf

Unnamed: 0,firstName,lastName,email
0,anish,khadka,anishramish56@gmail.com
1,ramish,mainali,mainaliramish89@gmail.com
2,harish,ravindra,harishravindra89@gmail.com
3,bamish,basnet,bamishbasnet12@gmail.com
4,bamish,mainali,bamishmainali78@gmail.com


In [42]:
# 3. map: substitute each value in a Series with another value taken from the mapping
# Keys are compared with the stored values exactly (case-sensitive)
mydf['firstName'].map({'anish' : 'Rajendra'})

0    Rajendra
1         NaN
2         NaN
3         NaN
4         NaN
Name: firstName, dtype: object

In [44]:
# With map, values that have no entry in the mapping become NaN
# replace() is used for such cases: it changes only the listed values and keeps the others as they are
mydf['firstName'] = mydf['firstName'].replace({'anish' : 'Rajendra'})
mydf

Unnamed: 0,firstName,lastName,email
0,Rajendra,khadka,anishramish56@gmail.com
1,ramish,mainali,mainaliramish89@gmail.com
2,harish,ravindra,harishravindra89@gmail.com
3,bamish,basnet,bamishbasnet12@gmail.com
4,bamish,mainali,bamishmainali78@gmail.com


In [16]:
# Working with real data
# Load the survey responses (indexed by ResponseId) and the survey schema
# (indexed by qname) from the CSV files under Data/.
df = pd.read_csv(
    'Data/stack-overflow-developer-survey-2024/survey_results_public.csv',
    index_col='ResponseId',
)
schema_df = pd.read_csv(
    'Data/stack-overflow-developer-survey-2024/survey_results_schema.csv',
    index_col='qname',
)

In [17]:
# Set the display limits to 85 columns and 85 rows so wide/long frames are less truncated
pd.set_option('display.max_columns', 85)
pd.set_option('display.max_rows', 85)
df.head() # preview the first rows of the survey data

Unnamed: 0_level_0,MainBranch,Age,Employment,RemoteWork,Check,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,TechDoc,YearsCode,YearsCodePro,DevType,OrgSize,PurchaseInfluence,BuyNewTool,BuildvsBuy,TechEndorse,Country,Currency,CompTotal,LanguageHaveWorkedWith,LanguageWantToWorkWith,LanguageAdmired,DatabaseHaveWorkedWith,DatabaseWantToWorkWith,DatabaseAdmired,PlatformHaveWorkedWith,PlatformWantToWorkWith,PlatformAdmired,WebframeHaveWorkedWith,WebframeWantToWorkWith,WebframeAdmired,EmbeddedHaveWorkedWith,EmbeddedWantToWorkWith,EmbeddedAdmired,MiscTechHaveWorkedWith,MiscTechWantToWorkWith,MiscTechAdmired,ToolsTechHaveWorkedWith,ToolsTechWantToWorkWith,ToolsTechAdmired,...,AINextNo change,AINextMore integrated,AINextLess integrated,AINextMuch less integrated,AIThreat,AIEthics,AIChallenges,TBranch,ICorPM,WorkExp,Knowledge_1,Knowledge_2,Knowledge_3,Knowledge_4,Knowledge_5,Knowledge_6,Knowledge_7,Knowledge_8,Knowledge_9,Frequency_1,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,Frustration,ProfessionalTech,ProfessionalCloud,ProfessionalQuestion,Industry,JobSatPoints_1,JobSatPoints_4,JobSatPoints_5,JobSatPoints_6,JobSatPoints_7,JobSatPoints_8,JobSatPoints_9,JobSatPoints_10,JobSatPoints_11,SurveyLength,SurveyEase,ConvertedCompYearly,JobSat
ResponseId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1
1,I am a developer by profession,Under 18 years old,"Employed, full-time",Remote,Apples,Hobby,Primary/elementary school,Books / Physical media,,,,,,,,,,,United States of America,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,I am a developer by profession,35-44 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,API document(s) and/or SDK document(s);User gu...,20.0,17.0,"Developer, full-stack",,,,,,United Kingdom of Great Britain and Northern I...,,,Bash/Shell (all shells);Go;HTML/CSS;Java;JavaS...,Bash/Shell (all shells);Go;HTML/CSS;Java;JavaS...,Bash/Shell (all shells);Go;HTML/CSS;Java;JavaS...,Dynamodb;MongoDB;PostgreSQL,PostgreSQL,PostgreSQL,Amazon Web Services (AWS);Heroku;Netlify,Amazon Web Services (AWS);Heroku;Netlify,Amazon Web Services (AWS);Heroku;Netlify,Express;Next.js;Node.js;React,Express;Htmx;Node.js;React;Remix,Express;Node.js;React,,,,,,,Docker;Homebrew;Kubernetes;npm;Vite;Webpack,Docker;Homebrew;Kubernetes;npm;Vite;Webpack,Docker;Homebrew;Kubernetes;npm;Vite;Webpack,...,,,,,,,,Yes,Individual contributor,17.0,Agree,Disagree,Agree,Agree,Agree,Neither agree nor disagree,Disagree,Agree,Agree,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
3,I am a developer by profession,45-54 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,API document(s) and/or SDK document(s);User gu...,37.0,27.0,Developer Experience,,,,,,United Kingdom of Great Britain and Northern I...,,,C#,C#,C#,Firebase Realtime Database,Firebase Realtime Database,Firebase Realtime Database,Google Cloud,Google Cloud,Google Cloud,ASP.NET CORE,ASP.NET CORE,ASP.NET CORE,Rasberry Pi,Rasberry Pi,Rasberry Pi,.NET (5+) ;.NET Framework (1.0 - 4.8);.NET MAUI,.NET (5+) ;.NET Framework (1.0 - 4.8);.NET MAUI,.NET (5+) ;.NET Framework (1.0 - 4.8);.NET MAUI,MSBuild,MSBuild,MSBuild,...,,,,,,,,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Appropriate in length,Easy,,
4,I am learning to code,18-24 years old,"Student, full-time",,Apples,,Some college/university study without earning ...,"Other online resources (e.g., videos, blogs, f...",Stack Overflow;How-to videos;Interactive tutorial,,4.0,,"Developer, full-stack",,,,,,Canada,,,C;C++;HTML/CSS;Java;JavaScript;PHP;PowerShell;...,HTML/CSS;Java;JavaScript;PowerShell;Python;SQL...,HTML/CSS;Java;JavaScript;PowerShell;Python;SQL...,MongoDB;MySQL;PostgreSQL;SQLite,MongoDB;MySQL;PostgreSQL,MongoDB;MySQL;PostgreSQL,Amazon Web Services (AWS);Fly.io;Heroku,Amazon Web Services (AWS);Vercel,Amazon Web Services (AWS),jQuery;Next.js;Node.js;React;WordPress,jQuery;Next.js;Node.js;React,jQuery;Next.js;Node.js;React,Rasberry Pi,,,NumPy;Pandas;Ruff;TensorFlow,,,Docker;npm;Pip,Docker;Kubernetes;npm,Docker;npm,...,,,,,No,Circulating misinformation or disinformation;M...,Don’t trust the output or answers,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Too long,Easy,,
5,I am a developer by profession,18-24 years old,"Student, full-time",,Apples,,"Secondary school (e.g. American high school, G...","Other online resources (e.g., videos, blogs, f...",Technical documentation;Blogs;Written Tutorial...,API document(s) and/or SDK document(s);User gu...,9.0,,"Developer, full-stack",,,,,,Norway,,,C++;HTML/CSS;JavaScript;Lua;Python;Rust,C++;HTML/CSS;JavaScript;Lua;Python,C++;HTML/CSS;JavaScript;Lua;Python,PostgreSQL;SQLite,PostgreSQL;SQLite,PostgreSQL;SQLite,,,,,,,CMake;Cargo;Rasberry Pi,CMake;Rasberry Pi,CMake;Rasberry Pi,,,,APT;Make;npm,APT;Make,APT;Make,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Too short,Easy,,
