In [1]:
import pandas as pd

In [2]:
# Sample records used throughout the notebook: one key per column, and the
# i-th entry of every list belongs to the same person.
people = dict(
    first=['Corey', 'Jane', 'John'],
    last=['Schafer', 'Doe', 'Doe'],
    email=['Coreyschafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'],
)

In [3]:
# Build the example frame: every dict key becomes a column and the rows get
# the default integer index (0, 1, 2).
ex = pd.DataFrame(data=people)

ex

Unnamed: 0,first,last,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


# 

#  Updating Columns

In [5]:
# Inspect the current column labels (returned as a pandas Index).
ex.columns

Index(['first', 'last', 'email'], dtype='object')

In [8]:
# Say we want 'first_name' / 'last_name' instead of 'first' / 'last'.
# Assigning to .columns replaces ALL labels at once, so the list has to name
# every column, in order, including the ones that stay the same ('email').
ex.columns = ['first_name', 'last_name', 'email']

In [10]:
ex  # display the frame to confirm the new column names

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


#### To uppercase all the column names

In [11]:
# Vectorized string method on the column Index: upper-cases every label.
ex.columns = ex.columns.str.upper()
ex

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


####  To replace spaces with _ in column names

In [12]:
# Swap every space in a column label for an underscore; regex=False makes the
# literal (non-regex) match explicit. The current labels contain no spaces,
# so the frame shown below is unchanged.
ex.columns = ex.columns.str.replace(' ', '_', regex=False)
ex

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


#### To lowercase all the column names

In [13]:
# Vectorized string method on the column Index: lower-cases every label.
ex.columns = ex.columns.str.lower()
ex

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


#### To rename only SOME of the columns (here: two of the three)

In [14]:
# rename() takes an {old_label: new_label} dictionary, so only the columns
# listed in it change; 'email' keeps its name.
# The result is assigned back instead of using inplace=True: inplace has no
# performance benefit and prevents method chaining.
ex = ex.rename(columns={'first_name': 'first', 'last_name': 'last'})
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


# 

# ---------      Updating Rows        --------

#### If I wanna change the lastname of John in our current DF

In [16]:
# Method 1 (not recommended): overwrite the whole row with index label 2.
# Every value has to be supplied, even the ones that do not change.
ex.loc[2] = ['John', 'Smith', 'JohnSmith@gmail.com']
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnSmith@gmail.com


In [20]:
# Method 2 (recommended): .loc[row_label, column_labels] updates only the
# listed columns of row 2; 'first' is left untouched.
ex.loc[2, ['last','email']]  = ['Doe', 'JohnDoe@email.com']
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [21]:
# Same .loc form with a single column label and a scalar value.
ex.loc[2, 'last'] = 'Smith'
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [22]:
# Method 3: .at addresses exactly one cell by row label and column label.
ex.at[2, 'last'] = 'Doe'
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


#### In a large DF we usually don't know the row label, so we select the rows with a filter (boolean mask) instead

In [25]:
# Boolean mask that is True for the rows whose email matches.
filt = (ex['email'] == 'JohnDoe@email.com')
# WRONG METHOD (kept on purpose as a demonstration): chained indexing.
# ex[filt] is evaluated first and the assignment is then made on that
# intermediate result, which triggers the "set on a copy of a slice" warning
# shown below.
ex[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ex[filt]['last'] = 'Smith'


In [26]:
# This shows that we SHOULD use .loc or .at to avoid this warning: the chained assignment is made on a copy, so ex itself may not be updated.

In [28]:
# Boolean mask that is True for the rows whose email matches.
filt = (ex['email'] == 'JohnDoe@email.com')
# CORRECT METHOD: a single .loc call with the mask as row selector and the
# column label, so the value is set on ex itself.
ex.loc[filt,'last'] = 'Smith'
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,Coreyschafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


#### Let say I wanna change all email addresses to lowercase

In [30]:
# Not permanent: .str.lower() returns a new Series and the result is discarded.
ex['email'].str.lower()
# Permanent: assign the lower-cased Series back to the column.
ex['email'] = ex['email'].str.lower()

ex

Unnamed: 0,first,last,email
0,Corey,Schafer,coreyschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


#### 

### Important Methods to perform modifying operations

##### 1. Apply
##### 2. Applymap
##### 3. Map
##### 4. Replace
##### ---------------------------

#### 1. Apply method

In [31]:
# apply calls a function on our values. It works on both DataFrame and Series objects.

In [32]:
# Series.apply calls len on every value, giving the character count of each email.
ex['email'].apply(len)

0    22
1    17
2    17
Name: email, dtype: int64

In [33]:
def update_email(email):
    """Return the given email address converted to upper case.

    Args:
        email: A single email address; any object with an ``upper()`` method
            (in this notebook, a ``str``).

    Returns:
        The result of ``email.upper()``.
    """
    uppercased = email.upper()
    return uppercased

# Upper-cases every email, but the result is not assigned, so ex is unchanged.
ex['email'].apply(update_email)

0    COREYSCHAFER@GMAIL.COM
1         JANEDOE@EMAIL.COM
2         JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [34]:
# Assign the result back to the column to make the upper-casing permanent.
ex['email'] = ex['email'].apply(update_email)
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


##### Now moving them back to lowercase with LAMBDA function, it helps in not defining a function separately.

In [37]:
# The lambda stands in for a one-off named function: lower-case each email
# and store the result back in the column.
ex['email'] = ex['email'].apply(lambda address: address.lower())
ex

Unnamed: 0,first,last,email
0,Corey,Schafer,coreyschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


##### Using Apply with DataFrames

In [38]:
# On a DataFrame, apply runs the function once per column: each column is passed to the function as a Series.

In [39]:
# This does NOT give string lengths: on a DataFrame, apply passes each whole
# column (a Series) to len, so the result is the number of rows per column (3).
ex.apply(len)

first    3
last     3
email    3
dtype: int64

In [40]:
# pd.Series.min is called once per column and returns that column's minimum
# (for these string columns, the value that sorts first).
ex.apply(pd.Series.min)

first                     Corey
last                        Doe
email    coreyschafer@gmail.com
dtype: object

In [41]:
# Equivalent lambda form: each column arrives as a Series and we take its minimum.
ex.apply(lambda column: column.min())

first                     Corey
last                        Doe
email    coreyschafer@gmail.com
dtype: object

In [None]:
# Running APPLY on a Series applies the function to every value of the Series.
# Running APPLY on a DataFrame applies the function to each Series (column) of the DataFrame.

#### 

#### 2. Applymap

In [43]:
# Used to apply a function to every individual element of the DataFrame. Notice the difference: APPLY on a DataFrame hands
# the function a whole Series (one column at a time), whereas APPLYMAP goes one level deeper and hands it each single value.

# Series objects don't have an applymap method.

In [44]:
# len is called on every individual cell, giving a frame of string lengths.
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map — confirm against the installed pandas version.
ex.applymap(len)

Unnamed: 0,first,last,email
0,5,7,22
1,4,3,17
2,4,5,17


In [45]:
# For comparison: apply works column by column, so len gives the row count of each column.
ex.apply(len)

first    3
last     3
email    3
dtype: int64

#####  To turn all of your fields into lowercase at once...

In [48]:
# str.lower is called on every cell. The lower-cased frame is only displayed;
# ex itself is unchanged because the result is not assigned.
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map — confirm against the installed pandas version.
ex.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreyschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


#### 

#### 3. Map

In [49]:
# Only works on a Series. map substitutes each value of the Series with another value (looked up here in a dictionary).

##### Let's say we want to substitute a couple of our first names

In [50]:
# Values that have no entry in the mapping dict ('John') come back as NaN.
ex['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

# 

#### 4. Replace

In [51]:
# Works like MAP, but values that are not in the dictionary are kept untouched instead of becoming NaN.

In [52]:
# Only the listed names are substituted; 'John' has no entry and is kept as-is.
# The result is displayed but not assigned, so ex is unchanged.
ex['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [53]:
# Assign the replaced Series back to the column to make the substitution permanent.
ex['first'] = ex['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})
ex

Unnamed: 0,first,last,email
0,Chris,Schafer,coreyschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


# 

### Let's move to larger dataset and see the application in real-life

In [54]:
# Load the survey results and the accompanying schema CSV.
# Both paths are relative to the notebook's working directory.
df = pd.read_csv('data/survey_results_public.csv')
schema_df = pd.read_csv('data/survey_results_schema.csv')

In [55]:
# Let pandas display up to DISPLAY_LIMIT columns and rows before truncating output.
DISPLAY_LIMIT = 85
pd.set_option('display.max_columns', DISPLAY_LIMIT)
pd.set_option('display.max_rows', DISPLAY_LIMIT)

#### 

#### Changing salary column name 

In [58]:
# Give the salary column a more descriptive name. Only the label listed in
# the dictionary changes; every other column keeps its name.
# The result is assigned back instead of using inplace=True: inplace has no
# performance benefit and prevents method chaining.
df = df.rename(columns={'ConvertedComp': 'SalaryUSD'})

df['SalaryUSD']

0            NaN
1            NaN
2         8820.0
3        61000.0
4            NaN
          ...   
88878        NaN
88879        NaN
88880        NaN
88881        NaN
88882        NaN
Name: SalaryUSD, Length: 88883, dtype: float64

#### Changing  Yes to True and No to False in Hobbyist Column

In [59]:
# Preview only: the mapped Series is displayed, df itself is not modified.
df['Hobbyist'].map({'Yes': True, 'No': False})

0         True
1        False
2         True
3        False
4         True
         ...  
88878     True
88879    False
88880    False
88881    False
88882     True
Name: Hobbyist, Length: 88883, dtype: bool

In [60]:
# Convert the 'Yes'/'No' answers to booleans and store them back in the column.
# Series.map turns every value that is not a key of the dict into NaN, so
# mapping a second time (e.g. when this cell is re-run) would wipe the column:
# True/False are not keys. Mapping only while the raw labels are still present
# keeps the cell safe to re-run.
hobbyist_flags = {'Yes': True, 'No': False}
if df['Hobbyist'].isin(list(hobbyist_flags)).any():
    df['Hobbyist'] = df['Hobbyist'].map(hobbyist_flags)
df.head(3)

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,EduOther,OrgSize,DevType,YearsCode,Age1stCode,YearsCodePro,CareerSat,JobSat,MgrIdiot,MgrMoney,MgrWant,JobSeek,LastHireDate,LastInt,FizzBuzz,JobFactors,ResumeUpdate,CurrencySymbol,CurrencyDesc,CompTotal,CompFreq,SalaryUSD,WorkWeekHrs,WorkPlan,WorkChallenge,WorkRemote,WorkLoc,ImpSyn,CodeRev,CodeRevHrs,UnitTests,PurchaseHow,PurchaseWhat,LanguageWorkedWith,LanguageDesireNextYear,DatabaseWorkedWith,DatabaseDesireNextYear,PlatformWorkedWith,PlatformDesireNextYear,WebFrameWorkedWith,WebFrameDesireNextYear,MiscTechWorkedWith,MiscTechDesireNextYear,DevEnviron,OpSys,Containers,BlockchainOrg,BlockchainIs,BetterLife,ITperson,OffOn,SocialMedia,Extraversion,ScreenName,SOVisit1st,SOVisitFreq,SOVisitTo,SOFindAnswer,SOTimeSaved,SOHowMuchTime,SOAccount,SOPartFreq,SOJobs,EntTeams,SOComm,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,True,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,"Taught yourself a new language, framework, or ...",,,4.0,10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,HTML/CSS;Java;JavaScript;Python,C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL,SQLite,MySQL,MacOS;Windows,Android;Arduino;Windows,Django;Flask,Flask;jQuery,Node.js,Node.js,IntelliJ;Notepad++;PyCharm,Windows,I do not use containers,,,Yes,"Fortunately, someone else has that title",Yes,Twitter,Online,Username,2017,A few times per month or weekly,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,31-60 minutes,No,,"No, I didn't know that Stack Overflow had a jo...","No, and I don't know what those are",Neutral,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,False,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,Taken an online course in programming or softw...,,"Developer, desktop or enterprise applications;...",,17,,,,,,,I am actively looking for a job,I've never had a job,,,Financial performance or funding status of the...,"Something else changed (education, award, medi...",,,,,,,,,,,,,,,,,C++;HTML/CSS;Python,C++;HTML/CSS;JavaScript;SQL,,MySQL,Windows,Windows,Django,Django,,,Atom;PyCharm,Windows,I do not use containers,,Useful across many domains and could change ma...,Yes,Yes,Yes,Instagram,Online,Username,2017,Daily or almost daily,Find answers to specific questions;Learn how t...,3-5 times per week,Stack Overflow was much faster,11-30 minutes,Yes,A few times per month or weekly,"No, I knew that Stack Overflow had a job board...","No, and I don't know what those are","Yes, somewhat",Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",True,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,"Taught yourself a new language, framework, or ...",100 to 499 employees,"Designer;Developer, back-end;Developer, front-...",3.0,22,1.0,Slightly satisfied,Slightly satisfied,Not at all confident,Not sure,Not sure,"I’m not actively looking, but I am open to new...",1-2 years ago,Interview with people in peer roles,No,"Languages, frameworks, and other technologies ...",I was preparing for a job search,THB,Thai baht,23000.0,Monthly,8820.0,40.0,There's no schedule or spec; I work on what se...,Distracting work environment;Inadequate access...,Less than once per month / Never,Home,Average,No,,"No, but I think we should",Not sure,I have little or no influence,HTML/CSS,Elixir;HTML/CSS,PostgreSQL,PostgreSQL,,,,Other(s):,,,Vim;Visual Studio Code,Linux-based,I do not use containers,,,Yes,Yes,Yes,Reddit,In real life (in person),Username,2011,A few times per week,Find answers to specific questions;Learn how t...,6-10 times per week,They were about the same,,Yes,Less than once per month or monthly,Yes,"No, I've heard of them, but I am not part of a...",Neutral,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
