In [1]:
import pandas as pd

# Load the survey results into a DataFrame. The path is relative, so the
# notebook has to be run from the directory that contains "Data/".
df = pd.read_csv("Data/survey_results_public.csv")

In [2]:
# Small hand-made dataset: one list per column, one list position per person.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreyMS", "Dogs", "Cats"],
)


In [4]:
# Build a DataFrame from the dict: each key becomes a column label.
df2 = pd.DataFrame(people)
df2.columns

Index(['first', 'last', 'email'], dtype='object')

# RENAME COLUMN


In [9]:
# Upper-case every column label in one step via the Index string accessor.
df2.columns = df2.columns.str.upper()
df2.columns

Index(['FIRST', 'LAST', 'EMAIL'], dtype='object')

In [10]:
# Rename only the columns listed in the mapping; "EMAIL" is not a key and is
# left as-is. The result is rebound to df2 rather than using inplace=True,
# which keeps the call chainable and makes the data flow explicit.
df2 = df2.rename(columns={"FIRST": "FirstName", "LAST": "LastName"})
df2


Unnamed: 0,FirstName,LastName,EMAIL
0,Corey,Schafer,CoreyMS
1,Jane,Doe,Dogs
2,John,Doe,Cats


# UPDATING COLUMN VALUE

In [11]:
# Overwrite a single cell: row label 2, column "EMAIL".
df2.loc[2, "EMAIL"] = "meow"
df2.loc[2]

FirstName    John
LastName      Doe
EMAIL        meow
Name: 2, dtype: object

In [12]:
# Overwrite the whole row with label 2. The list is matched to the columns by
# position, so it has to follow the current column order.
df2.loc[2] = ["John", "Doe", "Moo"]
df2.loc[2]


FirstName    John
LastName      Doe
EMAIL         Moo
Name: 2, dtype: object

In [14]:
# Overwrite several cells of row 2 at once; values are paired with the listed
# columns in order ("EMAIL" <- "Cadsts", "LastName" <- "Doe").
df2.loc[2, ["EMAIL", "LastName"]] = ["Cadsts", "Doe"]
df2.loc[2]

FirstName      John
LastName        Doe
EMAIL        Cadsts
Name: 2, dtype: object

In [15]:
# Lower-case every value in the "EMAIL" column with the vectorised .str
# accessor and write the result back to the same column.
df2["EMAIL"] = df2["EMAIL"].str.lower()
df2

Unnamed: 0,FirstName,LastName,EMAIL
0,Corey,Schafer,coreyms
1,Jane,Doe,dogs
2,John,Doe,cadsts


## APPLY VS APPLYMAP VS MAP VS REPLACE

In [16]:
# apply on a Series - calls the function once for every value in the Series
df2["EMAIL"].apply(len) # returns the length of each email string


0    7
1    4
2    6
Name: EMAIL, dtype: int64

In [19]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    shouted = email.upper()
    return shouted


# Run update_email on each value of "EMAIL" and store the result back in the
# same column.
df2["EMAIL"] = df2["EMAIL"].apply(update_email)
df2

Unnamed: 0,FirstName,LastName,EMAIL
0,Corey,Schafer,COREYMS
1,Jane,Doe,DOGS
2,John,Doe,CADSTS


In [21]:
# apply on a DataFrame passes whole columns (each one a Series) to the
# function, not individual cells, so len here is the number of rows per column.
df2.apply(len)  # returns the length of each column

FirstName    3
LastName     3
EMAIL        3
dtype: int64

In [22]:
# With axis="columns" each row is passed to the function instead, so len is
# the number of columns in that row.
df2.apply(len, axis="columns")
# returns the length of each row

0    3
1    3
2    3
dtype: int64

In [23]:
# min receives each column as a Series, so this yields the smallest value per
# column (for strings, the one that sorts first).
df2.apply(min)

FirstName     Corey
LastName        Doe
EMAIL        cadsts
dtype: object

In [20]:
# Same pattern with an inline lambda instead of a named function:
# lower-case each email and write it back to the column.

df2["EMAIL"] = df2["EMAIL"].apply(lambda x: x.lower())
df2

Unnamed: 0,FirstName,LastName,EMAIL
0,Corey,Schafer,coreyms
1,Jane,Doe,dogs
2,John,Doe,cadsts


In [17]:
# applymap - apply a function to every individual cell of a DataFrame.
# DataFrame.applymap is deprecated since pandas 2.1, where it was renamed to
# DataFrame.map. Use the new name when this pandas version has it and fall
# back to applymap otherwise, so the cell runs without a FutureWarning.
# The check is made on the class so a column named "map" cannot shadow it.
elementwise = df2.map if hasattr(pd.DataFrame, "map") else df2.applymap
elementwise(len)  # returns the length of each cell

  df2.applymap(len)  # returns the length of each cell


Unnamed: 0,FirstName,LastName,EMAIL
0,5,7,7
1,4,3,4
2,4,3,6


In [26]:
# Series.map - substitutes each value in a Series by looking it up in the dict.
# Gotcha: values with no matching key are NOT left alone - they become NaN
# ("John" in row 2 has no key here, so it is lost).
df2["FirstName"] = df2["FirstName"].map({"Corey": "Chris", "Jane": "Mary"})
df2

Unnamed: 0,FirstName,LastName,EMAIL
0,Chris,Schafer,coreyms
1,Mary,Doe,dogs
2,,Doe,cadsts


In [29]:
# replace - also substitutes values using a dict, but values that are not
# listed as keys are kept unchanged instead of becoming NaN.
# Note: row 2 is still NaN afterwards because the earlier map call already
# overwrote "John"; replace does not bring it back.

df2["FirstName"] = df2["FirstName"].replace({"Chris": "Corey", "Mary": "Jane"})
df2

Unnamed: 0,FirstName,LastName,EMAIL
0,Corey,Schafer,coreyms
1,Jane,Doe,dogs
2,,Doe,cadsts


In [36]:
# Inspect the compensation column under its original name.
df["Convertedcomp"]

0            NaN
1            NaN
2         8820.0
3        61000.0
4            NaN
          ...   
88878        NaN
88879        NaN
88880        NaN
88881        NaN
88882        NaN
Name: Convertedcomp, Length: 88883, dtype: float64

In [43]:
# Give the compensation column a clearer name. The result is rebound to df
# rather than using inplace=True, which keeps the call chainable and makes the
# data flow explicit.
df = df.rename(columns={"Convertedcomp": "SalaryUSD"})
# .loc slices by label and includes both endpoints, so 0:5 shows six rows.
df.loc[0:5, "SalaryUSD"]

0         NaN
1         NaN
2      8820.0
3     61000.0
4         NaN
5    366420.0
Name: SalaryUSD, dtype: float64

In [44]:
# Convert the "Yes"/"No" answers to booleans. The result is only displayed;
# it is not assigned back, so df["Hobbyist"] itself is unchanged.
df["Hobbyist"].map({"Yes": True, "No": False})

0         True
1        False
2         True
3        False
4         True
         ...  
88878     True
88879    False
88880    False
88881    False
88882     True
Name: Hobbyist, Length: 88883, dtype: bool