In [1]:
import pandas as pd
# Main survey results file, indexed by its 'Respondent' column.
df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')
# Survey schema file, indexed by its 'Column' column.
schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='Column')

# Small hand-built dataset for the update examples:
# each key becomes a column and each list holds that column's values.
dict_data = {
    'first': ['kaushal', 'pinkal', 'ananya', 'kartavya'],
    'last': ['khokhar', 'khokhar', 'patel', 'patel'],
    'email':['kaushal.123@gmail.com', 'pinal.123@gmail.com', 'anu.123@gmail.com', 'kartu.123@gmail.com']
}

dict_to_df = pd.DataFrame(dict_data)

In [2]:
dict_to_df.columns

Index(['first', 'last', 'email'], dtype='object')

In [3]:
# Assign new column names (replaces all column labels at once)
dict_to_df.columns = ['first_name', 'last_name', 'email']

In [4]:
dict_to_df

Unnamed: 0,first_name,last_name,email
0,kaushal,khokhar,kaushal.123@gmail.com
1,pinkal,khokhar,pinal.123@gmail.com
2,ananya,patel,anu.123@gmail.com
3,kartavya,patel,kartu.123@gmail.com


In [5]:
# Upper-case every column name using the vectorised string accessor
dict_to_df.columns = dict_to_df.columns.str.upper()

In [6]:
# Check the column labels after upper-casing
dict_to_df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [7]:
# Replace underscores in the column names with spaces
dict_to_df.columns = dict_to_df.columns.str.replace('_', ' ')

In [8]:
dict_to_df.columns

Index(['FIRST NAME', 'LAST NAME', 'EMAIL'], dtype='object')

In [9]:
# Replace spaces in the column names with underscores
dict_to_df.columns = dict_to_df.columns.str.replace(' ', '_')

In [10]:
dict_to_df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [11]:
# Rename specific columns only; columns not listed in the mapping are left unchanged
dict_to_df.rename(columns={'FIRST_NAME': 'first', 'LAST_NAME': 'last',}, inplace=True)

In [12]:
dict_to_df

Unnamed: 0,first,last,EMAIL
0,kaushal,khokhar,kaushal.123@gmail.com
1,pinkal,khokhar,pinal.123@gmail.com
2,ananya,patel,anu.123@gmail.com
3,kartavya,patel,kartu.123@gmail.com


In [13]:
# Lower-case every column name using the vectorised string accessor
dict_to_df.columns = dict_to_df.columns.str.lower()

In [14]:
# Read the 'last' and 'email' values of the row labelled 2
dict_to_df.loc[2, ['last', 'email']]

last                 patel
email    anu.123@gmail.com
Name: 2, dtype: object

In [15]:
# Change several column values of one row with .loc
dict_to_df.loc[2, ['last', 'email']] = ['khokhar', 'ananya.123@gmail.com']

In [16]:
dict_to_df

Unnamed: 0,first,last,email
0,kaushal,khokhar,kaushal.123@gmail.com
1,pinkal,khokhar,pinal.123@gmail.com
2,ananya,khokhar,ananya.123@gmail.com
3,kartavya,patel,kartu.123@gmail.com


In [17]:
# Another method to change column values: .at reads/writes exactly one cell,
# addressed by a single row label and a single column label, so each value is
# set separately (use .loc to set several columns in one statement).
dict_to_df.at[2, 'last'] = 'patel'
dict_to_df.at[2, 'email'] = 'anu.123@gmail.com'

In [18]:
dict_to_df

Unnamed: 0,first,last,email
0,kaushal,khokhar,kaushal.123@gmail.com
1,pinkal,khokhar,pinal.123@gmail.com
2,ananya,patel,anu.123@gmail.com
3,kartavya,patel,kartu.123@gmail.com


In [19]:
# Chained indexing does NOT work for assignment: per the warning this cell
# produces, the value is set on a copy of a slice rather than on dict_to_df.
# Use .loc (or .at for a single cell) instead.
fltr = dict_to_df['first'] == 'kaushal'
dict_to_df[fltr]['first'] = 'kush' 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [20]:
# Correct way: pass the boolean filter and the column label to a single .loc call
fltr = dict_to_df['first'] == 'kaushal'
dict_to_df.loc[fltr, 'first'] = 'kush' 

In [21]:
dict_to_df

Unnamed: 0,first,last,email
0,kush,khokhar,kaushal.123@gmail.com
1,pinkal,khokhar,pinal.123@gmail.com
2,ananya,patel,anu.123@gmail.com
3,kartavya,patel,kartu.123@gmail.com


In [22]:
# Upper-case every value in the email column (the result is only displayed, not stored)
dict_to_df["email"].str.upper()

0    KAUSHAL.123@GMAIL.COM
1      PINAL.123@GMAIL.COM
2        ANU.123@GMAIL.COM
3      KARTU.123@GMAIL.COM
Name: email, dtype: object

In [23]:
# Assign the result back to the column to keep the change
dict_to_df['email'] = dict_to_df["email"].str.upper()

In [24]:
dict_to_df

Unnamed: 0,first,last,email
0,kush,khokhar,KAUSHAL.123@GMAIL.COM
1,pinkal,khokhar,PINAL.123@GMAIL.COM
2,ananya,patel,ANU.123@GMAIL.COM
3,kartavya,patel,KARTU.123@GMAIL.COM


In [25]:
# Apply a function to each value of a Series (here: the length of each email)
dict_to_df['email'].apply(len)

0    21
1    19
2    17
3    19
Name: email, dtype: int64

In [26]:
def update_email(email):
    """Return ``email`` converted to lower case."""
    lowered = email.lower()
    return lowered

In [27]:
# Run update_email on each value of the email column (result is only displayed)
dict_to_df['email'].apply(update_email)

0    kaushal.123@gmail.com
1      pinal.123@gmail.com
2        anu.123@gmail.com
3      kartu.123@gmail.com
Name: email, dtype: object

In [28]:
# The DataFrame is unchanged: the result of apply() was not assigned back
dict_to_df

Unnamed: 0,first,last,email
0,kush,khokhar,KAUSHAL.123@GMAIL.COM
1,pinkal,khokhar,PINAL.123@GMAIL.COM
2,ananya,patel,ANU.123@GMAIL.COM
3,kartavya,patel,KARTU.123@GMAIL.COM


In [29]:
# Assign the result back to the column to apply it
dict_to_df['email'] = dict_to_df['email'].apply(update_email)

In [30]:
# Now the email column is lower case
dict_to_df

Unnamed: 0,first,last,email
0,kush,khokhar,kaushal.123@gmail.com
1,pinkal,khokhar,pinal.123@gmail.com
2,ananya,patel,anu.123@gmail.com
3,kartavya,patel,kartu.123@gmail.com


In [31]:
# Using a lambda function instead of a named function
dict_to_df['email'] = dict_to_df['email'].apply(lambda x: x.upper())

In [32]:
dict_to_df

Unnamed: 0,first,last,email
0,kush,khokhar,KAUSHAL.123@GMAIL.COM
1,pinkal,khokhar,PINAL.123@GMAIL.COM
2,ananya,patel,ANU.123@GMAIL.COM
3,kartavya,patel,KARTU.123@GMAIL.COM


In [33]:
# Apply a function to every element of a DataFrame (here: the length of each value)
dict_to_df.applymap(len)

Unnamed: 0,first,last,email
0,4,7,21
1,6,7,19
2,6,5,17
3,8,5,19


In [34]:
# Lower-case every element and rebind dict_to_df to the result
dict_to_df = dict_to_df.applymap(str.lower)

In [35]:
dict_to_df

Unnamed: 0,first,last,email
0,kush,khokhar,kaushal.123@gmail.com
1,pinkal,khokhar,pinal.123@gmail.com
2,ananya,patel,anu.123@gmail.com
3,kartavya,patel,kartu.123@gmail.com


In [36]:
# Series.map: substitute values using a dict; values not in the dict become NaN
dict_to_df['first'].map({'ananya': 'anu', 'kartavya': 'kartu'})

0      NaN
1      NaN
2      anu
3    kartu
Name: first, dtype: object

In [37]:
# Series.replace: substitute only the listed values; all other values are kept as they are
dict_to_df['first'].replace({'ananya': 'anu', 'kartavya': 'kartu'})

0      kush
1    pinkal
2       anu
3     kartu
Name: first, dtype: object

In [38]:
df['Hobbyist']

Respondent
1        Yes
2         No
3        Yes
4         No
5        Yes
        ... 
88377    Yes
88601     No
88802     No
88816     No
88863    Yes
Name: Hobbyist, Length: 88883, dtype: object

In [39]:
# Map the 'Yes'/'No' answers to new values (result is only displayed, df is not changed).
# NOTE(review): the targets are the strings 'True'/'False', so the result keeps
# dtype object; use the booleans True/False if a bool column is wanted - confirm intent.
df['Hobbyist'].map({'Yes': 'True', 'No': 'False'})

Respondent
1         True
2        False
3         True
4        False
5         True
         ...  
88377     True
88601    False
88802    False
88816    False
88863     True
Name: Hobbyist, Length: 88883, dtype: object