# Updating Rows and Columns

In [1]:
import pandas as pd
import numpy as np 

In [2]:
#  dummy dictionary to convert it into dataframe

data = {
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [25, 32, 18, 47],
    'shape': ['New York', 'Paris', 'London', 'San Francisco']
}

In [6]:
data = pd.DataFrame(data)

In [3]:
df = pd.read_csv("CSVs\survey_results_public.csv")

## Changing Names of all the columns at once

In [8]:
# Let's first look at all the columns in our dummy data
data.columns

Index(['name', 'age', 'shape'], dtype='object')

In [23]:
# changing names of all the columns at once,
# Let's say I want to change the column name "shape" from "city" and I want to change "name" to "full_name" and
# keep age column same
# but the syntax below takes all column name, and if we want to change some specific column name we can do it with
# another syntax that we will talk later (without passing all column names)

data.columns = ["full_name", "age", "city"]

In [24]:
data.columns
# If I look into my data, column names are changed

Index(['full_name', 'age', 'city'], dtype='object')

## Altering something in all column names using list comprehension and str class of pandas
Let's say we want to upper case all the column names or, replace the spaces with the underscore, we can do it using the list comprehension, as well as str class   
- upper()  
- lower()  
- len()  
- title()  
- strip()    
- lstrip()  
- rstring()  
- replace()  
- split()  
- removesuffix()  
- removeprefix()  
- cat()  

We can use multiple useful methods from str class and useful link is below
https://pandas.pydata.org/docs/reference/api/pandas.Series.str.title.html
complete str class


https://pandas.pydata.org/docs/user_guide/text.html

In [25]:
data.columns = [x.upper() for x in data.columns]

In [26]:
data.columns

Index(['FULL_NAME', 'AGE', 'CITY'], dtype='object')

In [27]:
# WE can do the same thing using the str class of pandas

In [28]:
data.columns = data.columns.str.lower()
data.columns

Index(['full_name', 'age', 'city'], dtype='object')

## Altering the column names, by removing spaces and replacing them with _ or vice versa
For this purpose, we can use the list comprehension, but str class of pandas can do this work in more simpler syntax but we will explore both

In [29]:
# Let's first try it with List comprehension 
data.columns = [x.replace("_"," ") for x in data.columns]
data.columns

Index(['full name', 'age', 'city'], dtype='object')

In [30]:
# But I want to keep it same so, I will replace it back to the underscore, because 
# we cannot use dot notaion to access specific column of the dataframe if there is space in the column name
# And I will do this using the str class
data.columns = data.columns.str.replace(" ","_")
data.columns

Index(['full_name', 'age', 'city'], dtype='object')

## Renaming specific column names
We will use the rename() function and we need to set the "inplace" argument to "True" otherwise pandas will not replace the names of the columns on the original dataset, 
We need to pass the dictionary to the "columns" argument of rename method and pass the old name as key and new name as value

In [31]:
data.rename(columns={"first_name":"name"})
# if I look into the dataframe, pandas didnot change anything so we need to use the "inplace" argument

Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Charlie,18,London
3,David,47,San Francisco


In [34]:
data.rename(columns={"first_name":"name"}, inplace=True)
data.columns

Index(['full_name', 'age', 'city'], dtype='object')

## Updating all values of particular row

In [37]:
# Let's say I want to update the row 2 ["Charles", 18, "London"]
data.loc[2] = ["Mike", 18 , "Amesterdam"]

In [38]:
data

Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Mike,18,Amesterdam
3,David,47,San Francisco


## Updating specific column values for a single row
For this task we are familiar with the syntax

In [39]:
data.loc[2, ["full_name", "city"]] = ["Charles", "London"]

# By using this syntax we are telling pandas, the access the "full_name" and "city" columns and set their values 

In [40]:
data

Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Charles,18,London
3,David,47,San Francisco


In [41]:
# Changing single value
data.loc[2, "full_name"] = "Mike"
data

Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Mike,18,London
3,David,47,San Francisco


## at[] operator
Similar to loc, in that both provide label-based lookups. Use at if you only need to get or set a single value in a DataFrame or Series.

According to pandas documentation it is similar to the "loc" operator, and this operator is not deprecated yet, It means there could be a chance like "at" operator is more optimized to access single records than the loc but not sure (Instructors views)

- Main difference is, loc can handle single and multiple values but at can only handle single values

In [59]:
data.at[2, "full_name"] = "Charles"
data

Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Charles,18,London
3,David,47,San Francisco


## Warning in changing the value of specific column using filter

In [60]:
filt = data[data["full_name"] == "Charles"]
filt

Unnamed: 0,full_name,age,city
2,Charles,18,London


In [62]:
filt["fullname"] = "Mike"

# doing this way pandas gives us a warning, and the reason behind it can be studied in the documentation below

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filt["fullname"] = "Mike"


In [51]:
#but we can update the value this way

filt = (data["full_name"] == "Charles")
print(filt)
data.loc[filt,"full_name"] = "Mike"
data

0    False
1    False
2    False
3    False
Name: full_name, dtype: bool


Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Mike,18,London
3,David,47,San Francisco


## Changing data of all rows are specific column

In [64]:
data["full_name"].str.lower()

# this only returned the lower case values and didnot change the dataframe

0      alice
1        bob
2    charles
3      david
Name: full_name, dtype: object

In [65]:
data

Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Charles,18,London
3,David,47,San Francisco


In [67]:
# To change the values actually in the column we can assign it to that column
data["full_name"]= data["full_name"].str.lower()
data

Unnamed: 0,full_name,age,city
0,alice,25,New York
1,bob,32,Paris
2,charles,18,London
3,david,47,San Francisco


In [68]:
data["full_name"]= data["full_name"].str.title()
data

Unnamed: 0,full_name,age,city
0,Alice,25,New York
1,Bob,32,Paris
2,Charles,18,London
3,David,47,San Francisco


## Useful 4 methods to change the values of specific columns for all the rows like making full_name column values into lower case  

1. apply
2. map
3. applymap
4. replace

In [None]:
# These are important function and have complex results so we need to give full attention on how these function wo