# Python Pandas Tutorial (part 5): Updating Rows and Columns 
# Modifying data with Dataframes

In [62]:
# Sample records stored column-wise: each key is a column name and each list
# holds that column's values in row order.
people = dict(
    first=["corey", "jane", "john"],
    last=["schafer", "doe", "doe"],
    email=["coreyschafer@gmail.com", "janedoe@email.com", "johndoe@email.com"],
)

In [63]:
import pandas as pd

df = pd.DataFrame(people)

### Modify the columns

#### This changes all the columns

In [64]:

df.columns = ["first_name", "last_name", "email"]
df

Unnamed: 0,first_name,last_name,email
0,corey,schafer,coreyschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@email.com


#### List comprehension  
Change the case of every column name with <code>.upper()</code> or <code>.lower()</code>

In [65]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@email.com


Using <code>.str.replace(old, new)</code> to replace characters in every column name

In [66]:
df.columns = df.columns.str.replace(" ", "_")
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@email.com


#### How to only change 'SOME COLUMNS'

In [67]:
# Rename only some columns: keys are the current names, values the new ones.
# The result is reassigned instead of using inplace=True, which keeps the
# statement chain-friendly and makes the new binding of df explicit.
df = df.rename(columns={"FIRST_NAME": "first", "LAST_NAME": "last"})
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@email.com


### Update the data in the ROWS

#### How to update a single value?

In [68]:
# Let's grab row 2, selecting it by its index label with .loc
df.loc[2]

first                 john
last                   doe
EMAIL    johndoe@email.com
Name: 2, dtype: object

In [69]:
# Overwrite the whole of row 2: one new value per column, in column order
df.loc[2] = ["John", "Smith", "JohnSMith@gmail.com"]
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,doe,janedoe@email.com
2,John,Smith,JohnSMith@gmail.com


#### Find the position: <code>.loc[row_label, column_label(s)]</code>


In [70]:
# Update only the 'last' and 'EMAIL' columns of row 1
df.loc[1,['last',"EMAIL"]] = ("Doe", "JaneDoe@Gmail.com")
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,Doe,JaneDoe@Gmail.com
2,John,Smith,JohnSMith@gmail.com


In [71]:
df.loc[2,"last"] = "Doe"
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,Doe,JaneDoe@Gmail.com
2,John,Doe,JohnSMith@gmail.com


#### To change a single value, we can use the <code>.at[]</code> accessor 
(<code>.at</code> is a fast accessor for getting or setting a single value; <code>.loc</code> and <code>.iloc</code> (integer location) work as well)

In [72]:
df.at[1, "last"] = "doe"
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,doe,JaneDoe@Gmail.com
2,John,Doe,JohnSMith@gmail.com


### Warnings. 

In [73]:
# Build a boolean filter on the EMAIL column; indexing with it is a good way
# to LOOK UP values
filt = (df["EMAIL"] == "JaneDoe@Gmail.com")
df[filt]

Unnamed: 0,first,last,EMAIL
1,jane,doe,JaneDoe@Gmail.com


In [74]:
df[filt]['last']

1    doe
Name: last, dtype: object

In [75]:
# This triggers the warning shown below: df[filt]['last'] is chained
# indexing, and pandas reports that the value is being set on a copy of a
# slice from the DataFrame rather than on df itself.
df[filt]['last'] = "Smith"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = "Smith"


In [76]:
# Assigning through a filter: pass the row filter and the column label to
# .loc in a single indexing step, as the warning message recommends
df.loc[filt,"last"] = 'Smith'
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,Smith,JaneDoe@Gmail.com
2,John,Doe,JohnSMith@gmail.com


In [77]:
# Lowercase every email address at once (changes multiple rows)
df["EMAIL"] = df["EMAIL"].str.lower()

## More advanced methods (4)  
<code>.apply() .map() .applymap() .replace()</code>

### .apply()
#### <code>.apply()</code> is used for calling a function on values. It can be used on a DataFrame or a Series object

<code>.apply()</code>

##### Using <code>.apply()</code> with series

In [78]:
# lets see the length of all our email addresses
df["EMAIL"].apply(len)

0    22
1    17
2    19
Name: EMAIL, dtype: int64

In [79]:
# can use it to update
def update_email(email):
    """Return ``email`` converted to upper case.

    Intended to be passed to ``Series.apply`` so it is called once per value.

    Parameters
    ----------
    email : str
        A single email address.

    Returns
    -------
    str
        The same address with every character upper-cased.
    """
    uppercased = email.upper()
    return uppercased

In [80]:
df["EMAIL"].apply(update_email)

0    COREYSCHAFER@GMAIL.COM
1         JANEDOE@GMAIL.COM
2       JOHNSMITH@GMAIL.COM
Name: EMAIL, dtype: object

In [81]:
# lets assign
df["EMAIL"] = df["EMAIL"].apply(update_email)
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,COREYSCHAFER@GMAIL.COM
1,jane,Smith,JANEDOE@GMAIL.COM
2,John,Doe,JOHNSMITH@GMAIL.COM


In [82]:
# Using a lambda instead of a named function; convert the emails back to
# lowercase
df["EMAIL"] = df["EMAIL"].apply(lambda email: email.lower())
df

Unnamed: 0,first,last,EMAIL
0,corey,schafer,coreyschafer@gmail.com
1,jane,Smith,janedoe@gmail.com
2,John,Doe,johnsmith@gmail.com


#### Using <code>.apply()</code> with dataframes

In [83]:
#series
df["EMAIL"].apply(len)

0    22
1    17
2    19
Name: EMAIL, dtype: int64

In [84]:
# On a DataFrame, apply calls the function once per column, so len returns
# the number of rows in each column (not the length of each value)
df.apply(len)

first    3
last     3
EMAIL    3
dtype: int64

In [85]:
# Rename EMAIL to lowercase so it matches the other column names; reassign
# the result rather than mutating with inplace=True
df = df.rename(columns={"EMAIL": "email"})

In [86]:
df

Unnamed: 0,first,last,email
0,corey,schafer,coreyschafer@gmail.com
1,jane,Smith,janedoe@gmail.com
2,John,Doe,johnsmith@gmail.com


In [87]:
len(df["email"])

3

#### Using .apply for rows and columns

In [88]:
# With axis="columns" the function is applied across the columns, i.e. once
# per row, so len returns the number of columns in each row
df.apply(len, axis="columns")

0    3
1    3
2    3
dtype: int64

In [89]:
df.apply(pd.Series.min)

first                      John
last                        Doe
email    coreyschafer@gmail.com
dtype: object

In [90]:
df.apply(pd.Series.max)

first                   jane
last                 schafer
email    johnsmith@gmail.com
dtype: object

In [91]:
df.apply(lambda x: x.min())

first                      John
last                        Doe
email    coreyschafer@gmail.com
dtype: object

In [92]:
df.apply(lambda x: x.max())

first                   jane
last                 schafer
email    johnsmith@gmail.com
dtype: object

### <code>.applymap()</code> applies a function to each individual value in the DataFrame

In [93]:
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,22
1,4,5,17
2,4,3,19


In [94]:
# Let's get every value in lowercase (returns a new DataFrame; df is unchanged)
# NOTE(review): newer pandas releases deprecate DataFrame.applymap in favor of
# DataFrame.map — confirm against the installed pandas version.
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreyschafer@gmail.com
1,jane,smith,janedoe@gmail.com
2,john,doe,johnsmith@gmail.com


#### <code>.map</code> ONLY WORKS ON A SERIES. SUBSTITUTE ONE VALUE FOR ANOTHER (values missing from the mapping become NaN)

In [95]:
df['first'] = df['first'].map({"corey":"chris", "jane": "mary"})

In [96]:
df['first'].replace({"corey":"chris", "jane": "mary"})

0    chris
1     mary
2      NaN
Name: first, dtype: object

In [97]:
# using map
df

Unnamed: 0,first,last,email
0,chris,schafer,coreyschafer@gmail.com
1,mary,Smith,janedoe@gmail.com
2,,Doe,johnsmith@gmail.com


In [98]:
# Load the larger example dataset from a CSV file (relative path).
large_df = pd.read_csv("Pokemon.csv")

# Lift pandas' display limits so neither rows nor columns are truncated.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [99]:
large_df.head(2)

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False


In [100]:
# Rename the "Name" column to lowercase; reassign the result rather than
# mutating with inplace=True. head(0) displays just the column headers.
large_df = large_df.rename(columns={"Name": "name"})
large_df.head(0)

Unnamed: 0,#,name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary


In [101]:
# Restore the original "Name" column label; reassign the result rather than
# mutating with inplace=True
large_df = large_df.rename(columns={"name": "Name"})

In [102]:
large_df["Legendary"].map({False: "no", True: "Yes"})

0       no
1       no
2       no
3       no
4       no
5       no
6       no
7       no
8       no
9       no
10      no
11      no
12      no
13      no
14      no
15      no
16      no
17      no
18      no
19      no
20      no
21      no
22      no
23      no
24      no
25      no
26      no
27      no
28      no
29      no
30      no
31      no
32      no
33      no
34      no
35      no
36      no
37      no
38      no
39      no
40      no
41      no
42      no
43      no
44      no
45      no
46      no
47      no
48      no
49      no
50      no
51      no
52      no
53      no
54      no
55      no
56      no
57      no
58      no
59      no
60      no
61      no
62      no
63      no
64      no
65      no
66      no
67      no
68      no
69      no
70      no
71      no
72      no
73      no
74      no
75      no
76      no
77      no
78      no
79      no
80      no
81      no
82      no
83      no
84      no
85      no
86      no
87      no
88      no
89      no
90      no