# Column Manipulation

In [124]:
# Initial imports
import pandas as pd
from pathlib import Path

In [125]:
# Location of the customer CSV. The path is relative, so it is resolved
# against the directory the notebook kernel is running in.
csvpath = Path("../Resources/customers.csv")

In [126]:
# Load the CSV into a DataFrame using pandas' default parsing options
# (column types are inferred from the data).
# NOTE(review): Zip looks numeric in the preview (e.g. 1922). If the raw file
# holds leading zeros, they would be lost; confirm against the CSV and
# consider passing dtype={"Zip": str}.
customer_dataframe = pd.read_csv(csvpath)

In [127]:
# Preview the first five rows to check the file loaded with the expected columns
customer_dataframe.head()

Unnamed: 0,FullName,Email,Address,Zip,CreditCard,Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300


### Replace Columns

In [128]:
# Display the current column names.
# Only a cell's last expression is rendered automatically, so the column Index
# is shown explicitly with Jupyter's built-in `display` before its type is
# inspected as the cell's result.
display(customer_dataframe.columns)
type(customer_dataframe.columns)

pandas.core.indexes.base.Index

In [129]:
# Indexing with a single column label returns that one column
customer_dataframe["Email"]

0     unhideable1966@gmail.com
1      allgood1803@outlook.com
2       satsumas1954@yahoo.com
3       antifowl1875@gmail.com
4     acetaminol1979@yahoo.com
                ...           
95       plantula1818@live.com
96    doolittle1818@yandex.com
97          mosso1961@live.com
98      ovenful1914@yandex.com
99        esson1909@yandex.com
Name: Email, Length: 100, dtype: object

In [130]:
# Check what kind of object single-label selection produces
type(customer_dataframe["Email"])

pandas.core.series.Series

In [131]:
# Replacement labels, listed in the same left-to-right order as the
# DataFrame's existing columns (one entry per column).
columns = [
    "Full Name",
    "Email",
    "Address",
    "Zip Code",
    "Credit Card Number",
    "Account Balance",
]

In [132]:
# Replace every column label at once. Labels are matched by position, so the
# list must contain exactly one name per existing column, in column order.
customer_dataframe.columns = columns

In [133]:
# Preview the first five rows to verify the new column labels
customer_dataframe.head()

Unnamed: 0,Full Name,Email,Address,Zip Code,Credit Card Number,Account Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300


### Rename Columns

The `rename` method can be used to replace or update column names selectively. Simply pass a dictionary to its `columns` parameter in the following format:

```python
{
"Old Column Name": "New Column Name"
}
```

In [134]:
# Use the `rename` method to change only the listed column names; any label
# not present in the mapping is left unchanged.
# `rename` returns a new DataFrame, so the result is assigned back to the
# variable to persist the change.
customer_dataframe = customer_dataframe.rename(columns={
    "Full Name": "full_name",
    "Credit Card Number": "credit_card_number",
})

In [135]:
# Preview the first five rows to confirm that only the mapped columns were renamed
customer_dataframe.head()

Unnamed: 0,full_name,Email,Address,Zip Code,credit_card_number,Account Balance
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300


### Split Columns

In [136]:
# Inspect the full_name column before splitting it into separate parts
customer_dataframe["full_name"] 

0       Altha Frederick
1       Nickolas Harvey
2       Jesusita Kinney
3           Mose Gordon
4       Cesar Valentine
            ...        
95       Crystle Larson
96      Jetta Davenport
97      Dallas Johnston
98    Roberto Daugherty
99         Abel Walters
Name: full_name, Length: 100, dtype: object

In [137]:
# A Series has no `.split` method of its own, so
# `customer_dataframe["full_name"].split(" ")` raises an error; string
# operations are reached through the `.str` accessor instead.
# `expand=True` returns a DataFrame with one column per piece (labelled 0, 1, ...)
# rather than a single column of lists.
names = customer_dataframe["full_name"].str.split(" ", expand=True)

names.head(2)

Unnamed: 0,0,1
0,Altha,Frederick
1,Nickolas,Harvey


In [138]:
# Attach the two pieces of the split name to the main DataFrame as new columns.
# Assigning to a label that does not exist yet appends a column.
customer_dataframe["first_name"] = names[0]
customer_dataframe["last_name"] = names[1]
# Preview the first five rows with the two new columns
customer_dataframe.head()


Unnamed: 0,full_name,Email,Address,Zip Code,credit_card_number,Account Balance,first_name,last_name
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511,Altha,Frederick
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850,Nickolas,Harvey
2,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,3717 863466 48574,21254,Jesusita,Kinney
3,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5413 1700 6989 2835,5221,Mose,Gordon
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300,Cesar,Valentine


In [139]:
# Row selection with a boolean mask passed to `.loc`.
# Only the last expression in a cell is rendered, so the result of this first
# filter is computed and then discarded.
# NOTE(review): this looks like a check for blank last names; confirm intent.
customer_dataframe.loc[customer_dataframe["last_name"]== ' ']
# Rows whose account balance is exactly 8300 (this is the result shown)
customer_dataframe.loc[customer_dataframe["Account Balance"]== 8300]

Unnamed: 0,full_name,Email,Address,Zip Code,credit_card_number,Account Balance,first_name,last_name
4,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,5173 4883 9215 4743,8300,Cesar,Valentine


### Select Columns

In [140]:
# Pick two columns by label; the selection is a new DataFrame bound to `test`
test = customer_dataframe.loc[:, ['credit_card_number', 'Account Balance']]

In [141]:
# Preview the first two rows of the two-column selection
test.head(2)

Unnamed: 0,credit_card_number,Account Balance
0,2524 2317 2139 4751,21511
1,4756 0997 9568 1329,13850


In [142]:
# Selecting columns into `test` did not modify the original DataFrame:
# it still shows all of its columns.
customer_dataframe.head(2)

Unnamed: 0,full_name,Email,Address,Zip Code,credit_card_number,Account Balance,first_name,last_name
0,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,2524 2317 2139 4751,21511,Altha,Frederick
1,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,4756 0997 9568 1329,13850,Nickolas,Harvey


### Reorder Columns

In [143]:
# Single brackets with one label: check the type of the selected column
type(customer_dataframe['credit_card_number'])
# (evaluate customer_dataframe['credit_card_number'] on its own to see the values)

pandas.core.series.Series

In [144]:
# Double brackets (a list holding one label): check the type of the selection
type(customer_dataframe[['credit_card_number']])
# (evaluate customer_dataframe[['credit_card_number']] on its own to see the values)


pandas.core.frame.DataFrame

In [145]:
# Reorder by selecting the columns in the desired order. Indexing with a list
# builds a new DataFrame that holds only the listed columns, so the unlisted
# first_name and last_name columns are not carried over.
customer_dataframe = customer_dataframe[
    [
        'credit_card_number',
        'Account Balance',
        'full_name',
        'Email',
        'Address',
        'Zip Code',
    ]
]

In [146]:
# Preview the first five rows to verify the new column order
customer_dataframe.head()

Unnamed: 0,credit_card_number,Account Balance,full_name,Email,Address,Zip Code
0,2524 2317 2139 4751,21511,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353
1,4756 0997 9568 1329,13850,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922
2,3717 863466 48574,21254,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535
3,5413 1700 6989 2835,5221,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098
4,5173 4883 9215 4743,8300,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895


### Create Columns

In [147]:
# Derive a new column: the account balance expressed in thousands
customer_dataframe["Balance (1k)"] = customer_dataframe["Account Balance"].div(1000)

In [148]:
# Preview the first five rows, now including the "Balance (1k)" column
customer_dataframe.head()

Unnamed: 0,credit_card_number,Account Balance,full_name,Email,Address,Zip Code,Balance (1k)
0,2524 2317 2139 4751,21511,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,21.511
1,4756 0997 9568 1329,13850,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,13.85
2,3717 863466 48574,21254,Jesusita Kinney,satsumas1954@yahoo.com,943 Gibb Highway,41535,21.254
3,5413 1700 6989 2835,5221,Mose Gordon,antifowl1875@gmail.com,1073 Fell Trace,16098,5.221
4,5173 4883 9215 4743,8300,Cesar Valentine,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,8.3


In [149]:
# Flag each row whose account balance is strictly greater than 10000
customer_dataframe["large_balance"] = customer_dataframe["Account Balance"].gt(10000)

In [150]:
# Preview the first two rows with the new large_balance flag
customer_dataframe.head(2)

Unnamed: 0,credit_card_number,Account Balance,full_name,Email,Address,Zip Code,Balance (1k),large_balance
0,2524 2317 2139 4751,21511,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,21.511,True
1,4756 0997 9568 1329,13850,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,13.85,True


In [151]:
# Assigning to an existing label replaces that column's values:
# here every flag is multiplied by 0, leaving a column of zeros.
customer_dataframe["large_balance"] = customer_dataframe["large_balance"].mul(0)

In [152]:
# Preview the first two rows to see the overwritten large_balance values
customer_dataframe.head(2)

Unnamed: 0,credit_card_number,Account Balance,full_name,Email,Address,Zip Code,Balance (1k),large_balance
0,2524 2317 2139 4751,21511,Altha Frederick,unhideable1966@gmail.com,67 John Maher Extension,31353,21.511,0
1,4756 0997 9568 1329,13850,Nickolas Harvey,allgood1803@outlook.com,1200 Madera Plaza,1922,13.85,0


### Delete Columns

In [153]:
# Remove two columns by label. `drop` hands back a new DataFrame, which is
# bound to the same variable so the removal persists.
customer_dataframe = customer_dataframe.drop(
    ["full_name", "large_balance"],
    axis="columns",
)

In [154]:
# Preview the first five rows to confirm the two columns were removed
customer_dataframe.head()

Unnamed: 0,credit_card_number,Account Balance,Email,Address,Zip Code,Balance (1k)
0,2524 2317 2139 4751,21511,unhideable1966@gmail.com,67 John Maher Extension,31353,21.511
1,4756 0997 9568 1329,13850,allgood1803@outlook.com,1200 Madera Plaza,1922,13.85
2,3717 863466 48574,21254,satsumas1954@yahoo.com,943 Gibb Highway,41535,21.254
3,5413 1700 6989 2835,5221,antifowl1875@gmail.com,1073 Fell Trace,16098,5.221
4,5173 4883 9215 4743,8300,acetaminol1979@yahoo.com,805 Marshall Promenade,99895,8.3
