# Column Manipulation of Pandas DataFrames

### 1. Import libraries and dependencies

In [3]:
# Import pandas, pathlib, and numpy libraries
import pandas as pd
import numpy as np
from pathlib import Path

### 2. Create a Path to the File Using Pathlib

In [4]:
# Use the Pathlib libary to set the path to the CSV
csvpath=('../Resources/people.csv')

### 3. Read the CSV into a Pandas DataFrame and Display a Few Rows

In [5]:
df = pd.read_csv(csvpath)

In [6]:
df.head()

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


In [3]:
# Use the file path to read the CSV into a DataFrame and display a few rows
df.head()

Unnamed: 0,id,first_name,last_name,email,gender,uni_grad,job_title,Income
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,Putten,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 4. View Column Names

In [7]:
df.columns

Index(['id', 'first_name', 'last_name', 'email', 'gender', 'uni_grad',
       'job_title', 'Income'],
      dtype='object')

In [4]:
# Use the `columns` attribute to output the column names
df.columns

Index(['id', 'first_name', 'last_name', 'email', 'gender', 'uni_grad',
       'job_title', 'Income'],
      dtype='object')

### 5. View Column Data Types

In [8]:
df.dtypes

id            float64
first_name     object
last_name      object
email          object
gender         object
uni_grad       object
job_title      object
Income        float64
dtype: object

In [5]:
# Use the `dtypes` attribute to output the column names and data types
df.dtypes

id            float64
first_name     object
last_name      object
email          object
gender         object
uni_grad       object
job_title      object
Income        float64
dtype: object

### 6. Rename Columns Using List

In [9]:
newColumns = ['Person_ID', 'First_Name', 'Last_Name', 'Email', 'Gender', 'University', 'Occupation', 'Salary']

In [6]:
# Set the `columns` attribute to a new list of column names


Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,Putten,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 7. Rename Columns Using Dictionary

In [10]:
df.columns = newColumns

In [11]:
df.head()

Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


In [7]:
# Use the `rename` function and set the `columns` parameter to a dictionary of new column names


Unnamed: 0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
0,1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
1,2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
2,3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
3,4.0,Cullie,Putten,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
4,5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 8. Re-order Columns

In [12]:
newdf = df[['Person_ID', 'Last_Name', 'First_Name', 'Gender', 'University', 'Occupation', 'Salary', 'Email']]

In [13]:
newdf.head()

Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com
3,4.0,,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com


In [8]:
# Use a list of re-ordered column names to alter the column order of the original DataFrame
newdf = df[['Person_ID', 'Last_Name', 'First_Name', 'Gender', 'University', 'Occupation', 'Salary', 'Email']]

Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com
3,4.0,Putten,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com


### 9. Create Additional Column

In [24]:
age = np.random.randint(low=22, high=65, size=1000)

In [25]:
age

array([38, 37, 23, 31, 28, 34, 59, 28, 22, 35, 51, 32, 63, 64, 58, 30, 30,
       57, 31, 25, 44, 33, 23, 56, 61, 38, 49, 23, 36, 36, 62, 23, 52, 61,
       62, 63, 46, 45, 51, 60, 30, 60, 62, 31, 46, 44, 41, 62, 22, 57, 61,
       60, 39, 46, 39, 31, 42, 59, 63, 42, 49, 56, 22, 47, 27, 31, 56, 53,
       32, 55, 49, 38, 47, 41, 35, 23, 48, 33, 57, 57, 41, 34, 44, 51, 43,
       25, 46, 50, 36, 42, 57, 49, 51, 62, 50, 60, 35, 52, 33, 29, 60, 31,
       52, 51, 60, 49, 24, 22, 31, 43, 48, 35, 35, 47, 32, 39, 26, 44, 44,
       47, 31, 26, 27, 53, 32, 36, 27, 40, 49, 37, 32, 45, 28, 36, 32, 32,
       55, 28, 42, 40, 63, 33, 50, 56, 49, 63, 26, 28, 39, 44, 25, 64, 25,
       59, 57, 26, 58, 56, 30, 22, 61, 24, 57, 30, 22, 38, 25, 35, 44, 54,
       45, 36, 24, 33, 59, 62, 23, 23, 62, 24, 48, 42, 55, 45, 28, 54, 54,
       29, 30, 47, 33, 61, 51, 39, 56, 34, 29, 29, 45, 28, 62, 39, 53, 62,
       41, 32, 54, 62, 35, 46, 43, 40, 25, 42, 40, 39, 46, 50, 39, 43, 43,
       23, 32, 28, 37, 46

In [18]:
newage = pd.DataFrame(age)

In [26]:
newdf['Age_Copy'] =np.random.randint(low=22, high=65, size=1000)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newdf['Age_Copy'] =np.random.randint(low=22, high=65, size=1000)


In [27]:
newdf.head()

Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age_Copy
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,25
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,28
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,61
3,4.0,,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,28
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,40


In [29]:
newdf['age'] = newdf['Age_Copy']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newdf['age'] = newdf['Age_Copy']


In [9]:
# Use the `randint` function to randomly generate an `Age` from 22 to 65 for 1000 rows


Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age,Age_Copy
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,27,42
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,22,51
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,40,43
3,4.0,Putten,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,62,56
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,47,45


### 10. Delete Columns

In [31]:
del newdf['Age_Copy']

In [32]:
newdf.head()

Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,age
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,25
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,28
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,61
3,4.0,,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,28
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,40


In [10]:
# Use the `drop` function to delete the newly created `Age_Copy` column


Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age
0,1.0,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,27
1,2.0,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,22
2,3.0,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,40
3,4.0,Putten,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,62
4,5.0,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,47


### 11. Save Modified DataFrame to New CSV

In [11]:
# Save the DataFrame to the `Resources` folder
