# Column Manipulation of Pandas DataFrames

### 1. Import libraries and dependencies

In [44]:
# Import pandas, pathlib, and numpy libraries
from pathlib import Path
import pandas as pd
import numpy as np

### 2. Create a Path to the File Using Pathlib

In [45]:
csvpath = Path("../Resources/people.csv")

### 3. Read the CSV into a Pandas DataFrame and Display a Few Rows

In [46]:
# Use the file path to read the CSV into a DataFrame and display a few rows
df = pd.read_csv(csvpath, index_col = 'id')
df.head()


Unnamed: 0_level_0,first_name,last_name,email,gender,uni_grad,job_title,Income
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0
2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0
3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0
4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0
5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0


### 4. View Columns Names

In [7]:
# Use the `columns` attribute to output the column names
df.columns

Index(['first_name', 'last_name', 'email', 'gender', 'uni_grad', 'job_title',
       'Income'],
      dtype='object')

In [8]:
df.describe(include='all')

Unnamed: 0,first_name,last_name,email,gender,uni_grad,job_title,Income
count,993,990,992,998,991,998,998.0
unique,914,974,991,2,939,183,
top,Ailbert,Raveau,ayang@codedrills.com,Male,University of Portland,Chief Design Engineer,
freq,3,2,2,508,3,16,
mean,,,,,,,84691.693387
std,,,,,,,20185.664319
min,,,,,,,50013.0
25%,,,,,,,67353.0
50%,,,,,,,84296.0
75%,,,,,,,102374.75


### 5. View Column Data Types

In [39]:
# Use the `dtypes` attribute to output the column names and data
df.dtypes

Person_ID      object
First_Name     object
Email          object
Gender         object
University     object
Occupation     object
Salary        float64
dtype: object

### 6. Rename Columns Using List

In [61]:
# Set the `columns` attribute to a new list of column names
columns = ["Person_ID", "First_Name", "Last_Name", "Email", "Gender", "University", "Occupation", "Salary"]
df.columns = columns

df.head()

Unnamed: 0_level_0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0,35
2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0,44
3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0,28
4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,50
5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0,51


### 7. Rename Columns Using Dictionary

In [62]:
df =df.rename(columns ={ "id":"Person_ID", "first_name": "First_Name", "last_name":"Last_Name","email": "Email", "gender":"Gender", "uni_grad":"University", "job_title" :"Occupation", "Income":"Salary" })
df.head()

Unnamed: 0_level_0,Person_ID,First_Name,Last_Name,Email,Gender,University,Occupation,Salary
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0,35
2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0,44
3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0,28
4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,50
5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0,51


In [68]:
# Use the `rename` function and set the `columns` parameter to a dictionary of new column names
df = df.rename(columns={"id": "Person_ID","first_name": "First_Name","last_name": "Last_Name","email": "Email","gender": "Gender","uni_grad": "University","job_title": "Occupation","Income": "Salary"})

df.head()

Unnamed: 0_level_0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age,Age_Copy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,Keriann,klenormand0@businessinsider.com,Lenormand,Aurora University,Nurse Practicioner,58135.0,35,Female,55,45
2.0,Huntley,hrupke1@reuters.com,Rupke,Osaka University of Economics,Project Manager,96053.0,44,Male,49,37
3.0,Gorden,gdalgarnowch2@microsoft.com,Dalgarnowch,Ludong University,Environmental Tech,59196.0,28,Male,22,47
4.0,Cullie,cputten3@nymag.com,,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,50,Male,50,23
5.0,Ariel,astrangman4@bravesites.com,Strangman,Boise State University,Project Manager,89073.0,51,Female,25,36


### 8. Re-order Columns

In [70]:
# Use a list of re-ordered column names to alter the column order of the original DataFrame
df = df[["Person_ID","Last_Name", "First_Name","Gender","University", "Occupation", "Salary", "Email"]]
df.head()

Unnamed: 0_level_0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,Keriann,klenormand0@businessinsider.com,Lenormand,Aurora University,Nurse Practicioner,58135.0,35,Female
2.0,Huntley,hrupke1@reuters.com,Rupke,Osaka University of Economics,Project Manager,96053.0,44,Male
3.0,Gorden,gdalgarnowch2@microsoft.com,Dalgarnowch,Ludong University,Environmental Tech,59196.0,28,Male
4.0,Cullie,cputten3@nymag.com,,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,50,Male
5.0,Ariel,astrangman4@bravesites.com,Strangman,Boise State University,Project Manager,89073.0,51,Female


### 9. Create Additional Column

In [67]:
# Use the `randint` function to randomly generate an `Age` from 22 to 65 for 1000 rows
# Use the `randint` function to randomly generate an `Age` from 22 to 65 for 1000 rows

df["Age"] = np.random.randint(low=22, high=65, size=1000)
df["Age_Copy"] = np.random.randint(low=22, high=65, size=1000)
df.head()

Unnamed: 0_level_0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age,Age_Copy
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1.0,Keriann,klenormand0@businessinsider.com,Lenormand,Aurora University,Nurse Practicioner,58135.0,35,Female,55,45
2.0,Huntley,hrupke1@reuters.com,Rupke,Osaka University of Economics,Project Manager,96053.0,44,Male,49,37
3.0,Gorden,gdalgarnowch2@microsoft.com,Dalgarnowch,Ludong University,Environmental Tech,59196.0,28,Male,22,47
4.0,Cullie,cputten3@nymag.com,,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,50,Male,50,23
5.0,Ariel,astrangman4@bravesites.com,Strangman,Boise State University,Project Manager,89073.0,51,Female,25,36


### 10. Delete Columns

In [54]:

# Use the `drop` function to delete the newly created `Age` column
df = df.drop(columns=["Age_Copy"])
df.head()

Unnamed: 0_level_0,first_name,last_name,email,gender,uni_grad,job_title,Income,Age
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.0,Keriann,Lenormand,klenormand0@businessinsider.com,Female,Aurora University,Nurse Practicioner,58135.0,35
2.0,Huntley,Rupke,hrupke1@reuters.com,Male,Osaka University of Economics,Project Manager,96053.0,44
3.0,Gorden,Dalgarnowch,gdalgarnowch2@microsoft.com,Male,Ludong University,Environmental Tech,59196.0,28
4.0,Cullie,,cputten3@nymag.com,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,50
5.0,Ariel,Strangman,astrangman4@bravesites.com,Female,Boise State University,Project Manager,89073.0,51


### 11. Save Modified DataFrame to New CSV

In [60]:
# Save the DataFrame to the `Resources` folder
df.to_csv("../Resources/people_reordered.csv")
