# Columns

## Get Started (Load a DataFrame)

In [1]:
import pandas as pd

In [2]:
### Load the first 5 rows of the CSV with all of its columns
filename = "Data_Applicants/aug_train.csv"
df = pd.read_csv(filename, nrows=5)
df

Unnamed: 0,enrollee_id,city,city_development_index,gender,relevent_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours,target
0,8949,city_103,0.92,Male,Has relevent experience,no_enrollment,Graduate,STEM,>20,,,1,36,1.0
1,29725,city_40,0.776,Male,No relevent experience,no_enrollment,Graduate,STEM,15,50-99,Pvt Ltd,>4,47,0.0
2,11561,city_21,0.624,,No relevent experience,Full time course,Graduate,STEM,5,,,never,83,0.0
3,33241,city_115,0.789,,No relevent experience,,Graduate,Business Degree,<1,,Pvt Ltd,never,52,1.0
4,666,city_162,0.767,Male,Has relevent experience,no_enrollment,Masters,STEM,>20,50-99,Funded Startup,4,8,0.0


In [3]:
### Load only a chosen subset of columns (again just the first 5 rows)
filename = "Data_Applicants/aug_train.csv"
colsIwannaUse = [
    "enrollee_id",
    "city",
    "gender",
]
df = pd.read_csv(
    filename,
    usecols=colsIwannaUse,
    nrows=5,
)
df

Unnamed: 0,enrollee_id,city,gender
0,8949,city_103,Male
1,29725,city_40,Male
2,11561,city_21,
3,33241,city_115,
4,666,city_162,Male


## Column Names - Get and Change

In [4]:
### Fetch the column names (= headers) as a NumPy array
columnNames = df.columns.to_numpy()
columnNames

array(['enrollee_id', 'city', 'gender'], dtype=object)

In [5]:
### Capitalize the names (or lowercase them, etc.)
### This only builds a new list; df itself keeps its current headers
columnNames = [name.capitalize() for name in columnNames]
columnNames

['Enrollee_id', 'City', 'Gender']

In [6]:
### Replace every header at once (one new name per existing column)
newColNames = [
    "Potential",
    "Location",
    "MF",
]
df.columns = newColNames
df

Unnamed: 0,Potential,Location,MF
0,8949,city_103,Male
1,29725,city_40,Male
2,11561,city_21,
3,33241,city_115,
4,666,city_162,Male


In [7]:
### Rename only selected columns via an {old name: new name} mapping
df = df.rename(
    columns={
        "Potential": "Enrollee",
    }
)
df

Unnamed: 0,Enrollee,Location,MF
0,8949,city_103,Male
1,29725,city_40,Male
2,11561,city_21,
3,33241,city_115,
4,666,city_162,Male


## Reorder Columns

In [8]:
# df = df[:, [2, 1]] ### Does not work in pandas (this indexing syntax is valid in Julia); use df.iloc[:, [2, 1]] for positional column selection

In [9]:
### Put the columns into any order by listing their labels
newOrder = ["MF", "Location", "Enrollee"]
df = df.loc[:, newOrder]
df

Unnamed: 0,MF,Location,Enrollee
0,Male,city_103,8949
1,Male,city_40,29725
2,,city_21,11561
3,,city_115,33241
4,Male,city_162,666


In [10]:
### Reverse the column order: all rows, columns stepped through backwards
dfN = df.iloc[:, slice(None, None, -1)]
dfN

Unnamed: 0,Enrollee,Location,MF
0,8949,city_103,Male
1,29725,city_40,Male
2,11561,city_21,
3,33241,city_115,
4,666,city_162,Male


## Columns Keeping | Removing | Slicing 

In [11]:
### Option 1: drop the unwanted column(s) by label
dropCols = ["Enrollee"]
dfN = df.drop(columns=dropCols)
dfN

Unnamed: 0,MF,Location
0,Male,city_103
1,Male,city_40
2,,city_21
3,,city_115
4,Male,city_162


In [12]:
### Option 2: select just the columns you want to keep
keepCols = ["MF", "Location"]
dfN = df.loc[:, keepCols]
dfN

Unnamed: 0,MF,Location
0,Male,city_103
1,Male,city_40
2,,city_21
3,,city_115
4,Male,city_162


In [13]:
### Keep everything from the 3rd column onwards (positions are zero-based, so start at 2)
dfN = df.iloc[:, slice(2, None)]
dfN

Unnamed: 0,Enrollee
0,8949
1,29725
2,11561
3,33241
4,666


## Adding Columns - Operating on Columns

In [14]:
import random

### Reload the selected columns, then add new columns by plain assignment
df = pd.read_csv(filename, usecols=colsIwannaUse, nrows=5)
### A scalar value is broadcast to every row of the new column
df["Comment"] = "Great"
### randint() is called once, so all rows receive the same random number
df["myScore"] = random.randint(0, 100)
df

Unnamed: 0,enrollee_id,city,gender,Comment,myScore
0,8949,city_103,Male,Great,71
1,29725,city_40,Male,Great,71
2,11561,city_21,,Great,71
3,33241,city_115,,Great,71
4,666,city_162,Male,Great,71


In [15]:
### Operate on a single column (here: multiply every score by 17)
df["myScore"] = df["myScore"] * 17
df

Unnamed: 0,enrollee_id,city,gender,Comment,myScore
0,8949,city_103,Male,Great,1207
1,29725,city_40,Male,Great,1207
2,11561,city_21,,Great,1207
3,33241,city_115,,Great,1207
4,666,city_162,Male,Great,1207


In [16]:
### Adding a column with row totals
### Only numeric columns can be summed: numeric_only=True skips the text columns.
### Without it, pandas >= 2.0 raises a TypeError when a row mixes numbers and strings.
df.loc[:, 'Total'] = df.sum(axis=1, numeric_only=True)
df

Unnamed: 0,enrollee_id,city,gender,Comment,myScore,Total
0,8949,city_103,Male,Great,1207,10156
1,29725,city_40,Male,Great,1207,30932
2,11561,city_21,,Great,1207,12768
3,33241,city_115,,Great,1207,34448
4,666,city_162,Male,Great,1207,1873


In [23]:
### Insert a new column at a chosen position (0 = leftmost)
### Work on a copy, because insert() modifies the frame in place
dfN = df.copy()
dfN.insert(loc=0, column="Rate", value="OK")
dfN

Unnamed: 0,Rate,enrollee_id,city,gender,Comment,myScore,Total
0,OK,8949,city_103,Male,Great,1207,10156
1,OK,29725,city_40,Male,Great,1207,30932
2,OK,11561,city_21,,Great,1207,12768
3,OK,33241,city_115,,Great,1207,34448
4,OK,666,city_162,Male,Great,1207,1873
