# Columns

## Get Started (Load a DataFrame)

In [1]:
using CSV
using DataFrames

In [2]:
filename = "Data_Applicants/aug_train.csv"
colsIwannaUse = ["enrollee_id", "city", "gender"]
df = CSV.read(filename, DataFrame; select=colsIwannaUse, limit=5)

Unnamed: 0_level_0,enrollee_id,city,gender
Unnamed: 0_level_1,Int64,String,String?
1,8949,city_103,Male
2,29725,city_40,Male
3,11561,city_21,missing
4,33241,city_115,missing
5,666,city_162,Male


## Column Names - Get and Rename


In [3]:
### Get column names ( = headers)
columnNames = names(df)

3-element Array{String,1}:
 "enrollee_id"
 "city"
 "gender"

In [4]:
### Capitalize names (or minimalize, etc.)
rename!(uppercasefirst, df )

Unnamed: 0_level_0,Enrollee_id,City,Gender
Unnamed: 0_level_1,Int64,String,String?
1,8949,city_103,Male
2,29725,city_40,Male
3,11561,city_21,missing
4,33241,city_115,missing
5,666,city_162,Male


In [5]:
### Completely exchange all names
newColNames = [:Potential, :Location, :MF]
rename!(df, newColNames)

Unnamed: 0_level_0,Potential,Location,MF
Unnamed: 0_level_1,Int64,String,String?
1,8949,city_103,Male
2,29725,city_40,Male
3,11561,city_21,missing
4,33241,city_115,missing
5,666,city_162,Male


In [6]:
### Just rename a specific column (or a few of them)
rename!(df, :MF => :Gender)

Unnamed: 0_level_0,Potential,Location,Gender
Unnamed: 0_level_1,Int64,String,String?
1,8949,city_103,Male
2,29725,city_40,Male
3,11561,city_21,missing
4,33241,city_115,missing
5,666,city_162,Male


In [7]:
### Rename a column at a specified position
rename!(df, 3 => :Gender)

Unnamed: 0_level_0,Potential,Location,Gender
Unnamed: 0_level_1,Int64,String,String?
1,8949,city_103,Male
2,29725,city_40,Male
3,11561,city_21,missing
4,33241,city_115,missing
5,666,city_162,Male


## Reorder Columns

In [8]:
dfN = df[!, [2, 1, 3]]

Unnamed: 0_level_0,Location,Potential,Gender
Unnamed: 0_level_1,String,Int64,String?
1,city_103,8949,Male
2,city_40,29725,Male
3,city_21,11561,missing
4,city_115,33241,missing
5,city_162,666,Male


In [9]:
#fN = df[!:[:Potential, :Location, :MF]] ### Does not work
dfN = select(df, [:Potential, :Gender, :Location])

Unnamed: 0_level_0,Potential,Gender,Location
Unnamed: 0_level_1,Int64,String?,String
1,8949,Male,city_103
2,29725,Male,city_40
3,11561,missing,city_21
4,33241,missing,city_115
5,666,Male,city_162


In [18]:
### Reverse order
# dfN = df[:,::-1]   ### a third place does not exist in Julia, but a reverse command
dfN = df[:,reverse(names(df))]

Unnamed: 0_level_0,Gender,Location,Potential,D
Unnamed: 0_level_1,String?,String,Int64,String
1,Male,city_103,8949,Great
2,Male,city_40,29725,Great
3,missing,city_21,11561,Great
4,missing,city_115,33241,Great
5,Male,city_162,666,Great


In [11]:
### Keep columns from col 3 onwards
dfN = df[:,3:end]

Unnamed: 0_level_0,Gender
Unnamed: 0_level_1,String?
1,Male
2,Male
3,missing
4,missing
5,Male


## Adding Columns - Operating on Columns

In [12]:
### Add to the end, a column with numbers (must have length of rows)
n = nrow(df)
dfN = copy(df)
dfN.A = 1:n
dfN

Unnamed: 0_level_0,Potential,Location,Gender,A
Unnamed: 0_level_1,Int64,String,String?,Int64
1,8949,city_103,Male,1
2,29725,city_40,Male,2
3,11561,city_21,missing,3
4,33241,city_115,missing,4
5,666,city_162,Male,5


In [13]:
dfN = copy(df)
dfN[!, :Rating] .= "Amazing"
dfN

Unnamed: 0_level_0,Potential,Location,Gender,Rating
Unnamed: 0_level_1,Int64,String,String?,String
1,8949,city_103,Male,Amazing
2,29725,city_40,Male,Amazing
3,11561,city_21,missing,Amazing
4,33241,city_115,missing,Amazing
5,666,city_162,Male,Amazing


In [14]:
### Add to the end, a column with the same string.
v = fill("Great", n)
dfN.Comment = v
dfN.Comment = fill("Great", n)
dfN

Unnamed: 0_level_0,Potential,Location,Gender,Rating,Comment
Unnamed: 0_level_1,Int64,String,String?,String,String
1,8949,city_103,Male,Amazing,Great
2,29725,city_40,Male,Amazing,Great
3,11561,city_21,missing,Amazing,Great
4,33241,city_115,missing,Amazing,Great
5,666,city_162,Male,Amazing,Great


In [15]:
### Insert the clumns at  a certain position a position
dfN = copy(df)
#insertcols!(df, 1, :B => [1,2,3,4,5])
dfN = insertcols!(df, 1, :D => v)

Unnamed: 0_level_0,D,Potential,Location,Gender
Unnamed: 0_level_1,String,Int64,String,String?
1,Great,8949,city_103,Male
2,Great,29725,city_40,Male
3,Great,11561,city_21,missing
4,Great,33241,city_115,missing
5,Great,666,city_162,Male


In [16]:
dfN = hcat(df,[1,2,3,4,5], makeunique=true)

Unnamed: 0_level_0,D,Potential,Location,Gender,x1
Unnamed: 0_level_1,String,Int64,String,String?,Int64
1,Great,8949,city_103,Male,1
2,Great,29725,city_40,Male,2
3,Great,11561,city_21,missing,3
4,Great,33241,city_115,missing,4
5,Great,666,city_162,Male,5
