# Load DataFrames

In [1]:
using CSV
using DataFrames


In [2]:
# Read the complete cars data set into a DataFrame.
filename = "NeutralData/cars.csv"
df = CSV.read(filename, DataFrame)

# A notebook cell only renders the value of its final expression, so the
# head of the table has to be displayed explicitly; otherwise it is computed
# and silently thrown away.
display(first(df, 3))

# Final expression: the last three rows become the cell's output.
last(df, 3)

Unnamed: 0_level_0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs
Unnamed: 0_level_1,String,String,Int64,Float64,String,Int64
1,dodge rampage,32.0,4,135.0,84,2295
2,ford ranger,28.0,4,120.0,79,2625
3,chevy s-10,31.0,4,119.0,82,2720


In [3]:
# Check which container type CSV.read produced for the sink given above.
typeof(df)

DataFrame

## Limit the columns and the number of rows read from the file

In [4]:
# Read only the listed columns, and stop after the first three data rows.
colsIwannaUse = [:Name, :Cylinders]
filename = "NeutralData/cars.csv"
df = CSV.read(
    filename,
    DataFrame;
    select = colsIwannaUse,  # columns to keep; all others are skipped
    limit = 3,               # maximum number of rows to parse
)

Unnamed: 0_level_0,Name,Cylinders
Unnamed: 0_level_1,String,Int64
1,chevrolet chevelle malibu,8
2,buick skylark 320,8
3,plymouth satellite,8


## The comma delimiter and the header in the top row can, but need not, be specified


In [5]:
# Same read as before, but with the delimiter and the header row spelled out.
# `filename` is the path assigned in an earlier cell.
df = CSV.read(
    filename,
    DataFrame;
    header = 1,   # column names are taken from the first line
    delim = ",",  # fields are comma separated
)
first(df, 3)

Unnamed: 0_level_0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower
Unnamed: 0_level_1,String,String,Int64,Float64,String
1,chevrolet chevelle malibu,18.0,8,307.0,130
2,buick skylark 320,15.0,8,350.0,165
3,plymouth satellite,18.0,8,318.0,150


## Converter - Does not (yet) exist in Julia

## Missing Values

In [15]:
# Parse every field whose text is exactly "NA" as `missing`.
# `missingstring` is used instead of `missingstrings`: the plural keyword is
# deprecated in current CSV.jl releases, while the singular form with a single
# sentinel string is accepted by older and newer versions alike.
# NOTE(review): `filename` is whatever an earlier cell assigned — presumably
# the cars data set; confirm before running this cell out of order.
df = CSV.read(filename, DataFrame; missingstring="NA")

## Dates

In [7]:
using Dates

# Read the CO2 series, parsing date fields written as year-month-day.
filename = "NeutralData/co2.csv"
df = CSV.read(
    filename,
    DataFrame;
    dateformat = "yyyy-mm-dd",
)

# Derive a new column holding every date shifted forward by 180 days.
df[!, "DatePlus"] = df.Date .+ Day(180)

first(df, 5)

Unnamed: 0_level_0,Date,CO2,adjusted CO2,DatePlus
Unnamed: 0_level_1,Date,Float64,Float64,Date
1,1958-03-01,315.7,314.44,1958-08-28
2,1958-04-01,317.46,315.16,1958-09-28
3,1958-05-01,317.51,314.71,1958-10-28
4,1958-07-01,315.86,315.19,1958-12-28
5,1958-08-01,314.93,316.19,1959-01-28


## Transposed Data

In [8]:
# Read the Covid table transposed: each row of the file is treated as a
# column of the result. At most 13 rows are parsed.
filename = "NeutralData/Covid_Data.csv"
df = CSV.read(
    filename,
    DataFrame;
    transpose = true,
    dateformat = "mm/dd/yy",
    limit = 13,
)
df


Unnamed: 0_level_0,Province/State,Column2,Column3,Column4,Column5,Column6,Column7
Unnamed: 0_level_1,String,String,String,String,String,String,String
1,Country/Region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda
2,Lat,33.93911,41.1533,28.0339,42.5063,-11.2027,17.0608
3,Long,67.709953,20.1683,1.6596,1.5218,17.8739,-61.7964
4,1/22/20,0,0,0,0,0,0
5,1/23/20,0,0,0,0,0,0
6,1/24/20,0,0,0,0,0,0
7,1/25/20,0,0,0,0,0,0
8,1/26/20,0,0,0,0,0,0
9,1/27/20,0,0,0,0,0,0
10,1/28/20,0,0,0,0,0,0


In [9]:
# Transposed read again, this time keeping only two of the resulting columns.
# `filename` is the path assigned in an earlier cell.
colsIwannaUse = ["Province/State", "Column7"]
df = CSV.read(
    filename,
    DataFrame;
    transpose = true,
    dateformat = "mm/dd/yy",
    select = colsIwannaUse,
    limit = 13,
)
df

Unnamed: 0_level_0,Province/State,Column7
Unnamed: 0_level_1,String,String
1,Country/Region,Antigua and Barbuda
2,Lat,17.0608
3,Long,-61.7964
4,1/22/20,0
5,1/23/20,0
6,1/24/20,0
7,1/25/20,0
8,1/26/20,0
9,1/27/20,0
10,1/28/20,0
