# Examples showing how to create new columns
## Copyright (C) Princeton Consultants, 2017-2018
### First import the pandas library and numpy so we can create random numbers

In [1]:
import pandas as pd
import numpy as np

### Create a DataFrame with some random data for hours and minutes

In [2]:
# Build 8 rows of random sample data: hours drawn from 1-9 and minutes from 0-59.
# np.random.randint excludes its upper bound, so the minutes call must pass 60
# (not 59) for the value 59 to be possible.
df = pd.DataFrame({
    'hours': np.random.randint(1, 10, 8),
    'minutes': np.random.randint(0, 60, 8),
})
df

Unnamed: 0,hours,minutes
0,9,16
1,1,28
2,6,38
3,6,41
4,2,57
5,7,10
6,7,43
7,1,5


### Use `assign` to compute total minutes as a new column

In [3]:
# assign() returns a new DataFrame with the extra 'totmin' column;
# rebinding the result to df keeps the column for the cells that follow.
df = df.assign(totmin=df['hours'].mul(60).add(df['minutes']))
df

Unnamed: 0,hours,minutes,totmin
0,9,16,556
1,1,28,88
2,6,38,398
3,6,41,401
4,2,57,177
5,7,10,430
6,7,43,463
7,1,5,65


### You can also just create the column directly

In [4]:
# Item assignment adds the 'mintot' column to the existing DataFrame in place.
df['mintot'] = df['minutes'] + 60 * df['hours']
df

Unnamed: 0,hours,minutes,totmin,mintot
0,9,16,556,556
1,1,28,88,88
2,6,38,398,398
3,6,41,401,401
4,2,57,177,177
5,7,10,430,430
6,7,43,463,463
7,1,5,65,65


### Read in data containing dates as integers

In [5]:
# Load the date table from the CSV file in the working directory;
# later cells read its Begin and End columns as YYYYMMDD integers.
didf = pd.read_csv(filepath_or_buffer="dates.csv")
didf.head(n=5)

Unnamed: 0,Begin,End
0,20150407,20150411
1,20150404,20150411
2,20150409,20150413
3,20150409,20150417
4,20150409,20150411


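### Use integer arithmetic to compute the year, month, day, and differences. Exercise care with dependencies (a column such as `BegYear` must already exist before a later `assign` can reference it), and reorder the columns. Note that subtracting the raw integers only equals the number of days when both dates fall in the same month.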

In [6]:
# Step 1: columns that depend only on the raw YYYYMMDD integers.
# NOTE: delta is a plain integer difference, so it matches a day count only
# when Begin and End fall in the same month.
ddf = didf.assign(
    delta=didf['End'] - didf['Begin'],
    BegYear=didf['Begin'] // 10000,
    EndYear=didf['End'] // 10000,
)

# Step 2: the month columns reference BegYear/EndYear, which is why those
# had to be created (and bound to ddf) in a separate, earlier assign.
ddf = ddf.assign(
    BegMonth=(ddf['Begin'] - ddf['BegYear'] * 10000) // 100,
    EndMonth=(ddf['End'] - ddf['EndYear'] * 10000) // 100,
    BegDay=ddf['Begin'] % 100,
    EndDay=ddf['End'] % 100,
)

# Step 3: reorder so each date's components sit next to its source column.
ddf = ddf[['Begin', 'BegYear', 'BegMonth', 'BegDay',
           'End', 'EndYear', 'EndMonth', 'EndDay',
           'delta']]
ddf.head()

Unnamed: 0,Begin,BegYear,BegMonth,BegDay,End,EndYear,EndMonth,EndDay,delta
0,20150407,2015,4,7,20150411,2015,4,11,4
1,20150404,2015,4,4,20150411,2015,4,11,7
2,20150409,2015,4,9,20150413,2015,4,13,4
3,20150409,2015,4,9,20150417,2015,4,17,8
4,20150409,2015,4,9,20150411,2015,4,11,2


### Use pandas Date/Time features to compute the same columns

In [7]:
# Step 1: parse the YYYYMMDD integers into datetime columns.
dtdf = didf.assign(
    BegTime=pd.to_datetime(didf['Begin'], format='%Y%m%d'),
    EndTime=pd.to_datetime(didf['End'], format='%Y%m%d'),
)

# Step 2: read the components through the .dt accessor and subtract the
# datetime columns directly to get the elapsed time between the two dates.
dtdf = dtdf.assign(
    BegYear=dtdf['BegTime'].dt.year,
    BegMonth=dtdf['BegTime'].dt.month,
    BegDay=dtdf['BegTime'].dt.day,
    EndYear=dtdf['EndTime'].dt.year,
    EndMonth=dtdf['EndTime'].dt.month,
    EndDay=dtdf['EndTime'].dt.day,
    delta=dtdf['EndTime'] - dtdf['BegTime'],
)

# Step 3: reorder so each date's derived columns follow its source column.
dtdf = dtdf[['Begin', 'BegTime', 'BegYear', 'BegMonth', 'BegDay',
             'End', 'EndTime', 'EndYear', 'EndMonth', 'EndDay',
             'delta']]
dtdf.head()

Unnamed: 0,Begin,BegTime,BegYear,BegMonth,BegDay,End,EndTime,EndYear,EndMonth,EndDay,delta
0,20150407,2015-04-07,2015,4,7,20150411,2015-04-11,2015,4,11,4 days
1,20150404,2015-04-04,2015,4,4,20150411,2015-04-11,2015,4,11,7 days
2,20150409,2015-04-09,2015,4,9,20150413,2015-04-13,2015,4,13,4 days
3,20150409,2015-04-09,2015,4,9,20150417,2015-04-17,2015,4,17,8 days
4,20150409,2015-04-09,2015,4,9,20150411,2015-04-11,2015,4,11,2 days


### Illustrate the special types used to manage date/time values and differences

In [8]:
# Show each column's dtype as a one-column table (column label 0).
dtdf.dtypes.to_frame()

Unnamed: 0,0
Begin,int64
BegTime,datetime64[ns]
BegYear,int64
BegMonth,int64
BegDay,int64
End,int64
EndTime,datetime64[ns]
EndYear,int64
EndMonth,int64
EndDay,int64
