# Wrangling with datetime

In [None]:
import numpy as np
import pandas as pd


Pandas has datetime objects with many handy attributes

In [None]:
# Capture the current time once, then print each of its calendar/clock fields.
t = pd.Timestamp.now()
for field in ("year", "month", "day", "hour", "minute", "second"):
    print(getattr(t, field))

For demonstration purposes, we load a file that contains datetime information.

In [None]:
# Base name (without extension) of the CSV stored under data/.
file = 'FSIS-Recall-Summary-2014'
csv_path = 'data/{}.csv'.format(file)
# skiprows=1 skips the first line of the file before the header is read.
df = pd.read_csv(csv_path, skiprows=1)
# NOTE(review): dropna() returns a new DataFrame and the result is not
# assigned, so df is unchanged by this call - confirm whether rows were
# actually meant to be dropped here.
df.dropna()
df.head()

The `Recall Date` column looks like a datetime, but it is not stored as one yet. We can convert it to a datetime dtype with the `pd.to_datetime` function.

In [None]:
# Show the dtype of every column before the conversion.
print(df.dtypes)

In [None]:
# Parse 'Recall Date' into datetime values and overwrite the original column.
df['Recall Date'] = pd.to_datetime(df['Recall Date'])

In [None]:
# Print the dtypes again, now that 'Recall Date' has been converted.
print(df.dtypes)

Since we would like to do calculations with `Pounds Recalled` later on, we convert this column to float.

In [None]:
# Clean 'Pounds Recalled': render the values as text, strip the commas
# (presumably thousands separators), then convert the result to float.
pounds_text = df['Pounds Recalled'].astype(str)
pounds_text = pounds_text.str.replace(r",", "", regex=True)
df['Pounds Recalled'] = pounds_text.astype(float)

In [None]:
# Inspect the last rows of the frame.
df.tail()

In [None]:
# Keep every row except the last one (positional slice).
df = df.iloc[:-1]

Datetime columns offer several more features through the `.dt` accessor.

In [None]:
# Add the weekday name (e.g. 'Monday') of each recall date as a new column.
# `Series.dt.weekday_name` was removed in pandas 1.0; `dt.day_name()` is the
# supported replacement and returns the same English day names by default.
df['day'] = df['Recall Date'].dt.day_name()
df.head()

In [None]:
# Add the ordinal day of the year of each recall date as a new column.
df['dayofyear'] = df['Recall Date'].dt.dayofyear
df.head()

If we change the index to the datetime column, we can use the `index.month`, `index.day`, and `index.year` attributes derived from the datetime object.

In [None]:
# Use the recall date as the index: rename the column to 'ddRecal', then
# move it out of the columns and into the index in one step.
df = df.rename(columns={'Recall Date': 'ddRecal'}).set_index('ddRecal')
df.head()

In [None]:
# Keep only the rows whose index date falls in February (month number 2).
in_february = df.index.month == 2
df_feb = df[in_february]
df_feb

In [None]:
# Keep only the rows dated on the first day of any month.
on_first_day = df.index.day == 1
df_01 = df[on_first_day]
df_01

The month attribute is also handy when we want to find the monthly minimum value of a column.

In [None]:
# For every row, store the smallest 'Pounds Recalled' value found among the
# rows that share the same month number in the index.
pounds_by_month = df.groupby([df.index.month])["Pounds Recalled"]
df["min.recall"] = pounds_by_month.transform('min')
df.head(10)

## Create datetime data

In [None]:
# 29 consecutive calendar days starting on 10 February 2015.
# The ISO date string avoids day/month ambiguity, and 'D' is the canonical
# daily frequency alias (the lowercase 'd' spelling is deprecated in recent
# pandas releases).
# NOTE: 2015 is not a leap year, so this range contains no 29 February.
data = pd.date_range('2015-02-10', periods=29, freq='D')
data

In [None]:
# Rebind df to a new single-column frame whose 'Date' column holds the range.
df = pd.DataFrame({'Date': data})      

## Remove leap day (29 February)

In [None]:
def remove_leapyear(df: pd.DataFrame, index_c: str) -> pd.DataFrame:
    """Return *df* indexed by column *index_c* with 29 February rows removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a datetime column.
    index_c : str
        Name of the datetime column to use as the index. The column is also
        kept as a regular column in the result.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``index_c`` without any row dated 29 February.
        The frame passed in is left unmodified.
    """
    # set_index returns a new frame, so the caller's index is not overwritten;
    # drop=False keeps the date column available next to the index.
    indexed = df.set_index(index_c, drop=False)
    is_leap_day = (indexed.index.month == 2) & (indexed.index.day == 29)
    return indexed[~is_leap_day]


# Backward-compatible alias for the original, misspelled function name.
rempove_leapyear = remove_leapyear

In [None]:
# Index the frame by its 'Date' column and drop any rows dated 29 February.
df = remove_leapyear(df, 'Date')