In [2]:
# IMPORTS
import numpy as np
import pandas as pd

# IPython-only magic: switch on the completer's `greedy` option
%config IPCompleter.greedy = True

# Suppress scientific notation
np.set_printoptions(suppress=True)

In [56]:
# Source of the UFO sighting reports (remote CSV, fetched over the network).
UFO_REPORTS_URL = 'http://bit.ly/uforeports'

# Load every report into a single DataFrame used by the cells below.
ufo = pd.read_csv(UFO_REPORTS_URL)

In [57]:
# Preview the first rows to check the columns before any conversion.
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


### CHANGE DTYPE OF DATES TO DATETIME

In [58]:
# Parse the 'Time' strings and overwrite the column in place, so the
# `.dt` accessor used in the next cell becomes available.
ufo['Time'] = pd.to_datetime(ufo['Time'])

In [59]:
# Calendar components of the first three sightings, read through `.dt`.
# Only three values are shown, so slice first instead of computing every row.
sample_times = ufo['Time'].head(3)

# day of month
print("Date in month: ", sample_times.dt.day.tolist())

print("Hour of day: ", sample_times.dt.hour.tolist())

# week number
# `.dt.weekofyear` was removed from pandas; `.dt.isocalendar().week` is the
# replacement. It returns a nullable UInt32 column, so cast to plain int to
# keep the printed list as ordinary Python integers.
print("Week number: ", sample_times.dt.isocalendar().week.astype(int).tolist())

# day of year
print("Day number of year: ", sample_times.dt.dayofyear.tolist())

# day of week
# `.dt.weekday_name` was removed from pandas; `.dt.day_name()` replaces it.
print("Day of week: ", sample_times.dt.day_name().tolist())

Date in month:  [1, 30, 15]
Hour of day:  [22, 20, 14]
Week number:  [22, 27, 7]
Day number of year:  [152, 181, 46]
Day of week:  ['Sunday', 'Monday', 'Sunday']


### CONVERT DATETIME TO DATE

In [60]:
# Drop the time-of-day part, keeping only the calendar date of each sighting.
# NOTE(review): `.dt.date` appears to yield plain `datetime.date` objects, so
# the column stops being datetime64 -- re-running this cell would then fail
# because `.dt` is no longer available; confirm before re-executing.
ufo['Time'] = ufo['Time'].dt.date

### FILTER DATAFRAME BASED ON TIMESTAMP

In [61]:
# Cut-off for the filter: keep sightings on or after 1 January 1999.
ts = pd.to_datetime('1/1/1999')

# 'Time' holds `datetime.date` objects after the previous cell. Recent pandas
# refuses to order a Timestamp against a date (TypeError), so compare
# date-to-date by taking the date part of the cut-off.
on_or_after_cutoff = ufo['Time'] >= ts.date()

ufo.loc[on_or_after_cutoff].head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
12832,Loma Rica,,LIGHT,CA,1999-01-01
12833,Bauxite,,,AR,1999-01-01
12834,Florence,,CYLINDER,SC,1999-01-01
12835,Lake Henshaw,,CIGAR,CA,1999-01-01
12836,Wilmington Island,,LIGHT,GA,1999-01-01


### LATEST UFO SIGHTING

In [62]:
# Find the most recent date on record, then show sightings from that date.
latest_date = ufo['Time'].max()
is_latest = ufo['Time'] == latest_date
ufo.loc[is_latest].head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
18228,Kingsville,,LIGHT,TX,2000-12-31
18229,Chicago,,DISK,IL,2000-12-31
18230,Pismo Beach,,OVAL,CA,2000-12-31
18231,Pismo Beach,,OVAL,CA,2000-12-31
18232,Lodi,,,WI,2000-12-31


### EARLIEST UFO SIGHTING

In [63]:
# Find the oldest date on record, then show sightings from that date.
earliest_date = ufo['Time'].min()
is_earliest = ufo['Time'] == earliest_date
ufo.loc[is_earliest].head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,1930-06-01


### TIME BETWEEN FIRST AND LAST UFO SIGHTING

In [64]:
# Boundaries of the record: earliest and latest sighting dates.
first, last = ufo['Time'].min(), ufo['Time'].max()

# Compute the span once and report it in three units.
elapsed = last - first

print('Timedelta: ', elapsed)
print('Days: ', elapsed.days)
# Years are approximated with a fixed 365-day year, rounded to one decimal.
print('Years: ', round(elapsed.days / 365, 1))

Timedelta:  25781 days, 0:00:00
Days:  25781
Years:  70.6


### ADD 5 DAYS TO EVERY DATE IN SERIES

In [73]:
# Three sample dates written month-day-year. The explicit format pins the
# month/day order, which is otherwise ambiguous for strings like '02-05-2018'.
date_strings = pd.Series(['02-02-2018', '02-05-2018', '02-10-2018'])
dates = pd.to_datetime(date_strings, format='%m-%d-%Y')
dates

0   2018-02-02
1   2018-02-05
2   2018-02-10
dtype: datetime64[ns]

In [72]:
# Shift every date forward by five days; the unit is spelled out explicitly.
five_days = pd.DateOffset(days=5)
dates + five_days

0   2018-02-07
1   2018-02-10
2   2018-02-15
dtype: datetime64[ns]

### 21. How to convert a series of date-strings to a timeseries?

In [40]:
# Input
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

# Solution 1
from dateutil.parser import parse
ser.map(lambda x: parse(x))

# Solution 2
pd.to_datetime(ser)

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]

### 22. How to get the day of month, week number, day of year and day of week from a series of date strings?
Get the day of month, week number, day of year and day of week from ser

In [41]:
# Input
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])

# Solution
from dateutil.parser import parse
ser_ts = ser.map(lambda x: parse(x))

# day of month
print("Date: ", ser_ts.dt.day.tolist())

# week number
print("Week number: ", ser_ts.dt.weekofyear.tolist())

# day of year
print("Day number of year: ", ser_ts.dt.dayofyear.tolist())

# day of week
print("Day of week: ", ser_ts.dt.weekday_name.tolist())

Date:  [1, 2, 3, 4, 5, 6]
Week number:  [53, 5, 9, 14, 19, 23]
Day number of year:  [1, 33, 63, 94, 125, 157]
Day of week:  ['Friday', 'Wednesday', 'Saturday', 'Thursday', 'Monday', 'Saturday']
