<a href="https://colab.research.google.com/github/HugoStigletz/Data-Analytics-Portfolio/blob/main/6_6_2_Datetime_Mapping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 6.6.2 Activity: Python Packages and Libraries
You are an environmental scientist tasked by the governor of California to show maps of California and to show the extreme fire season during the summer. Make a copy of this notebook and work through it in your own Drive.

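### Step 1: Make sure you have `CA_wildfires.csv` downloaded, then upload it here.

**Note:** the loading cell in Step 2 reads `/content/6.6.2 & 6.6.3 CA_wildfires.csv`, so the uploaded file must have that name, or the path in Step 2 must be edited to match.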

In [None]:
# Upload the CSV into the runtime (the google.colab package is only importable on Colab).
from google.colab import files
# NOTE(review): files.upload() presumably returns a dict of {filename: bytes}, so this
# name holds the raw upload rather than a DataFrame -- confirm against the Colab docs.
CA_wildfires = files.upload()

### Step 2: Import packages and libraries and load data into dataframe.
There is no "date" column in a format that is easy to read. However, we do have the fire year as `FIRE_YEAR` and the discovery day of the year as `DISCOVERY_DOY`.

In [1]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import folium


# Location of the uploaded CSV inside the Colab runtime.
csv_path = '/content/6.6.2 & 6.6.3 CA_wildfires.csv'

# Read the wildfire records into a DataFrame and preview the first rows.
fires = pd.read_csv(csv_path)
fires.head()

Unnamed: 0,SOURCE_REPORTING_UNIT_NAME,FIRE_YEAR,FIRE_SIZE,LONGITUDE,LATITUDE,DISCOVERY_DATE,DISCOVERY_DOY,DISCOVERY_TIME,STAT_CAUSE_CODE,STAT_CAUSE_DESCR,CONT_DATE,CONT_DOY,CONT_TIME,OWNER_DESCR,STATE,COUNTY
0,Eldorado National Forest,2004,0.1,-119.913333,38.559167,2453184.5,180,1600.0,1,Lightning,2453189.5,185.0,1400.0,USFS,CA,3.0
1,Eldorado National Forest,2004,0.1,-119.933056,38.559167,2453184.5,180,1600.0,1,Lightning,2453189.5,185.0,1200.0,USFS,CA,3.0
2,Eldorado National Forest,2004,16823.0,-120.211667,38.523333,2453284.5,280,1415.0,2,Equipment Use,2453299.5,295.0,1000.0,USFS,CA,5.0
3,Eldorado National Forest,2004,7700.0,-120.26,38.78,2453291.5,287,1618.0,2,Equipment Use,2453295.5,291.0,1800.0,USFS,CA,17.0
4,Humboldt-Toiyabe National Forest,2005,0.1,-119.840556,38.700278,2453496.5,126,1145.0,5,Debris Burning,2453499.5,129.0,1330.0,STATE OR PRIVATE,CA,3.0


In [None]:
# Quick sanity check: (rows, columns) of the frame plus its column labels.
fires.shape, fires.columns

((5774, 16),
 Index(['SOURCE_REPORTING_UNIT_NAME', 'FIRE_YEAR', 'FIRE_SIZE', 'LONGITUDE',
        'LATITUDE', 'DISCOVERY_DATE', 'DISCOVERY_DOY', 'DISCOVERY_TIME',
        'STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR', 'CONT_DATE', 'CONT_DOY',
        'CONT_TIME', 'OWNER_DESCR', 'STATE', 'COUNTY'],
       dtype='object'))

#### Step 3: Use `pd.to_datetime`
The data set provides the fire year along with the day of the year the fire was discovered. One way to get the column in the right format is by using some handy arithmetic. 

**Hint:** pandas can parse a date written in the form YYYYDOY (format `%Y%j`) into a datetime, where DOY is the day of the year padded to three digits. For example, January 15, 2015 is written as 2015015 and is parsed to the correct datetime.

In [None]:
# Exercise: build a Series of integers in YYYYDOY form (e.g. 2004180) from the
# FIRE_YEAR and DISCOVERY_DOY columns, and assign it in place of the `...` placeholder.
year_doy = ...

In [None]:
# Combine year and day of year into one YYYYDOY integer:
# multiplying the year by 1000 leaves three digits for the day, e.g. 2004 and 180 -> 2004180.
fire_year = fires['FIRE_YEAR']
discovery_doy = fires['DISCOVERY_DOY']
year_doy = fire_year * 1000 + discovery_doy

In [None]:
# Each value reads as YYYYDOY: e.g. 2004180 is year 2004, day 180 of that year.
year_doy

0       2004180
1       2004180
2       2004280
3       2004287
4       2005126
         ...   
5769    2010206
5770    2014157
5771    2014184
5772    2015210
5773    2015213
Length: 5774, dtype: int64

#### Now use datetime to add a 'Date' column. 

For reference, here is the [format documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).

In [None]:
# %Y is the four-digit year and %j the three-digit day of the year,
# so 2004180 is parsed as 2004-06-28.
discovery_dates = pd.to_datetime(year_doy, format='%Y%j')
fires['Date'] = discovery_dates

In [None]:
# Inspect the new column to confirm the values parsed as datetimes.
fires['Date']

0      2004-06-28
1      2004-06-28
2      2004-10-06
3      2004-10-13
4      2005-05-06
          ...    
5769   2010-07-25
5770   2014-06-06
5771   2014-07-03
5772   2015-07-29
5773   2015-08-01
Name: Date, Length: 5774, dtype: datetime64[ns]

### Step 4: Now that we have a date column, we can visualize the California fires on different dates. 

In [None]:
# Keep only the columns the map needs: year, size, coordinates, county/state and date.
map_columns = ['FIRE_YEAR', 'FIRE_SIZE', 'LONGITUDE', 'LATITUDE', 'COUNTY', 'STATE', 'Date']
fires_map = fires[map_columns]

In [None]:
# Narrow the map data to California fires dated 2015 that burned more than 3000 acres.
discovered_in_2015 = fires_map['Date'].dt.year == 2015
over_3000_acres = fires_map['FIRE_SIZE'] > 3000
in_california = fires_map['STATE'] == 'CA'
fires_map2 = fires_map[discovered_in_2015 & over_3000_acres & in_california]

In [None]:
# Preview the unfiltered map columns.
fires_map.head()

Unnamed: 0,FIRE_YEAR,FIRE_SIZE,LONGITUDE,LATITUDE,COUNTY,STATE,Date
0,2004,0.1,-119.913333,38.559167,3.0,CA,2004-06-28
1,2004,0.1,-119.933056,38.559167,3.0,CA,2004-06-28
2,2004,16823.0,-120.211667,38.523333,5.0,CA,2004-10-06
3,2004,7700.0,-120.26,38.78,17.0,CA,2004-10-13
4,2005,0.1,-119.840556,38.700278,3.0,CA,2005-05-06


In [None]:
# Preview the filtered subset that will be drawn on the map.
fires_map2.head()

Unnamed: 0,FIRE_YEAR,FIRE_SIZE,LONGITUDE,LATITUDE,COUNTY,STATE,Date
5360,2015,4812.0,-121.391667,41.398333,49,CA,2015-07-30
5369,2015,12166.0,-123.775833,41.555833,15,CA,2015-08-02
5370,2015,6797.8,-123.453333,40.956944,23,CA,2015-07-30
5380,2015,6960.0,-118.535556,36.235278,107,CA,2015-07-19
5448,2015,3676.0,-119.159167,37.878056,51,CA,2015-08-14


In [None]:
# Create an empty base map at the given start location and zoom.
# 'Stamen Terrain' is no longer a built-in tileset in recent folium releases (Stamen
# stopped hosting the tiles), and folium.Map rejects the name with "Custom tiles must
# have an attribution". 'OpenStreetMap' is built in on old and new folium versions alike.
m = folium.Map(location=[37.76, -122.45], tiles='OpenStreetMap', zoom_start=6)

# Add one bubble per fire. folium.Circle takes its radius in metres, so passing
# FIRE_SIZE (acres) is a visual scale only: bigger fires draw bigger circles, but
# the circle is not the true burn extent.
for fire in fires_map2.itertuples(index=False):
    folium.Circle(
        location=[fire.LATITUDE, fire.LONGITUDE],
        radius=float(fire.FIRE_SIZE),
        color='crimson',
        fill=True,
        fill_color='crimson',
    ).add_to(m)

# Last expression of the cell: renders the interactive map.
m

### Step 5: Replace the `...` with the year you want to explore. 


In [None]:
# Change the year below to the one you want to explore.
# The STATE == 'CA' condition mirrors the Step 4 filter so both maps use the same criteria.
# NOTE(review): confirm the dataset contains fires for the chosen year; an empty
# selection simply draws a bare map.
fires_map3 = fires_map[
    (fires_map['Date'].dt.year == 2000)
    & (fires_map['FIRE_SIZE'] > 3000)
    & (fires_map['STATE'] == 'CA')
]

# Create an empty base map at the given start location and zoom.
# 'Stamen Terrain' is no longer a built-in tileset in recent folium releases (Stamen
# stopped hosting the tiles), and folium.Map rejects the name with "Custom tiles must
# have an attribution". 'OpenStreetMap' is built in on old and new folium versions alike.
n = folium.Map(location=[37.76, -122.45], tiles='OpenStreetMap', zoom_start=6)

# Add one bubble per fire. folium.Circle takes its radius in metres, so passing
# FIRE_SIZE (acres) is a visual scale only, not the true burn extent.
for fire in fires_map3.itertuples(index=False):
    folium.Circle(
        location=[fire.LATITUDE, fire.LONGITUDE],
        radius=float(fire.FIRE_SIZE),
        color='crimson',
        fill=True,
        fill_color='crimson',
    ).add_to(n)

# Last expression of the cell: renders the interactive map.
n

### Conclusions
What do you notice about the locations of the large fires? 

**Double-click and replace this text to answer.**

In [None]:
# Check each column's dtype; Date should show as datetime64 after the conversion.
fires_map.dtypes

FIRE_YEAR             int64
FIRE_SIZE           float64
LONGITUDE           float64
LATITUDE            float64
COUNTY               object
STATE                object
Date         datetime64[ns]
dtype: object

In [5]:
from datetime import datetime, timedelta, date


In [None]:
# Template for converting an existing column of date strings in place
# (`df` and 'dates' are placeholder names, so the line is left commented out):
# df['dates'] = pd.to_datetime(df['dates'])

In [2]:
from datetime import datetime
# The format must match the text exactly, including the comma with no space after it.
strs = '21 September,2021'
date_format = '%d %B,%Y'  # day, full month name, comma, four-digit year
datetime.strptime(strs, date_format)


datetime.datetime(2021, 9, 21, 0, 0)

In [4]:
# Parse the text into a datetime, then serialise it as an ISO 8601 string.
strs = "21 September, 2021"
dtime = datetime.strptime(strs, "%d %B, %Y")
strdate = dtime.isoformat()  # bound-method form of datetime.isoformat(dtime)

strdate


'2021-09-21T00:00:00'

In [6]:
# Parse the text into a full datetime first.
strs = "21 September, 2021"
dtime = datetime.strptime(strs, "%d %B, %Y")

# Drop the time-of-day part, then format the remaining date as YYYY-MM-DD.
ddate = dtime.date()
ddate.isoformat()


'2021-09-21'

In [10]:
# Month-first format: '7/12/2021' is read as July 12, 2021.
pd.to_datetime('7/12/2021', format='%m/%d/%Y')

Timestamp('2021-07-12 00:00:00')

In [17]:
s1 = '2022-05-15'

# Parse the YYYY-MM-DD text and keep its ISO 8601 string form as well.
dtime1 = datetime.strptime(s1, '%Y-%m-%d')
S1 = dtime1.isoformat()

# Step back 20 days from dtime1 (2022-05-15) and show the result in ISO format.
lookback = timedelta(days=20)
prior_date = dtime1 - lookback
prior_date.isoformat()


'2022-04-25T00:00:00'

In [19]:
# Day-first format: '12/7/21' is 12 July 2021; subtracting a 2-day timedelta gives 10 July.
pd.to_datetime('12/7/21 21:00', format='%d/%m/%y %H:%M') - pd.to_timedelta(2, unit='D')

Timestamp('2021-07-10 21:00:00')

In [20]:
# Same day-first timestamp (12 July 2021, 21:00) moved forward by 10 weeks.
pd.to_datetime('12/7/21 21:00', format='%d/%m/%y %H:%M') + pd.to_timedelta(10, unit='W')

Timestamp('2021-09-20 21:00:00')

In [21]:
# The moment exactly seven days before the current time; the value changes on every run.
datetime.now() - timedelta(days = 7)


datetime.datetime(2022, 7, 27, 2, 6, 4, 989274)

In [22]:
# Current date and time; the value changes on every run.
datetime.now()

datetime.datetime(2022, 8, 3, 2, 6, 44, 48624)

In [23]:
# Today's date as a YYYY-MM-DD string.
date.today().isoformat()

'2022-08-03'