In [1]:
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import earthpy as et

### Writing Pseudocode:
### Challenge 1: Import Monthly Fire Count Data

In the cell below complete the following task:

* Create a for loop that prints the file name  for each `.csv` file in the `earthpy-downloads\ca-fires-yearly\monthly-fire-count` directory
* In that same loop, extract the date for each year in each `.csv` file name and add it to a new list sorted in order from earliest to most recent  year using `list.append()`.

Your final list should look something like this (with more years however): 

`["1992", "1993", "1994"]`

At the end of the cell, call the list object of years so that it prints below 
the cell as standard output (**do not use the print statement**).

In [18]:
import re

data_path = os.path.join(et.io.HOME,
                      "earth-analytics",
                      "data",
                      "earthpy-downloads",
                      "ca-fires-yearly",
                      "monthly-fire-count")

# Print all dirs found by glob
data_dirs = os.path.join(data_path, "*")
all_files = glob(data_dirs)

year_list = []
for filename in all_files:
    year = re.search('([0-9]{4})\.csv', filename).group(1)
    list.append(year_list, year)
    print(filename)

year_list.sort()
year_list

/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-1999.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-1998.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-2010.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-2004.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-2005.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-2011.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-2007.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/monthly-fire-count/monthly-fire-count-2013.csv
/Users/judith/earth-analytics/data/earthpy-downloads/ca-fires-yearly/mon

['1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015']

### DRY Coding

1. Easier to follow and read
2. Easier to update and maintain

### Custom function:

In [19]:
def fahr_to_kelvin(fahr):
    """Convert temperature in Fahrenheit to kelvin.

    Parameters:
    -----------
    fahr: int or float
        The temperature in Fahrenheit.
    
    Returns:
    -----------
    kelvin : int or float
        The temperature in kelvin.
    """
    kelvin = ((fahr - 32) * (5 / 9)) + 273.15
    return kelvin

In [20]:
print(fahr_to_kelvin(0))
print(fahr_to_kelvin(75))

255.3722222222222
297.0388888888889


### Anatomy of a Function:

* def keyword
* function_name - concise but descriptive of what the function does!
* parameter(s): input required to perform the task
* return statement: output returned by the function

In [21]:
def function_name(parameter):
    print("some code here")   
    return output

### Doc String: 

multi-line documentation comment, that provides details about the function, including the specifics of the input parameters and the returns (e.g. type of objects, additional description) and any other important documentation about how to use the function.

You can learn more about docstrings in the PEP 257 guidelines focused on docstrings. This textbook uses the docstring standard that is outlined in the numpy documentation.

In [22]:
def function_name(data):
    """Docstrings should include a description of the function here 
    as well as identify the parameters (inputs) that the function 
    can take and the return (output) provided by the function,
    as shown below. 
    
    Parameters
    ----------
    input : type
        Description of input.
    
    Returns
    ------
    output : type
        Description of output.
    """
    print("some code here")
    
    return output

### function to convert mm to inches
* descriptive function name
* input parameter is original value in mm
* output value in inches
* add docstring

In [23]:
def mm_to_in(mm):
    """Convert input from millimeters to inches. 
    
    Parameters
    ----------
    mm : int or float
        Numeric value with units in millimeters.

    Returns
    ------
    inches : int or float
        Numeric value with units in inches.
    """
    inches = mm / 25.4    
    return inches

### Calling a custom function

In [24]:
# Average monthly precip (mm) in Jan for Boulder, CO
precip_jan_mm = 17.78

# Convert to inches
precip_jan_in = mm_to_in(mm = precip_jan_mm)

In [25]:
print(precip_jan_mm)
print(precip_jan_in)

17.78
0.7000000000000001


### Placeholder variables in functions
Placeholder variables hold a different value each time the function is called

** They don't exist outside of the function!

In [26]:
# Create new variable with converted values
precip_jan_in = mm_to_in(mm = precip_jan_mm) # mm is a placeholder variable for input of the function
# inches is placeholder for the output of the function

In [27]:
# Average monthly precip (mm) for Boulder, CO
avg_monthly_precip_mm = np.array([17.78, 19.05, 46.99, 74.422, 
                                  77.47, 51.308, 49.022, 41.148, 
                                  46.736, 33.274, 35.306, 21.336])

# Convert to inches
mm_to_in(mm = avg_monthly_precip_mm)

array([0.7 , 0.75, 1.85, 2.93, 3.05, 2.02, 1.93, 1.62, 1.84, 1.31, 1.39,
       0.84])

In [28]:
# Define a precip dataframe
precip_2002 = pd.DataFrame(columns=["month", "precip_mm"],
                           data=[
                                ["Jan", 27.178],  ["Feb", 11.176],
                                ["Mar", 38.100],  ["Apr", 5.080],
                                ["May", 81.280],  ["June", 29.972],
                                ["July", 2.286],  ["Aug", 36.576],
                                ["Sept", 38.608], ["Oct", 61.976],
                                ["Nov", 19.812],  ["Dec", 0.508]
                           ])

In [29]:
# Create new column with precip in inches
precip_2002["precip_in"] = mm_to_in(mm = precip_2002["precip_mm"])

precip_2002

Unnamed: 0,month,precip_mm,precip_in
0,Jan,27.178,1.07
1,Feb,11.176,0.44
2,Mar,38.1,1.5
3,Apr,5.08,0.2
4,May,81.28,3.2
5,June,29.972,1.18
6,July,2.286,0.09
7,Aug,36.576,1.44
8,Sept,38.608,1.52
9,Oct,61.976,2.44


### Calling Help on a Function

In [30]:
help(np.mean)

Help on function mean in module numpy:

mean(a, axis=None, dtype=None, out=None, keepdims=<no value>, *, where=<no value>)
    Compute the arithmetic mean along the specified axis.
    
    Returns the average of the array elements.  The average is taken over
    the flattened array by default, otherwise over the specified axis.
    `float64` intermediate and return values are used for integer inputs.
    
    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : None or int or tuple of ints, optional
        Axis or axes along which the means are computed. The default is to
        compute the mean of the flattened array.
    
        .. versionadded:: 1.7.0
    
        If this is a tuple of ints, a mean is performed over multiple axes,
        instead of a single axis or all the axes as before.
    dtype : data-type, optional
        Type to use in computing the mean.

In [31]:
help(mm_to_in)

Help on function mm_to_in in module __main__:

mm_to_in(mm)
    Convert input from millimeters to inches. 
    
    Parameters
    ----------
    mm : int or float
        Numeric value with units in millimeters.
    
    Returns
    ------
    inches : int or float
        Numeric value with units in inches.



### Combining function calls
The output of mm_to_in -> np.mean

In [32]:
avg_monthly_precip_in = np.mean(mm_to_in(mm = avg_monthly_precip_mm))
avg_monthly_precip_in

1.6858333333333333

### Multiple parameters

In [33]:
def multiply_values(x,y):
    """Calculate product of two inputs. 
    
    Parameters
    ----------
    x : int or float
    y : int or float

    Returns
    ------
    z : int or float
    """
    z = x * y
    return z

In [34]:
# Call function with numeric values
multiply_values(x = 0.7, y = 25.4)

17.779999999999998

### Multiple parameters - combine functions

In [35]:
def mean_mm_to_in_arr(arr_mm, axis_value):
    """Calculate mean values of input array along a specified
    axis and convert values from millimeters to inches.
    
    Parameters
    ----------
    arr_mm : numpy array
        Numeric values in millimeters.
    axis_value : int
        0 to calculate mean for each column.
        1 to calculate mean for each row.

    Returns
    ------
    mean_arr_in : numpy array
        Mean values of input array in inches.
    """    
    mean_arr_mm = np.mean(arr_mm, axis = axis_value)
    mean_arr_in = mean_arr_mm / 25.4 
        
    return mean_arr_in

In [36]:
# 2d array of average monthly precip (mm) for 2002 and 2013 in Boulder, CO
precip_2002_2013_mm = np.array([[27.178, 11.176, 38.1, 5.08, 81.28, 29.972, 
                                 2.286, 36.576, 38.608, 61.976, 19.812, 0.508],
                                [6.858, 28.702, 43.688, 105.156, 67.564, 15.494,  
                                 26.162, 35.56 , 461.264, 56.896, 7.366, 12.7]
                               ])

In [37]:
# Calculate monthly mean (inches) for precip_2002_2013
monthly_mean_in = mean_mm_to_in_arr(arr_mm = precip_2002_2013_mm, 
                                    axis_value = 0)

monthly_mean_in

array([0.67 , 0.785, 1.61 , 2.17 , 2.93 , 0.895, 0.56 , 1.42 , 9.84 ,
       2.34 , 0.535, 0.26 ])

In [38]:
# Calculate monthly mean (inches) for precip_2002_2013
monthly_mean_in = mean_mm_to_in_arr(arr_mm = precip_2002_2013_mm, 
                                    axis_value = 1)

monthly_mean_in

array([1.15666667, 2.84583333])

### Define optional input parameter for a function

In [39]:
def mean_mm_to_in_arr(arr_mm, axis_value=None):
    """Calculate mean values of input array and convert values 
    from millimeters to inches. If an axis is specified,
    the mean will be calculated along that axis. 

    
    Parameters
    ----------
    arr_mm : numpy array
        Numeric values in millimeters.
    axis_value : int (optional)
        0 to calculate mean for each column.
        1 to calculate mean for each row.

    Returns
    ------
    mean_arr_in : numpy array
        Mean values of input array in inches.
    """   
    if axis_value is None:
        mean_arr_mm = np.mean(arr_mm)        
    else:
        mean_arr_mm = np.mean(arr_mm, axis = axis_value)
    
    mean_arr_in = mean_arr_mm / 25.4 
        
    return mean_arr_in

### File download function

In [40]:
def download_import_df(file_url, path):   
    """Download file from specified URL and import file
    into a pandas dataframe from a specified path. 
    
    Working directory is set to earth-analytics directory 
    under home, which is automatically created by the
    download. 

    
    Parameters
    ----------
    file_url : str
        URL to CSV file (http or https).
    path : str
        Path to CSV file using relative path
        to earth-analytics directory under home.        

    Returns
    ------
    df : pandas dataframe
        Dataframe imported from downloaded CSV file.
    """ 
    
    et.data.get_data(url=file_url)      
    os.chdir(os.path.join(et.io.HOME, "earth-analytics"))    
    df = pd.read_csv(path)
    
    return df

### Concatenating dataframes

In [41]:
data_dir = os.path.join(et.io.HOME,'earth-analytics','data')
file_path = os.path.join(data_dir,"earthpy-downloads","avg-monthly-temp-fahr","san-diego","*")
file_list = glob(file_path)

sd_list = []
file_list.sort()
for f in file_list:
    file_name = os.path.basename(f)
    df = pd.read_csv(f)
    print(df.shape)
    print("year: ",file_name[10:14])
    sd_list.append(df)
    
sd_data = pd.concat(sd_list)
sd_data

(1, 13)
year:  1999
(1, 13)
year:  2000
(1, 13)
year:  2001
(1, 13)
year:  2002
(1, 13)
year:  2003


Unnamed: 0,Year,January,February,March,April,May,June,July,August,September,October,November,December
0,1999,65.2,64.9,63.6,64.6,64.4,66.9,72.7,72.7,71.4,77.7,67.4,67.6
0,2000,65.8,65.3,63.7,67.7,69.1,72.4,73.5,76.3,75.5,69.5,66.0,66.9
0,2001,62.2,61.7,63.7,63.6,67.5,72.0,73.0,73.5,73.2,71.1,66.9,63.3
0,2002,63.0,66.2,63.6,63.7,66.1,68.7,71.8,73.4,75.5,68.4,71.4,63.7
0,2003,71.4,64.6,66.5,65.2,66.9,67.4,74.3,77.5,74.0,73.1,67.4,64.6


### Plotting using groupby and nested loop

In [42]:
df = pd.read_csv('~/earth-analytics/data/demo_data.csv')
plot_grp_one = df.groupby(['year'])


fig, ax = plt.subplots()
for year, fire_list in plot_grp_one:
    for cause, fire_size in zip(fire_list.cause.values, fire_list.fire_size.values):
        ax.scatter(year,
                   fire_size,
                   label=cause)
ax.set(title="Large Fire Cause Analysis 1995-2000",
            xlabel="Month Number",
            ylabel="Fire Size (acres)")

handles, labels = ax.get_legend_handles_labels()
unique = [(h, l) for i, (h, l) in enumerate(zip(handles, labels)) if l not in labels[:i]]
ax.legend(*zip(*unique))

plt.show()

FileNotFoundError: [Errno 2] No such file or directory: '/Users/judith/earth-analytics/data/demo_data.csv'