# Import Modules

## Standard Packages

In [1]:
import os
import sys
import os.path as path
import psutil
import glob
import random
import numpy as np
import pandas as pd
import xarray as xr
import pickle
from matplotlib import pyplot as plt
from mpl_toolkits import mplot3d
#plt.style.use('seaborn-white')
from datetime import date, datetime, timedelta, time
from timeit import default_timer as timer

## User-Defined Functions

In [2]:
from Extract_DFM_Reanalysis_Data_Helper import *
from Prepare_TrainTestFire_Data_Helper import *

# Global Start Time and Memory

In [3]:
# Start the wall-clock timer, then record this process's resident set size (RSS, in bytes)
# as the baseline that the final cell subtracts from to report memory consumed.
global_start_time = timer()
process = psutil.Process(os.getpid())
global_initial_memory = process.memory_info().rss

# Variables to be Used for Preparing Train, Test, and Fire Data

## DataSet Definition

In [4]:
# Index of the data set to process; it is formatted as a two-digit suffix
# (e.g. 1 -> '01') in the extracted / fire / prepared data set names below.
data_set_count = 1

## Define FM Threshold etc.

In [5]:
# Cut-off used for the binary FM label.
# NOTE(review): FM presumably stands for fuel moisture — confirm.
FM_binary_threshold = 0.03

# Bin edges for the multi-class FM label: 0.0, 0.1, ..., 1.0 (11 edges).
# i / 10 yields exactly the same floats as the decimal literals 0.0 ... 1.0.
FM_multiclass_levels = [i / 10 for i in range(11)]

## Paths and File Names

#### Global

In [6]:
# Base directories: where the data already extracted from WRF is read from,
# and where the prepared train/test/fire data sets are written to.
# NOTE(review): these are absolute, user-specific paths — consider making them configurable.
extracted_data_base_loc = '/p/lustre2/jha3/Wildfire/Wildfire_LDRD_SI/SJSU/01_WRF_Nelson_Data_Extracted'
prepared_data_base_loc = '/p/lustre2/jha3/Wildfire/Wildfire_LDRD_SI/SJSU/02_TrainTestFire_Data_Prepared'

#### DataSet Specific (Train, Test, Fire Data Extracted from WRF)

In [7]:
# Train/test extraction artefacts: <extracted base>/<name>/<name>_df.pkl
data_set_name = 'data_train_test_extracted_%02d' % data_set_count
extracted_data_file_name = '{}_df.pkl'.format(data_set_name)
extracted_data_loc = os.path.join(extracted_data_base_loc, data_set_name)

# Fire extraction artefacts: <extracted base>/<name>/<name>.pkl
fire_data_set_name = 'data_fire_extracted_%02d' % data_set_count
fire_data_file_name = '{}.pkl'.format(fire_data_set_name)
fire_data_loc = os.path.join(extracted_data_base_loc, fire_data_set_name)

#### DataSet Specific (Train, Test, Fire Prepared)

In [8]:
# Prepared (train/test/fire) data artefacts: <prepared base>/<name>/<name>.pkl
prepared_data_set_name = 'data_prepared_%02d'%(data_set_count)

prepared_data_loc = os.path.join(prepared_data_base_loc, prepared_data_set_name)
# Create the output directory (and any missing parents) without spawning a shell.
# The previous interpolated 'mkdir -p' command broke on paths containing spaces or
# shell metacharacters and silently ignored failures; os.makedirs raises OSError
# instead, and exist_ok=True keeps the cell safe to re-run.
os.makedirs(prepared_data_loc, exist_ok=True)

prepared_data_file_name = '{}.pkl'.format(prepared_data_set_name)

# Generate seed for the random number generator

In [9]:
# Obtain a seed and build the random generator/state from it.
# NOTE(review): generate_seed / init_random_generator are presumably provided by the
# star-imported helper modules — confirm. If the seed is not fixed or logged there,
# results may not be reproducible across runs.
seed = generate_seed()
random_state = init_random_generator(seed)

# Load The Pickled Extracted Data (Train, Test, Fire) from WRF 

## Load The Train/Test Data Saved in Pickle File

In [10]:
# Read the pickled train/test data written by the extraction step
extracted_data_path = os.path.join(extracted_data_loc, extracted_data_file_name)
df_tt_extracted = pd.read_pickle(extracted_data_path)
# Uncomment to peek at a few rows:
#df_tt_extracted[998:1002]

## Load The Fire Data Saved in Pickle File

In [11]:
# Read the pickled fire data written by the extraction step.
# The 'with' block guarantees the file is closed even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code — only load files from a trusted source.
with open(os.path.join(fire_data_loc, fire_data_file_name), 'rb') as fire_data_file_handle:
    fire_data_extracted = pickle.load(fire_data_file_handle)
print('Read fire data from "{}" at "{}"'.format(fire_data_file_name, fire_data_loc))

Read fire data from "data_fire_extracted_01.pkl" at "/p/lustre2/jha3/Wildfire/Wildfire_LDRD_SI/SJSU/01_WRF_Nelson_Data_Extracted/data_fire_extracted_01"


In [12]:
#fire_data_extracted['Woosley'].head(5)

## Ensure The Train/Test and Fire Data Have the Same Keys

In [13]:
#df_tt_extracted.keys() == fire_data_extracted['Woosley'].keys()

# Get Column Names in the Train, Test, and Fire Data

In [14]:
# Plain name binding, not a copy: df_extracted refers to the same object as df_tt_extracted,
# so in-place changes made through either name are visible through both.
df_extracted = df_tt_extracted

In [15]:
# Unpack the ten values returned by the helper for the train/test frame.
# NOTE(review): the keys_* names are presumably groups of column names per
# variable — confirm against get_keys_from_extracted_data.
(
    keys_identity, keys_FM,
    keys_U10, keys_V10, U10Mag,
    keys_T2, keys_RH, keys_PREC, keys_SW,
    keys_HGT,
) = get_keys_from_extracted_data(df_tt_extracted)



# Compute New Features

# Global End Time and Memory

In [16]:
# Sample the final resident memory first, then stop the clock
global_final_memory = process.memory_info().rss
global_end_time = timer()

# RSS growth since the baseline cell, in bytes (converted to MB for display)
global_memory_consumed = global_final_memory - global_initial_memory
memory_consumed_mb = global_memory_consumed/(1024*1024)
elapsed_seconds = global_end_time - global_start_time

print('Total memory consumed: {:.3f} MB'.format(memory_consumed_mb))
print('Total computing time: {:.3f} s'.format(elapsed_seconds))
print('=========================================================================')
# NOTE(review): the message says "Extraction" although this notebook prepares
# train/test/fire data — confirm whether the wording should change.
print("SUCCESS: Done Extraction of Data")

Total memory consumed: 312.180 MB
Total computing time: 0.899 s
SUCCESS: Done Extraction of Data
