In [1]:
# Import the required packages
import os

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import seaborn as sns

# Plotly credentials are read from the environment instead of being hard-coded,
# so the API key is never saved or committed with the notebook. Every chart in
# this notebook is rendered through py.offline, so the credentials file is only
# written when both variables are actually provided.
# NOTE(review): the API key that used to be embedded here should be treated as
# compromised and rotated.
plotly_username = os.environ.get("PLOTLY_USERNAME")
plotly_api_key = os.environ.get("PLOTLY_API_KEY")
if plotly_username and plotly_api_key:
    py.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)

# Load plotly.js into the notebook so py.offline.iplot can render inline.
py.offline.init_notebook_mode(connected=True)

***
## Finding and Evaluating Historical Sugarcane Production Data
The United States Department of Agriculture's National Agricultural Statistics Service has a good website, where I acquired this data using their online searchable database:

https://quickstats.nass.usda.gov/

I selected the categorical data parameters in the following order to obtain the relevant sugarcane data:
    1. Sector:           CROPS
    2. Group:            FIELD CROPS
    3. Commodity:        SUGARCANE
    4. Category:         [PRICE RECEIVED, PRODUCTION]
    5. Data Item:        [SUGARCANE, SUGAR - PRICE RECEIVED, MEASURED IN $ / TON,
                          SUGARCANE, SUGAR - PRODUCTION, MEASURED IN $,
                          SUGARCANE, SUGAR - PRODUCTION, MEASURED IN TONS]
    6. Domain:           TOTAL
    7. Geographic Level: STATE
    8. State:            [FLORIDA, HAWAII, LOUISIANA, TEXAS]
    9. Year:             [1909 to 2017]

I received a .csv file containing 701 data rows (plus a header row). I then performed the following data integrity checks using pandas DataFrames as well as a variety of different plotly interactive chart types.

In [2]:
# Load the historical sugarcane export (first row is the header) and print a
# column-by-column summary of dtypes and non-null counts.
df_nassUSDA = pd.read_csv(
    "Datasets/NASS_USDA/FFFD6900-27F3-3E24-A627-FC347BA73EAE.csv",
    header=0,
)
df_nassUSDA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 701 entries, 0 to 700
Data columns (total 21 columns):
Program             701 non-null object
Year                701 non-null int64
Period              701 non-null object
Week Ending         0 non-null float64
Geo Level           701 non-null object
State               701 non-null object
State ANSI          701 non-null int64
Ag District         0 non-null float64
Ag District Code    0 non-null float64
County              0 non-null float64
County ANSI         0 non-null float64
Zip Code            0 non-null float64
Region              0 non-null float64
watershed_code      701 non-null int64
Watershed           0 non-null float64
Commodity           701 non-null object
Data Item           701 non-null object
Domain              701 non-null object
Domain Category     701 non-null object
Value               701 non-null object
CV (%)              4 non-null object
dtypes: float64(8), int64(3), object(10)
memory usage: 115.1+ KB


In [3]:
# Build the Annual Production Weight (US ton) by State DataFrame:
#   1. keep only the yearly SURVEY rows of the "MEASURED IN TONS" data item,
#   2. strip the thousands separators from Value and cast it to int64,
#   3. pivot to one row per Year and one column per State.
df_tons = (
    df_nassUSDA.loc[
        (df_nassUSDA["Data Item"] == "SUGARCANE, SUGAR - PRODUCTION, MEASURED IN TONS")
        & (df_nassUSDA["Program"] == "SURVEY")
        & (df_nassUSDA["Period"] == "YEAR"),
        ["Program", "Year", "Period", "State", "Data Item", "Value"],
    ]
    .sort_values(["Year", "State"])
)
df_tons["Value"] = df_tons["Value"].str.replace(',', '').astype("int64")
df_tons_pivot = pd.pivot_table(df_tons, index="Year", columns="State", values="Value")

In [4]:
# Plot the historical sugarcane production-by-weight DataFrame for some quick
# Exploratory Data Analysis (EDA): an interactive plotly chart with one
# lines+markers series per state, plotted against the pivot's Year index.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical U.S. Sugarcane Production by Weight</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Production, US tons</b>", titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Year</b>", titlefont=axis_font, tickfont=axis_font),
)

# The pivot columns are the upper-case state names (e.g. "FLORIDA"); the legend
# shows the title-cased form ("Florida").
traces = [
    go.Scatter(
        x=df_tons_pivot.index,
        y=df_tons_pivot[state],
        mode='lines+markers',
        name=state.title(),
    )
    for state in ["FLORIDA", "HAWAII", "LOUISIANA", "TEXAS"]
]

fig = go.Figure(data=traces, layout=layout)
py.offline.iplot(fig)

## Historical Sugarcane Production by Weight Data Evaluation Notes
Looking at the above plotly interactive scatter plot, I notice a few things:
    1. Texas has a large gap spanning from 1923 to 1973 in its production by weight data
    2. Hawaii has a downward production trend starting 1968 that may not be attributed to weather conditions alone
    3. Louisiana has the longest span of continuous production by weight data ranging from 1909 to 2017
    4. Florida has a continuous production by weight data span ranging from 1928 to 2017
    5. Texas' production by weight has held relatively constant from 1973 to 2017
    6. Florida's and Louisiana's peaks and dips relatively track together from 1996 to 2017
***

In [5]:
# Build the Annual Production Value (US Dollar) by State DataFrame:
#   1. keep only the yearly SURVEY rows of the "MEASURED IN $" data item,
#   2. strip the thousands separators from Value and cast it to int64,
#   3. pivot to one row per Year and one column per State.
df_dollars = (
    df_nassUSDA.loc[
        (df_nassUSDA["Data Item"] == "SUGARCANE, SUGAR - PRODUCTION, MEASURED IN $")
        & (df_nassUSDA["Program"] == "SURVEY")
        & (df_nassUSDA["Period"] == "YEAR"),
        ["Program", "Year", "Period", "State", "Data Item", "Value"],
    ]
    .sort_values(["Year", "State"])
)
df_dollars["Value"] = df_dollars["Value"].str.replace(',', '').astype("int64")
df_dollars_pivot = pd.pivot_table(df_dollars, index="Year", columns="State", values="Value")

In [6]:
# Plot the historical sugarcane production-by-value DataFrame for some quick
# Exploratory Data Analysis (EDA): an interactive plotly chart with one
# lines+markers series per state, plotted against the pivot's Year index.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical U.S. Sugarcane Production by Value</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Production, US $</b>", titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Year</b>", titlefont=axis_font, tickfont=axis_font),
)

# The pivot columns are the upper-case state names (e.g. "FLORIDA"); the legend
# shows the title-cased form ("Florida").
traces = [
    go.Scatter(
        x=df_dollars_pivot.index,
        y=df_dollars_pivot[state],
        mode='lines+markers',
        name=state.title(),
    )
    for state in ["FLORIDA", "HAWAII", "LOUISIANA", "TEXAS"]
]

fig = go.Figure(data=traces, layout=layout)
py.offline.iplot(fig)

## Historical Sugarcane Production by Value Data Evaluation Notes
Looking at the above plotly interactive scatter plot, I notice a few things:
    1. The time span (1978 to 2016) does not match the production by weight data's time span (1909 to 2017)
    2. Again, Hawaii has a downward production trend that may not be attributed to weather conditions alone
    3. Again, Texas' sugarcane production has held relatively constant
***

In [7]:
# Build the Annual Price Received Index ($/ton) by State DataFrame:
#   1. keep only the MARKETING YEAR SURVEY rows of the "$ / TON" data item,
#   2. strip the thousands separators from Value and cast it to float64,
#   3. pivot to one row per Year and one column per State.
df_dpt = (
    df_nassUSDA.loc[
        (df_nassUSDA["Data Item"] == "SUGARCANE, SUGAR - PRICE RECEIVED, MEASURED IN $ / TON")
        & (df_nassUSDA["Program"] == "SURVEY")
        & (df_nassUSDA["Period"] == "MARKETING YEAR"),
        ["Program", "Year", "Period", "State", "Data Item", "Value"],
    ]
    .sort_values(["Year", "State"])
)
df_dpt["Value"] = df_dpt["Value"].str.replace(',', '').astype("float64")
df_dpt_pivot = pd.pivot_table(df_dpt, index="Year", columns="State", values="Value")

In [8]:
# Plot the historical sugarcane price-received-index DataFrame for some quick
# Exploratory Data Analysis (EDA): an interactive plotly chart with one
# lines+markers series per state, plotted against the pivot's Year index.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical U.S. Sugarcane Price Received Index</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Price Received Index, $/ton</b>", titlefont=axis_font,
               tickfont=axis_font),
    xaxis=dict(title="<b>Year</b>", titlefont=axis_font, tickfont=axis_font),
)

# The pivot columns are the upper-case state names (e.g. "FLORIDA"); the legend
# shows the title-cased form ("Florida").
traces = [
    go.Scatter(
        x=df_dpt_pivot.index,
        y=df_dpt_pivot[state],
        mode='lines+markers',
        name=state.title(),
    )
    for state in ["FLORIDA", "HAWAII", "LOUISIANA", "TEXAS"]
]

fig = go.Figure(data=traces, layout=layout)
py.offline.iplot(fig)

## Historical Sugarcane Price Received Index Data Evaluation Notes
Looking at the above plotly interactive scatter plot, I notice a few things:
    1. The time span (1978 to 2016) does not match the production by weight data's time span (1909 to 2017)
    2. These price received indexes start to separate from one another and become volatile in 2009
***

In [9]:
# Load the historical local weather export for Moore Haven, FL (first row is
# the header) and print a column-by-column summary of dtypes and non-null counts.
df_ncdcNOAA_fl = pd.read_csv(
    "Datasets/NCDC_NOAA/Florida/Moore Haven/1337798.csv",
    header=0,
)
df_ncdcNOAA_fl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35598 entries, 0 to 35597
Data columns (total 11 columns):
STATION      35598 non-null object
NAME         35598 non-null object
LATITUDE     35598 non-null float64
LONGITUDE    35598 non-null float64
ELEVATION    35598 non-null float64
DATE         35598 non-null object
PRCP         35494 non-null float64
SNOW         22321 non-null float64
TMAX         35415 non-null float64
TMIN         35349 non-null float64
WDMV         24645 non-null float64
dtypes: float64(8), object(3)
memory usage: 3.0+ MB


In [70]:
# Reshape the historical weather data for Moore Haven, FL to extract only the
# relevant data for future analysis.
df_fl_weather = df_ncdcNOAA_fl[["DATE", "PRCP", "SNOW", "TMAX", "TMIN", "WDMV"]] \
                .sort_values("DATE")
df_fl_weather["DATE"] = pd.to_datetime(df_fl_weather["DATE"])
df_fl_weather = df_fl_weather.set_index("DATE")
df_fl_weather["Tavg"] = (df_fl_weather["TMAX"] + df_fl_weather["TMIN"]) / 2
# Calendar keys derived from the DatetimeIndex; the pivots below group on them.
df_fl_weather["Year"] = df_fl_weather.index.year
df_fl_weather["Month"] = df_fl_weather.index.month
df_fl_weather["YYYY-MM"] = df_fl_weather.index.strftime("%Y-%m")


def _monthly_views(weather, column, aggfunc, decimals=None):
    """Aggregate one daily weather column by calendar month.

    Parameters
    ----------
    weather : pandas.DataFrame
        Daily observations containing "YYYY-MM", "Year", "Month" and `column`.
    column : str
        Name of the measurement column to aggregate (e.g. "TMAX").
    aggfunc : str
        Aggregation passed to ``pivot_table`` (e.g. "mean" or "sum").
    decimals : int, optional
        When given, both results are rounded to this many decimals.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``lines`` is indexed by "YYYY-MM" with a single `column` column;
        ``by_year`` is indexed by "Year" with one column per "Month".
    """
    # Rows without a reading for this column are dropped before aggregating.
    observed = weather[["YYYY-MM", "Year", "Month", column]].dropna()
    lines = observed.pivot_table(index="YYYY-MM", values=column, aggfunc=aggfunc)
    by_year = observed.pivot_table(index="Year", columns="Month", values=column,
                                   aggfunc=aggfunc)
    if decimals is not None:
        lines = lines.round(decimals)
        by_year = by_year.round(decimals)
    return lines, by_year


# Temperatures are averaged per month (rounded to 2 decimals); precipitation,
# snow and wind movement are summed per month.
df_fl_lines_tmax, df_fl_tmax = _monthly_views(df_fl_weather, "TMAX", "mean", decimals=2)
df_fl_lines_tmin, df_fl_tmin = _monthly_views(df_fl_weather, "TMIN", "mean", decimals=2)
df_fl_lines_prcp, df_fl_prcp = _monthly_views(df_fl_weather, "PRCP", "sum")
df_fl_lines_snow, df_fl_snow = _monthly_views(df_fl_weather, "SNOW", "sum")
df_fl_lines_wdmv, df_fl_wdmv = _monthly_views(df_fl_weather, "WDMV", "sum")

# One "YYYY-MM"-indexed frame with a column per variable. join() keeps the
# TMAX frame's index, so only months that have a TMAX value appear.
df_fl_lines = df_fl_lines_tmax.join([df_fl_lines_tmin, df_fl_lines_prcp,
                                     df_fl_lines_snow, df_fl_lines_wdmv])

In [71]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# line chart of the monthly mean TMAX and TMIN series held in df_fl_lines.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Temperature Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Temperature, {0}F</b>".format(u'\xb0'),
               titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Year</b>", titlefont=axis_font, tickfont=axis_font),
)

fig = go.Figure(
    data=[
        go.Scatter(x=df_fl_lines.index, y=df_fl_lines["TMAX"], mode="lines", name="tmax"),
        go.Scatter(x=df_fl_lines.index, y=df_fl_lines["TMIN"], mode="lines", name="tmin"),
    ],
    layout=layout,
)
py.offline.iplot(fig)

In [72]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# box plot of the per-year monthly mean TMAX values, one box per calendar month.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Maximum Temperature Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Temperature, {0}F</b>".format(u'\xb0'),
               titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Month , 1927-2017</b>", titlefont=axis_font, tickfont=axis_font),
)

# df_fl_tmax has one column per month number (1-12); each box is labelled with
# the matching month abbreviation. boxmean="sd" also draws the mean and
# standard deviation on each box.
month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
traces = [
    go.Box(y=df_fl_tmax[month], boxmean="sd", name=label)
    for month, label in enumerate(month_labels, start=1)
]

fig = go.Figure(data=traces, layout=layout)
py.offline.iplot(fig)

In [73]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# box plot of the per-year monthly mean TMIN values, one box per calendar month.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Minimum Temperature Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Temperature, {0}F</b>".format(u'\xb0'),
               titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Month , 1927-2017</b>", titlefont=axis_font, tickfont=axis_font),
)

# df_fl_tmin has one column per month number (1-12); each box is labelled with
# the matching month abbreviation. boxmean="sd" also draws the mean and
# standard deviation on each box.
month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
traces = [
    go.Box(y=df_fl_tmin[month], boxmean="sd", name=label)
    for month, label in enumerate(month_labels, start=1)
]

fig = go.Figure(data=traces, layout=layout)
py.offline.iplot(fig)

In [74]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# line chart of the monthly PRCP totals held in df_fl_lines.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Precipitation Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Precipitation, inches</b>", titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Year</b>", titlefont=axis_font, tickfont=axis_font),
)

fig = go.Figure(
    data=[go.Scatter(x=df_fl_lines.index, y=df_fl_lines["PRCP"], mode="lines", name="prcp")],
    layout=layout,
)
py.offline.iplot(fig)

In [75]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# box plot of the per-year monthly PRCP totals, one box per calendar month.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Precipitation Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Precipitation, inches</b>", titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Month , 1927-2017</b>", titlefont=axis_font, tickfont=axis_font),
)

# df_fl_prcp has one column per month number (1-12); each box is labelled with
# the matching month abbreviation. boxmean="sd" also draws the mean and
# standard deviation on each box.
month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
traces = [
    go.Box(y=df_fl_prcp[month], boxmean="sd", name=label)
    for month, label in enumerate(month_labels, start=1)
]

fig = go.Figure(data=traces, layout=layout)
py.offline.iplot(fig)

In [76]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# line chart of the monthly SNOW totals held in df_fl_lines.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Snow Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Snow, inches</b>", titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Year</b>", titlefont=axis_font, tickfont=axis_font),
)

fig = go.Figure(
    data=[go.Scatter(x=df_fl_lines.index, y=df_fl_lines["SNOW"], mode="lines", name="snow")],
    layout=layout,
)
py.offline.iplot(fig)

In [77]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# line chart of the monthly WDMV (wind movement) totals held in df_fl_lines.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Wind Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Wind Movement, miles</b>", titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Year</b>", titlefont=axis_font, tickfont=axis_font),
)

fig = go.Figure(
    data=[go.Scatter(x=df_fl_lines.index, y=df_fl_lines["WDMV"], mode="lines", name="wdmv")],
    layout=layout,
)
py.offline.iplot(fig)

In [78]:
# Quick Exploratory Data Analysis (EDA) for Moore Haven, FL: interactive plotly
# box plot of the per-year monthly WDMV (wind movement) totals, one box per
# calendar month.
axis_font = dict(family="serif", size=14)
layout = go.Layout(
    plot_bgcolor="rgb(247,247,247)",
    legend=dict(font=dict(family="serif", size=12)),
    title="<b>Historical Wind Data for Moore Haven, FL</b>",
    titlefont=dict(family="serif", size=24),
    yaxis=dict(title="<b>Wind Movement, miles</b>", titlefont=axis_font, tickfont=axis_font),
    xaxis=dict(title="<b>Month , 1927-2017</b>", titlefont=axis_font, tickfont=axis_font),
)

# df_fl_wdmv has one column per month number (1-12); each box is labelled with
# the matching month abbreviation. boxmean="sd" also draws the mean and
# standard deviation on each box.
month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
traces = [
    go.Box(y=df_fl_wdmv[month], boxmean="sd", name=label)
    for month, label in enumerate(month_labels, start=1)
]

fig = go.Figure(data=traces, layout=layout)
py.offline.iplot(fig)

In [88]:
# Import the historical local weather data for Baton Rouge, LA
df_ncdcNOAA_la = pd.read_csv("Datasets/NCDC_NOAA/Louisiana/Baton Rouge/LSU/1338856.csv", header=0)
# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line after the report.
df_ncdcNOAA_la.info()
print()
# DATE is still a string column here, so min()/max() compare lexicographically.
# NOTE(review): that matches chronological order only for ISO-style YYYY-MM-DD
# values, which is what the recorded output shows — confirm for the whole file.
print("Overall dates ranging from {0} to {1}".format(
    df_ncdcNOAA_la["DATE"].min(), df_ncdcNOAA_la["DATE"].max()))
print()
# Restrict to the rows that actually have a wind movement (WDMV) reading to see
# which part of the date range the wind data covers.
df_la_wind = df_ncdcNOAA_la[["DATE", "WDMV"]].dropna()
print("Wind dates ranging from {0} to {1}".format(
    df_la_wind["DATE"].min(), df_la_wind["DATE"].max()))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19872 entries, 0 to 19871
Data columns (total 13 columns):
STATION      19872 non-null object
NAME         19872 non-null object
LATITUDE     19872 non-null float64
LONGITUDE    19872 non-null float64
ELEVATION    19872 non-null float64
DATE         19872 non-null object
DAWM         102 non-null float64
MDWM         102 non-null float64
PRCP         19815 non-null float64
SNOW         17243 non-null float64
TMAX         19808 non-null float64
TMIN         19809 non-null float64
WDMV         15597 non-null float64
dtypes: float64(10), object(3)
memory usage: 2.0+ MB
None

Overall dates ranging from 1963-01-01 to 2017-12-31

Wind dates ranging from 1963-01-01 to 2008-12-31
