Import needed libraries

In [1]:
from dotenv import dotenv_values
import sqlalchemy
import pandas as pd
import sql_functions as sf
import matplotlib.pyplot as plt

Define the schema and engine used to load the DataFrame from the database

In [2]:
# Database schema name; it is interpolated into the SQL query built in the next cell.
schema = 'capstone_wildfire'
# Engine obtained from the project's sql_functions helper.
# NOTE(review): `engine` is not referenced again in the visible cells - confirm
# whether sf.get_dataframe() needs it or creates its own connection.
engine = sf.get_engine()

Define the table & query 

In [3]:
# Select every column and row of fires_data_v1_rdy_to_clean_rows in the configured schema.
sql_query = f'select * from {schema}.fires_data_v1_rdy_to_clean_rows;'

Load the data table as a DataFrame

In [4]:
# Run the query through the project helper. The result is used as a pandas
# DataFrame by every cell below (presumably sf.get_dataframe returns one - TODO confirm).
wild_fire_df = sf.get_dataframe(sql_query)

In [None]:
def create_trend_df(wildfire_df, input_year):
  """Rank states by their mean number of fires per calendar year.

  Side effect: ``wildfire_df['combined_discovery_date']`` is overwritten in
  place with its ``pd.to_datetime`` conversion, so the caller's frame is
  modified.

  Args:
    wildfire_df: DataFrame with ``state_name``, ``combined_discovery_date``
      and ``unique_id`` columns.
    input_year: Accepted but not used by the computation.

  Returns:
    DataFrame with one row per state, ordered from the highest to the lowest
    mean, with columns ``category`` (always '20 Year AVG'), ``state`` and
    ``total_avg``.
  """
  # Parse the discovery dates in place so the .dt accessor is available
  wildfire_df['combined_discovery_date'] = pd.to_datetime(wildfire_df['combined_discovery_date'])
  discovery_year = wildfire_df['combined_discovery_date'].dt.year

  # Number of fires (non-null unique_id values) for every state/year pair
  fires_per_state_year = (
    wildfire_df
    .groupby(['state_name', discovery_year])['unique_id']
    .count()
    .reset_index(name='unique_id')
  )

  # Mean yearly count per state, highest first; every state is kept (no top-N cut)
  mean_fires = fires_per_state_year.groupby('state_name')['unique_id'].mean()
  mean_fires = mean_fires.sort_values(ascending=False)

  ranked_states = mean_fires.index
  labels = ['20 Year AVG'] * len(ranked_states)

  return pd.DataFrame({
    'category': labels,
    'state': ranked_states,
    'total_avg': mean_fires.values
  })

In [None]:
# Preview the per-state average yearly fire counts. The call also converts
# wild_fire_df['combined_discovery_date'] to datetime in place; the 2015
# argument is passed as input_year, which the function does not use.
create_trend_df(wild_fire_df,2015)

In [None]:
def create_trend_df_avgburn(wildfire_df, input_year):
  """Rank states by their average burning time per fire.

  For every state/year pair the summed ``burning_time`` is divided by the
  number of non-null ``unique_id`` values; those yearly averages are then
  averaged per state.

  The discovery year is taken from a local ``pd.to_datetime`` conversion, so
  the function works on a frame whose date column is not yet a datetime
  dtype and leaves the caller's frame unmodified.

  Args:
    wildfire_df: DataFrame with ``state_name``, ``combined_discovery_date``,
      ``burning_time`` and ``unique_id`` columns.
    input_year: Accepted but not used by the computation.

  Returns:
    DataFrame with one row per state, ordered from the highest to the lowest
    average, with columns ``category`` (always '20 Year AVG'), ``state`` and
    ``total_avg``.
  """
  # Convert locally instead of relying on another function having already
  # turned the column into datetimes; a no-op when it is already datetime.
  discovery_year = pd.to_datetime(wildfire_df['combined_discovery_date']).dt.year

  # Total burning time and fire count for every state/year pair
  yearly_data = (
    wildfire_df
    .groupby(['state_name', discovery_year])
    .agg({'burning_time': 'sum', 'unique_id': 'count'})
    .reset_index()
  )

  yearly_data['avg_burning_time'] = yearly_data['burning_time'] / yearly_data['unique_id']

  # Mean of the yearly averages per state, highest first
  state_avg = yearly_data.groupby('state_name')['avg_burning_time'].mean().sort_values(ascending=False)

  output = pd.DataFrame({
    'category': ['20 Year AVG']*len(state_avg),
    'state': state_avg.index,
    'total_avg': state_avg.values
  })

  return output

In [None]:
# Preview the per-state average burning time. The 2015 argument is passed as
# input_year, which the function does not use.
create_trend_df_avgburn(wild_fire_df,2015)

In [None]:
import pandas as pd

def create_trend_df_avgsize(wildfire_df):
  """Rank states by their average fire size per fire.

  For every state/year pair the summed ``fire_size`` is divided by the
  number of non-null ``unique_id`` values; those yearly averages are then
  averaged per state.

  The discovery year is taken from a local ``pd.to_datetime`` conversion, so
  the function works on a frame whose date column is not yet a datetime
  dtype and leaves the caller's frame unmodified.

  Args:
    wildfire_df: DataFrame with ``state_name``, ``combined_discovery_date``,
      ``fire_size`` and ``unique_id`` columns.

  Returns:
    DataFrame with one row per state, ordered from the highest to the lowest
    average, with columns ``category`` (always '20 Year AVG Size'), ``state``
    and ``total_avg``.
  """
  # Convert locally instead of relying on another function having already
  # turned the column into datetimes; a no-op when it is already datetime.
  discovery_year = pd.to_datetime(wildfire_df['combined_discovery_date']).dt.year

  # Total fire size and fire count for every state/year pair
  yearly_data = (
    wildfire_df
    .groupby(['state_name', discovery_year])
    .agg({'fire_size': 'sum', 'unique_id': 'count'})
    .reset_index()
  )

  yearly_data['avg_fire_size'] = yearly_data['fire_size'] / yearly_data['unique_id']

  # Mean of the yearly averages per state, highest first
  state_avg_size = yearly_data.groupby('state_name')['avg_fire_size'].mean().sort_values(ascending=False)

  output = pd.DataFrame({
    'category': ['20 Year AVG Size']*len(state_avg_size),
    'state': state_avg_size.index,
    'total_avg': state_avg_size.values
  })

  return output

In [None]:
# Preview the per-state average fire size.
create_trend_df_avgsize(wild_fire_df)

In [None]:
import pandas as pd

# Build one ranking per metric. Each helper returns its states ordered from the
# highest average to the lowest, so a state's row position is its rank
# (0 = highest).

# Average number of fires per year. Computed first: create_trend_df parses
# combined_discovery_date to datetime in place on wild_fire_df.
# (This used to call create_trend_df_avgburn, which made df_avg a copy of
# df_burn - burning time was counted twice and fire counts were ignored.)
df_avg = create_trend_df(wild_fire_df, 2015)

df_size = create_trend_df_avgsize(wild_fire_df)

df_burn = create_trend_df_avgburn(wild_fire_df, 2015)

# Rank position of every state within each metric
points_size = {state:index for index, state in enumerate(df_size['state'])}
points_avg = {state:index for index, state in enumerate(df_avg['state'])}
points_burn = {state:index for index, state in enumerate(df_burn['state'])}

# Combined score = sum of the three rank positions (lower = ranked higher overall)
state_points = {
  state: points_size[state] + points_avg[state] + points_burn[state]
  for state in df_size['state']
}

# The five states with the lowest combined rank sum, i.e. the highest overall ranking
top_5 = sorted(state_points, key=state_points.get)[:5]
print(top_5)

In [None]:
import pandas as pd

# Build one ranking per metric. Each helper returns its states ordered from the
# highest average to the lowest, so a state's row position is its rank
# (0 = highest).

# Average number of fires per year. Computed first: create_trend_df parses
# combined_discovery_date to datetime in place on wild_fire_df.
# (This used to call create_trend_df_avgburn, which made df_avg a copy of
# df_burn - burning time was counted twice and fire counts were ignored.)
df_avg = create_trend_df(wild_fire_df, 2015)
df_size = create_trend_df_avgsize(wild_fire_df)
df_burn = create_trend_df_avgburn(wild_fire_df, 2015)

# Get total points per state: the sum of the state's rank positions across the
# three metrics. The earlier version added 1 per appearance, which gave every
# state the same score and made the selection below arbitrary.
state_points = {}
for ranking in (df_size, df_avg, df_burn):
  for position, state in enumerate(ranking['state']):
    state_points[state] = state_points.get(state, 0) + position

# Create dataframe
df = pd.DataFrame.from_dict(state_points, orient='index', columns=['points'])

# Get bottom 5 states by points: the largest rank sums belong to the states
# that rank lowest overall.
# NOTE(review): assumes "bottom 5" means the lowest-ranked states across the
# three metrics - confirm the intended meaning.
bottom_5 = df.nlargest(5, 'points')
print(bottom_5)