In [1]:
# System
import os
from IPython.display import clear_output

# Data manipulation
import pandas as pd
import numpy as np

# Data viz
import seaborn as sns
import matplotlib.pyplot as plt
try:
    import sweetviz as sv
except:
    !pip install sweetviz
    clear_output()
    import sweetviz as sv

# Preferences
# None removes the column cap, so wide frames are displayed with every column.
pd.set_option("display.max_columns", None)

In [2]:
# Nest data: raw_data keeps the frame exactly as read; nest_data is the working copy.
raw_data = pd.read_csv("../Data/02_intermediate/concat_data_loc_year.csv")
# NOTE(review): an earlier revision dropped an 'Unnamed: 0' column at this point;
# confirm whether the CSV still carries a leftover index column.
nest_data = raw_data.copy()

# Weather data: same pattern, untouched frame plus a working copy.
weather_data = pd.read_csv("../Data/02_intermediate/FL_WX_Edit.csv")
wx_df = weather_data.copy()

# Left join keeps every nest row; the weather-side key columns are redundant
# after the join and are removed.
all_data = nest_data.merge(
    wx_df,
    how='left',
    left_on=['Station', 'Year'],
    right_on=['STATION', 'DATE'],
).drop(columns=['DATE', 'STATION'])

In [3]:
# Population data: sheet_name=None loads every sheet of the workbook into a
# dict of frames, which are stacked into one table.
excel_file = '../Data/02_intermediate/FLcopops_Edit_no_Census.xlsx'
all_sheets = pd.read_excel(excel_file, sheet_name=None)
combined_df = pd.concat(
    list(all_sheets.values()),
    ignore_index=True,
).drop(columns=['PopulationC'])

# Columns are renamed by position, so this list must match the sheet layout
# that remains after 'PopulationC' is removed.
combined_df.columns = [
    'County',
    'Population',
    'Unincorporated',
    'Prop_Unincorporated',
    'Incorporated',
    'Prop_Incorporated',
    'Year',
]

# Attach the population figures to each row by county and year (left join,
# so rows without a population match are kept).
population_keys = ['County', 'Year']
all_data = all_data.merge(
    combined_df,
    how='left',
    left_on=population_keys,
    right_on=population_keys,
)

In [4]:
# Clean up the status col
# After the cast to str, missing Status values read as the string "nan".
# Those rows are back-filled from the indicator columns, checked in priority
# order: Successful -> Perished -> Active; rows matching none get "Unknown".
# Rows that already carry a Status keep it unchanged.
#
# This is vectorised instead of looping over range(len(all_data)) with
# all_data['Status'][i]: that lookup is label-based and only works while the
# frame has a default 0..n-1 index, and it is slow on large frames.
status_text = all_data['Status'].astype(str)

# NOTE(review): Active == 1 is labelled "Inactive". This mirrors the previous
# logic exactly; confirm the label is intended.
fallback_status = pd.Series(
    np.select(
        [
            all_data['Successful'].eq(1),
            all_data['Perished'].eq(1),
            all_data['Active'].eq(1),
        ],
        ["Successful", "Failed", "Inactive"],
        default="Unknown",
    ),
    index=all_data.index,
)

# Keep the existing value where one is present, otherwise use the fallback.
status = status_text.where(status_text != "nan", fallback_status)
all_data['Status'] = status

In [5]:
# Clean up the substrate col
# Each canonical label lists the spelling/casing variants that should be
# rewritten to it.
canonical_substrates = {
    'Tree': ['tree', ' Tree'],
    'Artificial Structure': [
        'artificial structure',
        'Artificial',
        'ArtStruct',
        'Art Struct',
        'Artificial_Structure',
        'artificial_Structure',
        'Artificial_structure',
    ],
    'Dead Tree': ['Dead tree'],
}

# Invert into the variant -> canonical lookup that replace() expects.
sub_dict = {
    variant: canonical
    for canonical, variants in canonical_substrates.items()
    for variant in variants
}

# Values not listed in sub_dict are left untouched.
all_data['Substrate'] = all_data['Substrate'].replace(sub_dict)

In [10]:
# Use sweet_viz for a quick EDA
# analyze() builds the report object from the merged frame; show_html() then
# writes it to disk (the recorded output of this cell names the file
# SWEETVIZ_REPORT.html) and may try to open it in a web browser.
report = sv.analyze(all_data)
report.show_html()

                                             |      | [  0%]   00:00 -> (? left)

Report SWEETVIZ_REPORT.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.


In [None]:
# Drop cols due to 