# Sandbox notebook for data and results exploration

In [20]:
from sqlalchemy import create_engine
from pathlib import Path
import pandas as pd

## 1. Cleaned data preview

In [21]:
# Relative location of the SQLite database file; it is resolved against the
# kernel's current working directory when the file is opened.
db_path: Path = Path("../data/disaster/disaster_response.db")

In [22]:
# Fail fast when the database file is missing. Connecting to a nonexistent
# SQLite path would typically create a new, empty database file and only fail
# afterwards with a misleading "no such table" / SQL error.
if not db_path.is_file():
    raise FileNotFoundError(f"SQLite database not found: {db_path.resolve()}")

# SQLAlchemy engine bound to the SQLite file at `db_path`.
engine = create_engine(f"sqlite:///{db_path}")

# Read the whole `disaster_messages` table into a DataFrame.
df = pd.read_sql("disaster_messages", engine)

In [23]:
# Model input: the `message` column of the loaded table.
X = df.loc[:, "message"]

# Preview the first five entries.
X.head(5)

0    Weather update - a cold front from Cuba that c...
1              Is the Hurricane over or is it not over
2                      Looking for someone but no name
3    UN reports Leogane 80-90 destroyed. Only Hospi...
4    says: west side of Haiti, rest of the country ...
Name: message, dtype: object

In [24]:
# Keep only the columns whose non-null values are exactly {0, 1}.
# A column with a single observed value (all 0 or all 1) does not match this
# test and is therefore left out of `category_columns`.
category_columns = [
    column_name
    for column_name in df.columns
    if sorted(df[column_name].dropna().unique()) == [0, 1]
]

# Target frame: one column per selected binary category.
y = df.loc[:, category_columns]

# Preview the first five rows.
y.head(5)

Unnamed: 0,related,request,offer,aid_related,medical_help,medical_products,search_and_rescue,security,military,water,...,aid_centers,other_infrastructure,weather_related,floods,storm,fire,earthquake,cold,other_weather,direct_report
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,1,0,0,0,0,0,0,...,0,0,1,0,1,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,1,0,1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## 2. Model performance metrics (train vs. test)

In [25]:
# Load the saved performance metrics.
# The first two CSV rows form a two-level column header and the first CSV
# column becomes the row index.
metrics_df = pd.read_csv(
    "../data/models/performance_metrics.csv",
    header=[0, 1],
    index_col=0,
)

# Display the metrics table.
metrics_df

Unnamed: 0_level_0,balanced_accuracy,balanced_accuracy,f1_score,f1_score,precision,precision,recall,recall
Unnamed: 0_level_1,test,train,test,train,test,train,test,train
aid_centers,0.50422,0.984651,0.017857,0.84453,0.034483,0.745763,0.012048,0.973451
aid_related,0.676707,0.930411,0.621964,0.911321,0.610312,0.839965,0.63407,0.995926
buildings,0.529181,0.986689,0.108949,0.910822,0.25,0.848739,0.069652,0.982703
clothing,0.577912,0.994254,0.243243,0.961345,0.529412,0.934641,0.157895,0.989619
cold,0.499846,0.961646,0.009524,0.832524,0.019231,0.753846,0.006329,0.929539
death,0.560783,0.97897,0.189922,0.864198,0.316129,0.77853,0.135734,0.971049
direct_report,0.624196,0.955145,0.395083,0.899973,0.576923,0.855911,0.300401,0.948819
earthquake,0.796996,0.98218,0.677519,0.923202,0.756055,0.873203,0.613764,0.979275
electricity,0.50728,0.980437,0.030769,0.872216,0.0625,0.794872,0.020408,0.966234
fire,0.495295,0.980468,0.0,0.705009,0.0,0.553936,0.0,0.969388
