In [1]:
import pandas as pd 
import numpy as np
import re

In [35]:
# Location of the fire/tree join export; kept in one named constant so the
# path is easy to find and change.
TREE_CSV_PATH = '/app/working/data/fire_tree_unique.csv'

# Read the full CSV into memory with pandas' default parsing options.
tree_df = pd.read_csv(TREE_CSV_PATH)

In [36]:
# Preview the loaded frame. The notebook renders a truncated view, using "..."
# for the rows and columns it elides.
tree_df

Unnamed: 0,Latitude,Longitude,Species,DBH,Any_Protected,tree_id,geometry,index_right,OBJECTID,YEAR_,...,AGENCY,UNIT_ID,FIRE_NAME,CAUSE,GIS_ACRES,DECADES,Shape__Area,Shape__Length,tree_appear_count,FIRE_SIZE_CATEGORY
0,34.154814,-118.589896,coast live oak (Quercus agrifolia),25.0,False,398,POINT (-118.58989569441763 34.15481399358518),18162,18164,1944.0,...,CCO,LAC,WOODLAND HILLS #47,14,4548.5230,1950-1959,2.694018e+07,35271.399924,1,Medium Fire
1,34.154815,-118.589929,coast live oak (Quercus agrifolia),13.0,False,399,POINT (-118.5899292220234 34.154815103381026),18162,18164,1944.0,...,CCO,LAC,WOODLAND HILLS #47,14,4548.5230,1950-1959,2.694018e+07,35271.399924,1,Medium Fire
2,34.275480,-118.541392,MyLA311 Added (MyLA311 Added),0.0,False,782,POINT (-118.54139204162942 34.27547960352691),15796,15798,1959.0,...,CCO,LAC,,14,624.3035,1950-1959,3.709861e+06,8002.545419,2,Medium Fire
3,34.145944,-118.505713,MyLA311 Added (MyLA311 Added),0.0,False,1620,POINT (-118.50571275517476 34.14594381558009),15672,15674,1960.0,...,CCO,LAC,,14,80.7574,1960-1969,4.783674e+05,4025.625330,1,Small Fire
4,34.089382,-118.452487,coast live oak (Quercus agrifolia),10.0,False,1826,POINT (-118.4524866762754 34.089381786916896),15424,15426,1961.0,...,CCO,LAC,BEL AIR,14,6151.9790,1960-1969,3.640036e+07,48599.914373,1,Large Fire
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55008,34.281985,-118.524873,swamp mallee (Eucalyptus spathulata),1.0,False,848183,POINT (-118.52487262848 34.281985493955),17825,17827,1947.0,...,CCO,LAC,BOBBY NO. 141,14,416.0731,1950-1959,2.472436e+06,7602.760800,1,Medium Fire
55009,34.281909,-118.524855,cork oak (Quercus suber),1.0,False,848184,POINT (-118.52485539633 34.281909200602),17825,17827,1947.0,...,CCO,LAC,BOBBY NO. 141,14,416.0731,1950-1959,2.472436e+06,7602.760800,1,Medium Fire
55010,34.281829,-118.524832,cork oak (Quercus suber),1.0,False,848185,POINT (-118.524831927001 34.281829415729),17825,17827,1947.0,...,CCO,LAC,BOBBY NO. 141,14,416.0731,1950-1959,2.472436e+06,7602.760800,1,Medium Fire
55011,34.281753,-118.524804,cork oak (Quercus suber),1.0,False,848186,POINT (-118.524804103646 34.281752631767),17825,17827,1947.0,...,CCO,LAC,BOBBY NO. 141,14,416.0731,1950-1959,2.472436e+06,7602.760800,1,Medium Fire


In [44]:
# Group label -> botanical genera that are rolled up under that label.
# Most labels are common names ("Oak", "Pine"); a few are family names
# ("Araucariaceae", "Bignoniaceae"). Genera are matched case-insensitively
# once this table is inverted into a genus lookup.
expanded_tree_families = dict([
    ("Pine", ["Pinus"]),
    ("Oak", ["Quercus"]),
    ("Maple", ["Acer"]),
    ("Ash", ["Fraxinus"]),
    ("Fir", ["Abies"]),
    ("Cedar", ["Cedrus"]),
    ("Cypress", ["Cupressus", "Hesperocyparis", "Taxodium"]),
    ("Eucalyptus", ["Eucalyptus", "Corymbia"]),
    ("Magnolia", ["Magnolia"]),
    ("Elm", ["Ulmus"]),
    ("Sycamore", ["Platanus"]),
    ("Willow", ["Salix"]),
    ("Juniper", ["Juniperus"]),
    ("Podocarpus", ["Podocarpus"]),
    ("Sweetgum", ["Liquidambar"]),
    ("Araucaria", ["Araucaria"]),
    ("Arbutus", ["Arbutus"]),
    ("Spruce", ["Picea"]),
    ("Acacia", ["Acacia"]),
    ("Desert Willow", ["Chilopsis"]),
    ("Birch", ["Betula"]),
    ("Douglas Fir", ["Pseudotsuga"]),
    ("She-oak", ["Allocasuarina"]),
    ("False Cypress", ["Chamaecyparis"]),
    ("Australian Willow", ["Geijera"]),
    ("Grevillea", ["Grevillea"]),
    ("Saltcedar", ["Tamarix"]),
    ("Fig", ["Ficus"]),
    ("Araucariaceae", ["Wollemia"]),
    ("Abutilon", ["Abutilon"]),
    ("Bignoniaceae", ["Chitalpa"]),  # Chitalpa is filed under its family name
])

# Invert the group table into a flat lookup: lowercase genus -> group label.
# Lowercasing the keys lets callers look up a genus regardless of its casing.
genus_to_group = dict(
    (member.lower(), label)
    for label, members in expanded_tree_families.items()
    for member in members
)

# Function to extract genus (handles hybrid "×" or "x" and missing values)
def extract_genus(species):
    """Return the lowercase genus named in a species label.

    Labels in this notebook look like ``"coast live oak (Quercus agrifolia)"``.
    When the label contains a parenthesised part, the genus is taken from the
    first such part; otherwise the whole label is split on whitespace. Hybrid
    markers are skipped, both as stand-alone tokens (``"x Chitalpa"``,
    ``"× Chitalpa"``) and when ``×`` is glued to the genus (``"×Chitalpa"``).

    Parameters
    ----------
    species : str
        Species label. Any non-string value (for example the float NaN that
        pandas uses for missing cells, or ``None``) is treated as having no
        genus instead of raising.

    Returns
    -------
    str
        The genus in lowercase, or ``''`` when no genus token is found.
    """
    # Missing cells arrive as float NaN; re.search would raise TypeError on them.
    if not isinstance(species, str):
        return ''

    match = re.search(r'\(([^)]+)\)', species)
    text = match.group(1) if match else species

    for part in text.split():
        # Drop a "×" attached directly to the genus. A leading ASCII "x" is
        # deliberately NOT stripped, because a real genus may start with "x".
        token = part.lstrip('×').lower()
        if token and token != 'x':
            return token
    return ''

# Function to assign group based on genus or keywords
def assign_tree_group(species):
    """Map a species label to a coarse tree-group label.

    Rules are applied in order on the lowercased label:

    1. Contains ``'palm'`` -> ``'Palm'``.
    2. Contains any shrub/ornamental keyword -> ``'Shrub/Bush/Ornamental'``.
    3. Contains any non-plant keyword -> ``'Non-plant/Irrelevant'``.
    4. Otherwise the genus from ``extract_genus`` is looked up in
       ``genus_to_group``; an unmapped genus is returned capitalised.

    Keyword checks are plain substring tests, so a keyword also matches
    inside longer words (``'rose'`` matches ``'rosewood'``).

    Parameters
    ----------
    species : str
        Species label. Non-string values (float NaN from a missing cell,
        ``None``) are classed as ``'Non-plant/Irrelevant'`` instead of raising,
        matching how labels containing ``'unknown'`` are treated.

    Returns
    -------
    str
        The group label.
    """
    # Missing cells arrive as float NaN, which has no .lower(); bucket them
    # with the other unusable labels so they are filtered consistently.
    if not isinstance(species, str):
        return 'Non-plant/Irrelevant'

    lname = species.lower()
    shrub_keywords = ('rose', 'shrub', 'bush', 'viburnum', 'camellia',
                      'cotoneaster', 'privet', 'gardenia', 'cassia')
    non_plant_keywords = ('site', 'stump', 'default', 'property', 'unknown')

    if 'palm' in lname:
        return 'Palm'
    if any(kw in lname for kw in shrub_keywords):
        return 'Shrub/Bush/Ornamental'
    if any(kw in lname for kw in non_plant_keywords):
        return 'Non-plant/Irrelevant'

    # NOTE(review): placeholder labels such as "MyLA311 Added (MyLA311 Added)"
    # match no keyword and fall through to here, producing the group "Myla311".
    # Confirm whether they should be classed as non-plant instead.
    genus = extract_genus(species)
    return genus_to_group.get(genus, genus.capitalize())

# Label every row with its tree group, derived from the Species text.
tree_df['TreeGroup'] = tree_df['Species'].map(assign_tree_group)

# Tally how many rows landed in each group.
group_counts = tree_df['TreeGroup'].value_counts()

# A group is kept only if it has more than 10 rows and is not one of the two
# catch-all buckets.
is_frequent = group_counts > 10
is_catch_all = group_counts.index.isin(['Non-plant/Irrelevant', 'Shrub/Bush/Ornamental'])
valid_groups = group_counts[is_frequent & ~is_catch_all].index

# Drop rows from discarded groups and renumber the index from zero.
in_valid_group = tree_df['TreeGroup'].isin(valid_groups)
tree_df = tree_df[in_valid_group].reset_index(drop=True)

In [45]:
# Recount rows per group on the filtered frame. Unlike `group_counts`, which
# was computed before filtering, this only contains the groups that were kept.
counts = tree_df['TreeGroup'].value_counts()
# Bare last expression so the notebook renders the counts.
counts

TreeGroup
Oak                12032
Eucalyptus          5889
Pine                5780
Sycamore            3241
Palm                2953
                   ...  
Carya                 14
Tristaniopsis         13
Pithecellobium        13
Cupressocyparis       12
Thuja                 11
Name: count, Length: 91, dtype: int64

In [46]:
# List the distinct TreeGroup labels remaining after the filter, in order of
# first appearance in the frame.
tree_df['TreeGroup'].unique()

array(['Oak', 'Myla311', 'Jacaranda', 'Pine', 'Ceiba', 'Fig', 'Cypress',
       'Birch', 'Prunus', 'Eucalyptus', 'Eugenia', 'Callistemon',
       'Schinus', 'Lagerstroemia', 'Cupaniopsis', 'Palm', 'Pyrus',
       'Magnolia', 'Juniper', 'Juglans', 'Citrus', 'Handroanthus',
       'Bignoniaceae', 'Elm', 'Psidium', 'Olea', 'Lophostemon',
       'Eriobotrya', 'Persea', 'Yucca', 'Bauhinia', 'Ash', 'Cinnamomum',
       'Afrocarpus', 'Melaleuca', 'Ceratonia', 'Heteromeles', 'Ginkgo',
       'Pithecellobium', 'Morus', 'Sweetgum', 'Tipuana', 'Pistacia',
       'Celtis', 'Sambucus', 'Ailanthus', 'Acacia', 'Cedar',
       'Liriodendron', 'Agonis', 'Erythrina', 'Parkinsonia',
       'Brachychiton', 'Melia', 'Cupressocyparis', 'Koelreuteria',
       'Maple', 'Cercis', 'Sycamore', 'Searsia', 'Arbutus', 'Albizia',
       'Gleditsia', 'Australian Willow', 'Tristaniopsis', 'Leucaena',
       'Grevillea', 'Sequoia', 'Metrosideros', 'Robinia', 'Pittosporum',
       'Willow', 'Calocedrus', 'Araucaria', 'C