In [1]:
# Standard library imports
from io import StringIO
import importlib
import math
import os
import pdb
import re
import regex
import sys 

# Third party imports
import dateparser
import reverse_geocoder as rg
import numpy as np
import pandas as pd

# Local application imports
import mender_tools as mt

# Reading the csv file into a dataframe

In [13]:
df = pd.read_csv('blazes/fires.csv')

# Set options to display all rows and columns in Jupyter notebook

In [2]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

# Checking out the dataframe

In [14]:
df.shape

(109, 10)

In [None]:
df.dtypes

In [None]:
df.head()

# Renaming column names

In [None]:
df.columns

In [15]:
df.columns = ['Cause', 'Coordinates', 'Date', 'Fuels Involved', 'Incident', 'Incident Type', 'Location', 'Perimeter Contained (%)', 'Personnel Involved', 'Fire Size (Acres)']

# Removing rows with all values missing and stripping whitespace

In [16]:
# Checking total number of rows with all cells empty
df.isnull().all(axis=1).sum()


9

In [17]:
# Drop all the rows with all cells empty
df.dropna(how='all', inplace=True)

In [18]:
# Locate rows of duplicate data
dups = df.duplicated()
print(dups.any())

False


In [19]:
# Resetting the index of the dataframe
df.reset_index(drop = True, inplace = True)

In [20]:
# Remove trailing and leading whitespaces
df.loc[:,:] = df.applymap(lambda x: x.strip() if type(x)==str else x)

# Making a backup

In [21]:
''' In order to make a copy of the dataframe we need two different instances of the dataframe, if we don't do that any changes
made to any of the variables that reference to dataframe will modify the other one. In that case we use the method copy().'''
wildfire_df = df.copy()

# Arrange Numeric Columns

## Preparing column 'Fire Size (Acres)'

In [None]:
# Copy values of acres in column 'Personnel Involved' that pertain to column 'Fire Size (Acres)'
wildfire_df = mt.emend_values(df, wildfire_df, 'Personnel Involved', 'Fire Size (Acres)', r'.*\s*Acres')

In [None]:
# Remove the string 'Acres' that comes along with the digits
wildfire_df = mt.strip_symbol(wildfire_df, 'Fire Size (Acres)', 'Acres')

In [None]:
# Convert the string digits to integer values
wildfire_df = mt.convert_to_int(wildfire_df, 'Fire Size (Acres)')

In [None]:
 # Check for value types 
wildfire_df['Fire Size (Acres)'].apply(type)

## Preparing column 'Perimeter Contained (%)'

In [None]:
# Get values in column 'Fire Size (Acres)' that belong to column 'Perimeter Contained (%)'
wildfire_df = mt.emend_values(df, wildfire_df, 'Fire Size (Acres)', 'Perimeter Contained (%)', r'^\d*[.]{0,1}\d*\s*%$')

In [None]:
# Get rid of '%' symbol
wildfire_df = mt.strip_symbol(wildfire_df, 'Perimeter Contained (%)', '%')

In [None]:
# Convert float values to integer
wildfire_df = mt.convert_to_int(wildfire_df, 'Perimeter Contained (%)')

In [None]:
# Total of value types
wildfire_df['Perimeter Contained (%)'].apply(type).value_counts()

In [None]:
# Convert string values to NaN
wildfire_df = mt.str_to_nan(wildfire_df, 'Perimeter Contained (%)')

In [None]:
# Total of value types
wildfire_df['Perimeter Contained (%)'].apply(type).value_counts()

In [None]:
# Total of null values
wildfire_df['Perimeter Contained (%)'].isnull().sum()

## Preparing column 'Personnel Involved'

In [None]:
# Check elements type
wildfire_df['Personnel Involved'].apply(type)

In [None]:
# Get total value types
wildfire_df['Personnel Involved'].apply(type).value_counts()

In [None]:
# Get total sum of null values
wildfire_df['Personnel Involved'].isnull().sum()

In [None]:
# Convert string digits to int
wildfire_df = mt.convert_to_int(wildfire_df, 'Personnel Involved')

In [None]:
# Convert non-digit strings to NaN
wildfire_df = mt.str_to_nan(wildfire_df, 'Personnel Involved')

In [None]:
wildfire_df = mt.convert_to_string(wildfire_df, 'Personnel Involved')

In [None]:
# Set column to 'Int64' to have a column both with integer and NAN values
wildfire_df['Personnel Involved'] = wildfire_df['Personnel Involved'] .astype('Int64')

In [None]:
# Show value type for shown columns side by side
wildfire_df[['Fire Size (Acres)','Personnel Involved','Perimeter Contained (%)']].applymap(type)

In [None]:
# To finalize the numeric columns preparation we save the dataframe as a csv file
wildfire_df.to_csv('blazes/fires_v2.csv', index = False)

# Arrange Categorical Columns

## Preparing Column 'Coordinates'

In [10]:
# Read the csv file into a new dataframe
wildf = pd.read_csv('blazes/fires_v2.csv')

In [11]:
# Make a copy of the dataframe
wildf_dfv2 = wildf.copy()

In [22]:
# Copy the values from column 'Location' to the corresponding cells in column 'Coordinates'
wildf_dfv2 = mt.emend_values(df, wildf_dfv2, 'Location', 'Coordinates', r'-?\d+\.?\d+\s*latitude,?\s*-?\d+\.?\d+\s*longitude')

In [23]:
# Copy the values from column 'Cause' to the corresponding cells in column 'Coordinates'
wildf_dfv2 = mt.emend_values(df, wildf_dfv2, 'Cause', 'Coordinates', r'-?\d+\.?\d+\s*latitude,?\s*-?\d+\.?\d+\s*longitude')

In [24]:
wildf_dfv2['Coordinates'].isnull().sum()

11

In [25]:
# Create a new column 'Latitude' with the values that go along with 'latitude' in column 'Coordinates'
wildf_dfv2 = mt.create_new_col(wildf_dfv2, r'-?\d+\.?\d+\s*(?=latitude)', 'Coordinates', 'Latitude')

In [26]:
# Create a new column 'Longitude' with the values that go along with 'longitude' in column 'Coordinates'
wildf_dfv2 = mt.create_new_col(wildf_dfv2, r'-?\d+\.?\d+\s*(?=longitude)', 'Coordinates', 'Longitude')

In [27]:
# Show value types totals
wildf_dfv2['Latitude'].apply(type).value_counts()

<class 'str'>    100
Name: Latitude, dtype: int64

In [28]:
# Show empty strings totals
(wildf_dfv2['Latitude'] == '').sum()

37

In [29]:
# Replace empty / whitespace-only strings in column 'Longitude' with NaN.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the column selection: the in-place form acts on
# an intermediate Series and is not guaranteed to update the frame (it does not
# under pandas Copy-on-Write).
wildf_dfv2['Longitude'] = wildf_dfv2['Longitude'].replace(r'^\s*$', np.nan, regex=True)

In [30]:
# Replace empty / whitespace-only strings in column 'Latitude' with NaN.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the column selection: the in-place form acts on
# an intermediate Series and is not guaranteed to update the frame (it does not
# under pandas Copy-on-Write).
wildf_dfv2['Latitude'] = wildf_dfv2['Latitude'].replace(r'^\s*$', np.nan, regex=True)

In [31]:
# Show value types totals
wildf_dfv2['Latitude'].apply(type).value_counts()

<class 'str'>      63
<class 'float'>    37
Name: Latitude, dtype: int64

In [32]:
# Show null values totals
wildf_dfv2['Latitude'].isnull().sum()

37

In [33]:
# Convert strings to float in column 'Latitude'
wildf_dfv2['Latitude'] = pd.to_numeric(wildf_dfv2['Latitude'], downcast='float')

In [34]:
# Convert strings to float in column 'Longitude'
wildf_dfv2['Longitude'] = pd.to_numeric(wildf_dfv2['Longitude'], downcast='float')

In [35]:
# Drop column 'Coordinates'
wildf_dfv2.drop(['Coordinates'], axis=1, inplace=True)

## Preparing Column 'Date'

In [36]:
# Copy the values from column 'Cause' to the corresponding cells in column 'Date'
wildf_dfv2 = mt.emend_values(df, wildf_dfv2, 'Cause', 'Date', r'(\d{2}:\d{2}\s?(AM|PM))$')

In [37]:
# Remove string 'approx.' from the string containing the day and hour
wildf_dfv2 = mt.strip_symbol(wildf_dfv2, 'Date', 'approx.')

In [38]:
# Convert date entries from 'Date' column to ISO format
wildf_dfv2 = mt.convert_to_isodate(wildf_dfv2, 'Date')

In [39]:
# Show columns 'Date' and 'ISO Date' side by side
wildf_dfv2.loc[:,['Date', 'ISO Date']].head()

Unnamed: 0,Date,ISO Date
0,"Saturday August 15th, 2020 07:15 AM",2020-08-15T07:15:00
1,"Saturday August 01st, 2020 04:30 PM",2020-08-01T16:30:00
2,"Thursday August 13th, 2020 03:15 PM",2020-08-13T15:15:00
3,"Friday July 31st, 2020 05:15 PM",2020-07-31T17:15:00
4,"Friday August 14th, 2020 11:43 AM",2020-08-14T11:43:00


In [40]:
# Drop column 'Date'
wildf_dfv2.drop(['Date'], axis=1, inplace=True)

## Preparing Column 'Cause'

In [41]:
# Get rid of strings of coordinates
wildf_dfv2 = mt.strip_string(wildf_dfv2, 'Cause', r'-?\d+\.?\d+\s*latitude,?\s*-?\d+\.?\d+\s*longitude')

In [42]:
# Get rid of strings of dates
wildf_dfv2 = mt.strip_string(wildf_dfv2, 'Cause', r'(\d{2}:\d{2}\s?(AM|PM))$')

In [43]:
# Remove trailing and leading whitespaces
wildf_dfv2.loc[:, 'Cause'] = wildf_dfv2.loc[:, 'Cause'].apply(lambda x: x.strip() if type(x)==str else x)

In [44]:
# Replace empty / whitespace-only strings in column 'Cause' with NaN.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the column selection: the in-place form acts on
# an intermediate Series and is not guaranteed to update the frame (it does not
# under pandas Copy-on-Write).
wildf_dfv2['Cause'] = wildf_dfv2['Cause'].replace(r'^\s*$', np.nan, regex=True)

In [45]:
# Collect the distinct causes of fire; only string entries are kept, so NaN cells are skipped
causes = {item for item in wildf_dfv2['Cause'] if isinstance(item, str)}
print(causes)

{'Piece From Faulty Catalytic Converter', 'Human-caused - Suspected Arson', 'Under Investigation', 'Human Caused, Under Investigation', 'Unkown', 'Human', 'Human-caused', 'Lightning/natural', 'Natural, Lightning', 'Debris Burning', 'Accidental Roadside Start', 'Unknown, Under Investigation', 'Human Caused', 'Lightning', 'Powerline', 'Unknown'}


In [46]:
# Print the causes of fire held in set 'causes' as a list numbered from 1
for position, cause in enumerate(causes, start=1):
    print(position, '->', cause)

1 -> Piece From Faulty Catalytic Converter
2 -> Human-caused - Suspected Arson
3 -> Under Investigation
4 -> Human Caused, Under Investigation
5 -> Unkown
6 -> Human
7 -> Human-caused
8 -> Lightning/natural
9 -> Natural, Lightning
10 -> Debris Burning
11 -> Accidental Roadside Start
12 -> Unknown, Under Investigation
13 -> Human Caused
14 -> Lightning
15 -> Powerline
16 -> Unknown


In [47]:
# Ad hoc helper for replacing and fixing redundant values in column 'Cause'
result = ""
def is_match(pattern, x, word):
    """Tell whether `pattern` is found in `x`, storing the replacement in `result`.

    Parameters:
        pattern: regular expression handed to `re.search`.
        x: cell value to inspect; anything that is not a `str` never matches.
        word: text that should stand in for `x` when the pattern is found.

    Returns:
        True when `x` is a string containing `pattern`, otherwise False.

    Side effect:
        On a match the module-level `result` is overwritten; it is left
        untouched when False is returned.
    """
    global result
    if not isinstance(x, str):
        return False
    found = re.search(pattern, x)
    if found is None:
        return False
    # found.string is the string that was searched (x itself), so the whole
    # value of x is swapped for `word`.
    result = x.replace(found.string, word)
    return True

In [48]:
# Collapse every cause that mentions 'Lightning' into the single label 'Lightning'.
# A boolean mask is used so this cell does not depend on the module-level
# `result` variable that is_match() sets as a side effect. Cells that are not
# strings (e.g. NaN) are excluded by na=False and stay unchanged.
lightning_mask = wildf_dfv2['Cause'].str.contains(r'Lightning', na=False)
wildf_dfv2.loc[lightning_mask, 'Cause'] = 'Lightning'

In [49]:
# Collapse every cause that mentions 'Human' into the single label 'Human Caused'.
# A boolean mask is used so this cell does not depend on the module-level
# `result` variable that is_match() sets as a side effect. Cells that are not
# strings (e.g. NaN) are excluded by na=False and stay unchanged.
human_mask = wildf_dfv2['Cause'].str.contains(r'Human', na=False)
wildf_dfv2.loc[human_mask, 'Cause'] = 'Human Caused'

In [50]:
# Collapse every cause that mentions 'Unknown' (or the misspelling 'Unkown')
# into the single label 'Unknown'.
# A boolean mask is used so this cell does not depend on the module-level
# `result` variable that is_match() sets as a side effect. Cells that are not
# strings (e.g. NaN) are excluded by na=False and stay unchanged.
unknown_mask = wildf_dfv2['Cause'].str.contains(r'Unk[n]?own', na=False)
wildf_dfv2.loc[unknown_mask, 'Cause'] = 'Unknown'

In [51]:
wildf_dfv2.loc[:, 'Cause'].head()

0          NaN
1    Lightning
2    Powerline
3    Lightning
4      Unknown
Name: Cause, dtype: object

## Creating column Region

In [52]:
# Reverse-geocode latitude/longitude coordinates
def get_location(coordinates):
    """Look up the place(s) nearest to `coordinates` with reverse_geocoder.

    `coordinates` is passed unchanged to `rg.search`, and whatever
    `rg.search` returns is handed back to the caller.
    """
    nearest = rg.search(coordinates)
    return nearest

In [53]:
# Function for creating column 'Region' based on coordinates data
def create_region_col(df):
    """Fill column 'Region' of `df` in place from its 'Latitude'/'Longitude' columns.

    Rows where either coordinate is missing are left as they are. For every
    other row the 'admin1' field of the reverse-geocoding result is stored in
    'Region' (for the US coordinates in this notebook that is the state name,
    e.g. 'Wyoming', 'Alaska', 'Arizona').

    Parameters:
        df: DataFrame with 'Latitude' and 'Longitude' columns. The frame that
            is passed in is both read and modified; no global frame is used.

    Returns:
        None. `df` is modified in place; the 'Region' column is created when
        it does not exist yet.
    """
    if 'Region' not in df.columns:
        # object dtype so that state names can be stored next to NaN
        df['Region'] = pd.Series(np.nan, index=df.index, dtype=object)

    has_coords = df['Latitude'].notna() & df['Longitude'].notna()
    if not has_coords.any():
        # Nothing to geocode; also avoids handing an empty list to the geocoder.
        return

    # Geocode all valid rows with a single call instead of one call per row.
    coordinates = list(zip(df.loc[has_coords, 'Latitude'],
                           df.loc[has_coords, 'Longitude']))
    locations = get_location(coordinates)

    # Results come back in the order of `coordinates`, which is the order of
    # the rows selected by `has_coords`; the mask writes by row label, so no
    # manual row counter is required.
    df.loc[has_coords, 'Region'] = [location['admin1'] for location in locations]

In [54]:
# Call to function create_region_col. Could take over a minute depending on your pc hardware.
create_region_col(wildf_dfv2)

Loading formatted geocoded file...


In [55]:
# Checking null values after last call to create_region_col
wildf_dfv2.loc[:,  'Region'].isnull().sum()

37

In [56]:
wildf_dfv2.loc[15:24, ['Location', 'Latitude', 'Longitude', 'Region']]

Unnamed: 0,Location,Latitude,Longitude,Region
15,"25 miles west of Cody, Wyo",44.459,-109.546997,Wyoming
16,17 miles southwest of Stevens Village,65.853996,-149.578995,Alaska
17,"Darrell Willis, DFFM",33.806999,-112.119003,Arizona
18,NE of New River,,,
19,Superstition Mountains,33.418999,-111.301003,Arizona
20,Split Mountain,,,
21,"Up to 38 miles northeast of Mesa, AZ along Hwy 87.",,,
22,East side of Peavine Mountain,39.576,-119.913002,Nevada
23,23 miles from Cortez,,,
24,"5 miles north of Oak City, Utah",,,


## Appendix A Section

In [69]:
wildf_dfv21 = wildf_dfv2.copy()

In [58]:
wildf_dfv21['Region'].isnull().sum()

37

In [62]:
# Mapping of US state names to their two-letter abbreviations
us_states = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}


The next function is at an early stage of development. Any suggestions for optimizing the code and making it more modular will be appreciated.

In [71]:
# Ad hoc function for completing states names in column 'Region' based on column 'Location' data
def _state_for_token(token, states):
    """Return the state name that `token` designates in `states`, or None."""
    # Words longer than two characters are compared case-insensitively with
    # the full state names; shorter words keep their case so that ordinary
    # words such as 'in' or 'or' are not read as the abbreviations IN / OR.
    candidate = token.upper() if len(token) > 2 else token
    for st_name, st_abbrev in states.items():
        if candidate == st_name.upper() or candidate == st_abbrev:
            return st_name
    return None


def loc_to_state(df, col, states=None):
    """Fill missing 'Region' values of `df` in place from the text in column `col`.

    Rows whose 'Region' is already a string, or whose `col` value is not a
    string, are skipped. Otherwise the text is split into word tokens and the
    first token that equals a state name (any case) or a two-letter
    abbreviation (exact case) decides the state written to 'Region'.

    Rows are addressed by their own index label. A location without any word
    token therefore leaves its row untouched and cannot shift the rows that
    follow it.

    Known limits of the token-by-token comparison:
      * names made of several words ('New Mexico', 'North Dakota', ...) can
        never equal a single token, so they are not recognised by name;
      * an upper-case two-letter word is read as an abbreviation, e.g. 'NE'
        (northeast) is taken for Nebraska.

    Parameters:
        df: DataFrame holding column `col` and a 'Region' column.
        col: name of the column with the free-text location.
        states: optional mapping of state name -> abbreviation; defaults to
            the notebook-level `us_states` dictionary.

    Returns:
        None. `df` is modified in place.
    """
    if states is None:
        states = us_states

    for row_label, sentence in df[col].items():
        if isinstance(df.at[row_label, 'Region'], str):
            continue
        if not isinstance(sentence, str):
            continue
        for token in re.findall(r'\w+', sentence):
            st_name = _state_for_token(token, states)
            if st_name is not None:
                df.at[row_label, 'Region'] = st_name
                break

In [77]:
# Call to function loc_to_state
loc_to_state(wildf_dfv21, 'Location')

In [101]:
# Subcardinal point Northeast (NE) concurs with Nebraska (NE)
wildf_dfv21.loc[18,['Location','Region']]

Location    NE of New River
Region             Nebraska
Name: 18, dtype: object

In [None]:
# Change the name from Nebraska to Arizona in row 18 
wildf_dfv21.at[18, 'Region'] ='Arizona'   

In [102]:
# Listing the rows of 'Location' that will be googled
# A row qualifies when 'Region' is still missing (NaN is a float) while
# 'Location' holds text. The index label comes straight from the frame, so rows
# sharing the same 'Location' text are each reported under their own label.
indexes = list()
for row_label, item, name in zip(wildf_dfv21.index, wildf_dfv21['Region'], wildf_dfv21['Location']):
    if (isinstance(item, float) and not isinstance(name, float)):
        # `indexes` is a list of one-element lists, one per listed row
        indexes.append([row_label])
        print(row_label, '->', name)

12 -> Ryan Carbajal
20 -> Split Mountain
23 -> 23 miles from Cortez
26 -> 14 mi. W. Sahuarita
33 -> Northeast of Tucson
40 -> Wildland Fire
53 -> Klamath National Forest
56 -> 3 miles East of Tusayan near FR 302 & 2709
61 -> Selway-Bitterroot Wilderness, Nez Perce-Clearwater National Forests
67 -> Page For Emerging Initial Attack Incidents On The Humboldt-toiyabe National Forest.
76 -> 4 miles south of White Wolf Campground and 1 mile west of Lukens Lake Trail
78 -> Keyesville Area - Black Gulch North
97 -> Peloncilo Mountain Wilderness
98 -> SAN GABRIEL CYN RD/ N RANCH RD


In [105]:
# Remaining states, keyed by the dataframe index label of the row they belong to
last_states = {
    20: 'California',
    23: 'Colorado',
    26: 'Arizona',
    33: 'Arizona',
    53: 'Oregon',
    56: 'Arizona',
    61: 'Idaho',
    67: 'Nevada',
    76: 'California',
    78: 'California',
    97: 'Arizona',
    98: 'California',
}

In [106]:
# Write each remaining state into 'Region' at the row label it is keyed by
for row_label, state_name in last_states.items():
    wildf_dfv21.at[row_label, 'Region'] = state_name

In [109]:
wildf_dfv21.loc[[12,40], ['Incident', 'Location', 'Region']].head(41)

Unnamed: 0,Incident,Location,Region
12,Seco,Ryan Carbajal,
40,Milepost 21,Wildland Fire,


In [None]:
wildf_dfv21.at[12, 'Region'] = 'New Mexico'
wildf_dfv21.at[40, 'Region'] ='California'   

In [104]:
wildf_dfv21.loc[:, ['Incident','Location', 'Region', 'Latitude']]

Unnamed: 0,Incident,Location,Region,Latitude
0,Chimney Fire,"32.722 latitude, -99.481 longitude",Texas,32.722
1,Cassadore Springs Fire,"11 miles north of San Carlos, AZ",Arizona,33.543999
2,Bucholtz Fire,5 miles east of Eldorado,Texas,30.863001
3,Pine Gulch Fire,"18 miles north of Grand Junction, Colorado",Colorado,
4,Williams Fork,"15 miles NW of Fraser, CO",Colorado,
5,Klaus,"Douglas Ranger District on the Coronado National Forest. 11 miles southwest of Portal, AZ",Arizona,
6,",",,,
7,Bradley Fire,"42.286 latitude, -107.052 longitude",Wyoming,42.285999
8,Trash Pile Fire,"33.858 latitude, -98.831 longitude",Texas,33.858002
9,Fields Gulch Fire,Brian McGowan,Montana,46.852001


In [None]:
# Drop column 'Location'
wildf_dfv21.drop(['Location'], axis=1, inplace=True)

In [None]:
# Drop column 'Incident'
wildf_dfv21.drop(['Incident'], axis=1, inplace=True)

In [None]:
# Save dataframe to csv file
wildf_dfv21.to_csv('blazes/fires_v3.csv', index = False)

## Final Tweaks

In [None]:
# Read the csv file into a new dataframe
wildf = pd.read_csv('blazes/fires_v3.csv')

In [None]:
# Make a copy
wildf_dfv3 = wildf.copy()

In [None]:
# Show selected columns where column 'Incident Type' is 'Burned Area Emergency Response'
exclude = ['Latitude','Longitude', 'Fuels Involved', 'Region']
wildf_dfv3.loc[wildf_dfv3['Incident Type'] == 'Burned Area Emergency Response', wildf_dfv3.columns.difference(exclude, sort=False)]

In [None]:
wildf_dfv3.drop(index=wildf_dfv3[wildf_dfv3['Incident Type'] == 'Burned Area Emergency Response'].index, inplace=True)

In [None]:
# Checking total number of rows with all cells empty
wildf_dfv3.isnull().all(axis=1).sum()

In [None]:
# Drop all the rows with all cells empty
wildf_dfv3.dropna(how='all', inplace=True)

In [None]:
wildf_dfv3.shape

In [None]:
# Converting float values to integers. When saving dataframe to csv int values are saved as float
# wildf_dfv3[['Perimeter Contained (%)','Personnel Involved','Fire Size (Acres)']] = wildf_dfv3[['Perimeter Contained (%)','Personnel Involved','Fire Size (Acres)']].astype('Int64')
# Or convert only 'Personnel Involved' and 'Fire Size (Acres)' to integer values
wildf_dfv3[['Personnel Involved','Fire Size (Acres)']] = wildf_dfv3[['Personnel Involved','Fire Size (Acres)']].astype('Int64')

In [None]:
# Show final dataset
exclude = ['Latitude','Longitude', 'Fuels Involved']
wildf_dfv3.loc[:, wildf_dfv3.columns.difference(exclude, sort=False)].head(100)

In [None]:
wildf_dfv3.to_csv('blazes/fires_v4.csv', index = False)

# Addendum

In [None]:
# Dictionary mapping each U.S. state name to its capital city.
# Keys use the same state spellings as the other state dictionary in this
# notebook so the two can be joined on state name.
us_capital_dic={
    'Alabama': 'Montgomery',
    'Alaska': 'Juneau',
    'Arizona':'Phoenix',
    'Arkansas':'Little Rock',
    'California': 'Sacramento',
    'Colorado':'Denver',
    'Connecticut':'Hartford',
    'Delaware':'Dover',
    'Florida': 'Tallahassee',
    'Georgia': 'Atlanta',
    'Hawaii': 'Honolulu',
    'Idaho': 'Boise',
    'Illinois': 'Springfield',
    'Indiana': 'Indianapolis',
    'Iowa': 'Des Moines',  # corrected spelling (was 'Des Monies')
    'Kansas': 'Topeka',
    'Kentucky': 'Frankfort',
    'Louisiana': 'Baton Rouge',
    'Maine': 'Augusta',
    'Maryland': 'Annapolis',
    'Massachusetts': 'Boston',
    'Michigan': 'Lansing',
    'Minnesota': 'St. Paul',
    'Mississippi': 'Jackson',
    'Missouri': 'Jefferson City',
    'Montana': 'Helena',
    'Nebraska': 'Lincoln',
    'Nevada': 'Carson City',
    'New Hampshire': 'Concord',
    'New Jersey': 'Trenton',
    'New Mexico': 'Santa Fe',
    'New York': 'Albany',
    'North Carolina': 'Raleigh',
    'North Dakota': 'Bismarck',
    'Ohio': 'Columbus',
    'Oklahoma': 'Oklahoma City',
    'Oregon': 'Salem',
    'Pennsylvania': 'Harrisburg',
    'Rhode Island': 'Providence',
    'South Carolina': 'Columbia',
    'South Dakota': 'Pierre',  # corrected key (was 'South Dakoda')
    'Tennessee': 'Nashville',
    'Texas': 'Austin',
    'Utah': 'Salt Lake City',
    'Vermont': 'Montpelier',
    'Virginia': 'Richmond',
    'Washington': 'Olympia',
    'West Virginia': 'Charleston',
    'Wisconsin': 'Madison',
    'Wyoming': 'Cheyenne'
}

In [None]:
# Slicing tips
wildf_dfv2.loc[wildf_dfv2['Region'].isnull(), ['Location', 'Region']].head(100)

In [3]:
wildf = pd.read_csv('blazes/fires_v4.csv')

In [4]:
wildf.shape


(96, 10)

In [8]:
# Set column to 'Int64' to have a column both with integer and NAN values
wildf['Personnel Involved'] = wildf['Personnel Involved'] .astype('Int64')

In [9]:
wildf.dtypes

Cause                       object
Fuels Involved              object
Incident Type               object
Perimeter Contained (%)    float64
Personnel Involved           Int64
Fire Size (Acres)          float64
Latitude                   float64
Longitude                  float64
ISO Date                    object
Region                      object
dtype: object

In [6]:
wildf.columns

Index(['Cause', 'Fuels Involved', 'Incident Type', 'Perimeter Contained (%)',
       'Personnel Involved', 'Fire Size (Acres)', 'Latitude', 'Longitude',
       'ISO Date', 'Region'],
      dtype='object')

In [7]:
wildf.describe()

Unnamed: 0,Perimeter Contained (%),Personnel Involved,Fire Size (Acres),Latitude,Longitude
count,82.0,73.0,92.0,60.0,60.0
mean,83.95122,177.246575,11247.043478,38.6194,-111.15185
std,27.603857,288.721549,27893.11145,6.304743,8.795239
min,3.0,1.0,1.0,30.093,-149.579
25%,84.25,20.0,177.75,33.52775,-117.33575
50%,98.5,92.0,1383.5,37.9185,-110.359
75%,100.0,201.0,8989.25,42.65625,-106.61475
max,100.0,1563.0,193455.0,65.854,-97.203
