### Base cartogràfica d'incendis forestals - KMZ to CSV

https://gist.github.com/mciantyre/32ff2c2d5cd9515c1ee7
    
https://geopandas.org/install.html

**https://gist.github.com/linwoodc3/0306734dfe17076dfd34e09660c198c0**  

In [2]:
import pandas as pd
from io import BytesIO,StringIO
from zipfile import ZipFile
import re,os
import numpy as np
import xml.sax, xml.sax.handler
from html.parser import HTMLParser
import pandas as pd
import geopandas as gpd
from html.parser import HTMLParser


In [56]:


class MyHTMLParser(HTMLParser):
    """Collect ``key:value`` text fragments found inside HTML tables.

    Feed the parser an HTML string with :meth:`feed`.  Every text node that
    sits inside a ``<table>`` element and contains a colon is split at the
    FIRST colon into a key and a value.  The pairs are available as the
    ``mapping`` dict and, mirrored, as the ``series`` pandas Series.

    Attributes:
        inTable: True while the parser is inside at least one ``<table>``.
        mapping: dict of the key/value pairs collected so far.
        buffer: the result of the most recent split (a list of strings).
        name_tag: unused by this class; kept for backward compatibility.
        series: ``pd.Series`` view of ``mapping`` (empty until a pair is found).
    """

    def __init__(self):
        # initialize the base class
        super().__init__()
        self.inTable = False
        # Nesting depth of open <table> tags, so that closing an inner
        # table does not end collection for the enclosing one.
        self._table_depth = 0
        self.mapping = {}
        self.buffer = ""
        self.name_tag = ""
        # An explicit dtype avoids the pandas 1.x warning about the default
        # dtype of an empty Series.
        self.series = pd.Series(dtype=object)

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a ``<table>`` opens."""
        if tag == 'table':
            self._table_depth += 1
            self.inTable = True

    def handle_endtag(self, tag):
        """Stop collecting text once the outermost ``<table>`` closes."""
        if tag == 'table' and self._table_depth > 0:
            self._table_depth -= 1
            self.inTable = self._table_depth > 0

    def handle_data(self, data):
        """Store ``key:value`` text found inside a table."""
        if self.inTable:
            # Split on the first colon only, so values that contain colons
            # themselves (times, URLs, ...) are kept instead of dropped.
            self.buffer = data.strip(' \n\t').split(':', 1)
            if len(self.buffer) == 2:
                self.mapping[self.buffer[0]] = self.buffer[1]
                self.series = pd.Series(self.mapping)
        
class PlacemarkHandler(xml.sax.handler.ContentHandler):
    """SAX handler that gathers the text of every element inside a Placemark.

    After parsing, ``mapping`` is a dict keyed by the text of each
    Placemark's ``<name>`` element.  Each value is a dict that maps an
    element name (e.g. ``coordinates``, ``description``) to the text that
    element contained.  When the same element name closes more than once
    inside a Placemark, the texts are concatenated.

    Fields that close before any ``<name>`` has been seen in the current
    Placemark are filed under the empty-string key instead of raising
    ``KeyError``.

    ``spatializer`` and ``htmlizer`` are row-wise helpers intended for
    ``DataFrame.apply(..., axis=1)`` on the frame built from ``mapping``.
    """

    def __init__(self):
        super().__init__()
        self.inName = False  # True between <name> and </name> in a Placemark
        self.inPlacemark = False  # True between <Placemark> and </Placemark>
        self.mapping = {}
        self.buffer = ""  # text accumulated since the last closing tag
        self.name_tag = ""  # name of the Placemark currently being read

    def startElement(self, name, attributes):
        """Track entry into a Placemark and into its ``<name>`` element."""
        if name == "Placemark":
            self.inPlacemark = True
            self.buffer = ""
        if self.inPlacemark and name == "name":
            self.inName = True

    def characters(self, data):
        """Accumulate character data, but only while inside a Placemark."""
        if self.inPlacemark:
            self.buffer += data

    def endElement(self, name):
        """Store the buffered text under the element that just closed."""
        self.buffer = self.buffer.strip('\n\t')

        if name == "Placemark":
            self.inPlacemark = False
            self.name_tag = ""  # clear current name
        elif name == "name" and self.inPlacemark:
            self.inName = False
            self.name_tag = self.buffer.strip()
            self.mapping[self.name_tag] = {}
        elif self.inPlacemark:
            # setdefault: a field may close before <name> was seen (or the
            # Placemark may have no name at all); do not crash on it.
            fields = self.mapping.setdefault(self.name_tag, {})
            if name in fields:
                fields[name] += self.buffer
            else:
                fields[name] = self.buffer
        self.buffer = ""

    @staticmethod
    def spatializer(row):
        """Convert the textual coordinates of *row* into a shapely geometry.

        Tried in order:

        1. ``row['coordinates']`` as whitespace-separated ``x,y[,z]`` tuples
           -> ``Polygon``; returns a Series with ``geometry``.
        2. ``row['coordinates']`` as one ``x,y[,z]`` tuple -> ``Point``;
           returns a Series with ``geometry`` and ``altitude`` (the last
           value of the tuple).
        3. ``row['latitude']`` / ``row['longitude']`` -> ``Point``.

        Returns ``None`` when none of the above succeeds.

        Raises:
            ImportError: if shapely is not installed.
        """
        try:
            from shapely.geometry import Polygon, LineString, Point
        except ImportError as e:
            raise ImportError('This operation requires shapely. {0}'.format(e))
        import ast

        try:
            data = row['coordinates'].strip(' \t\n\r')
        except (KeyError, AttributeError, TypeError):
            # No usable coordinates column (missing, NaN, ...): fall through
            # to the latitude/longitude columns below.
            data = None

        if data:
            # ast.literal_eval only evaluates literals, so "1.8,41.4,0"
            # becomes the tuple (1.8, 41.4, 0) without running any code.
            try:
                vertices = [ast.literal_eval(chunk) for chunk in data.split()]
                return pd.Series({'geometry': Polygon(LineString(vertices))})
            except Exception:
                # Best effort: the failure type depends on the shapely
                # version, so try the single-point interpretation next.
                pass
            try:
                g = ast.literal_eval(data)
                return pd.Series({'geometry': Point(g[:2]),
                                  'altitude': g[-1]})
            except Exception:
                pass

        try:
            # Test for latitude and longitude columns
            lat = float(row['latitude'])
            lon = float(row['longitude'])
        except (KeyError, TypeError, ValueError):
            return None
        return pd.Series({'geometry': Point(lon, lat)})

    @staticmethod
    def htmlizer(row):
        """Return the ``key:value`` pairs found in ``row['description']``.

        The description is parsed with ``MyHTMLParser``.  A description
        that is not a string (e.g. NaN for a Placemark that has none)
        yields an empty Series.
        """
        description = row['description']
        if not isinstance(description, str):
            return pd.Series(dtype=object)
        htmlparser = MyHTMLParser()
        htmlparser.feed(description)
        return htmlparser.series
        
        
def _kmx_require(module_name, label=None):
    """Import *module_name*, or raise ImportError naming the package needed."""
    import importlib
    try:
        return importlib.import_module(module_name)
    except ImportError as e:
        raise ImportError('This operation requires {0}. {1}'.format(
            label or module_name, e))


def _kmx_geodataframe(df):
    """Return *df* as a GeoDataFrame with the spatializer columns appended."""
    _kmx_require('shapely')
    _kmx_require('fiona')
    gpd = _kmx_require('geopandas')
    geos = gpd.GeoDataFrame(df.apply(PlacemarkHandler.spatializer, axis=1))
    return gpd.GeoDataFrame(pd.concat([df, geos], axis=1))


def keyholemarkup2x(file,output='df'):
    """
    Convert a KML or zipped KML (KMZ) file into tabular / spatial data.

    Every Placemark of the document becomes one row (indexed by the
    Placemark name) and every element inside it becomes a column.  If a
    ``description`` column exists, ``key:value`` pairs found in its HTML
    tables are joined on as extra columns.

    All core functionality from:
    http://programmingadvent.blogspot.com/2013/06/kmzkml-file-parsing-with-python.html

    Parameters
    ----------
    file : str or os.PathLike
        Path to the ``.kml`` or ``.kmz`` file (extension is case-insensitive).
    output : str or None
        Type of output, case-insensitive.  Valid selections:
            - pandas DataFrame - 'df' or 'dataframe' (also ``None``)
            - CSV - 'csv' (written tab-separated, UTF-8, next to ``file``)
            - GeoDataFrame - 'gpd', 'gdf', 'geoframe' or 'geodataframe'
            - GeoJSON - 'geojson' or 'json' (written next to ``file``)
            - shapefile - 'shp', 'shapefile', or 'ESRI Shapefile'
              (written next to ``file``)

    Returns
    -------
    pandas.DataFrame, geopandas.GeoDataFrame or str
        The frame for the in-memory outputs; a success message naming the
        written file for 'csv', 'geojson' and 'shapefile'.

    Raises
    ------
    ValueError
        If ``file`` is not a KML/KMZ path, ``output`` is not a valid
        selection, a KMZ archive holds no ``.kml`` member, or the written
        GeoJSON / shapefile fails validation.
    ImportError
        If an optional package needed for the requested output is missing.
    """
    file = os.fspath(file)
    out_root, extension = os.path.splitext(file)
    extension = extension.lower()
    if extension not in ('.kml', '.kmz'):
        raise ValueError('Incorrect file format entered.  Please provide the '
                         'path to a valid KML or KMZ file.')

    # Validate the requested output before doing any parsing work.
    kind = 'df' if output is None else output.lower()
    frame_kinds = ('df', 'dataframe')
    geoframe_kinds = ('gpd', 'gdf', 'geoframe', 'geodataframe')
    geojson_kinds = ('geojson', 'json')
    shapefile_kinds = ('shapefile', 'shp', 'esri shapefile')
    valid_kinds = (frame_kinds + ('csv',) + geoframe_kinds
                   + geojson_kinds + shapefile_kinds)
    if kind not in valid_kinds:
        raise ValueError('The conversion returned no data; check if'
                        ' you entered a correct output file type. '
                        'Valid output types are geojson, shapefile,'
                        ' csv, geodataframe, and/or pandas dataframe.')

    parser = xml.sax.make_parser()
    handler = PlacemarkHandler()
    parser.setContentHandler(handler)
    if extension == '.kml':
        parser.parse(file)
    else:
        # Context managers close both the archive and the member stream,
        # even when parsing fails.
        with ZipFile(file, 'r') as kmz:
            members = [n for n in kmz.namelist() if n.lower().endswith('.kml')]
            if not members:
                raise ValueError('Incorrect file format entered.  Please provide the '
                                 'path to a valid KML or KMZ file.')
            with kmz.open(members[0], 'r') as stream:
                parser.parse(stream)

    df = pd.DataFrame(handler.mapping).T
    # Exact-case check: htmlizer looks the column up as 'description'.
    if 'description' in df.columns:
        extradata = df.apply(PlacemarkHandler.htmlizer,axis=1)
        df = df.join(extradata)

    if kind in frame_kinds:
        return df

    if kind == 'csv':
        out_filename = out_root + ".csv"
        # NOTE: the separator is a tab, as in the original implementation.
        df.to_csv(out_filename,encoding='utf-8',sep="\t")
        return ("Successfully converted {0} to CSV and output to"
                " disk at {1}".format(file,out_filename))

    if kind in geoframe_kinds:
        return _kmx_geodataframe(df)

    if kind in geojson_kinds:
        gdf = _kmx_geodataframe(df)
        geojson = _kmx_require('geojson')
        out_filename = out_root + ".geojson"
        gdf.to_file(out_filename,driver='GeoJSON')
        with open(out_filename, encoding='utf-8') as handle:
            loaded = geojson.load(handle)
        # NOTE(review): geojson 1.x exposes ``geojson.is_valid(obj)``
        # returning {'valid': 'yes'|'no', ...}; 2.x presumably replaced it
        # with the ``obj.is_valid`` property -- confirm against the
        # installed version.
        legacy_is_valid = getattr(geojson, 'is_valid', None)
        if callable(legacy_is_valid):
            valid = legacy_is_valid(loaded)['valid'] == 'yes'
        else:
            valid = bool(loaded.is_valid)
        if not valid:
            raise ValueError('The geojson conversion did not create a '
                            'valid geojson object. Try to clean your '
                            'data or try another file.')
        return ("Successfully converted {0} to GeoJSON and output to"
                " disk at {1}".format(file,out_filename))

    # Only the shapefile kinds remain.
    gdf = _kmx_geodataframe(df)
    shapefile = _kmx_require('shapefile', 'pyshp')
    out_filename = out_root + ".shp"
    gdf.to_file(out_filename,driver='ESRI Shapefile')
    sf = shapefile.Reader(out_filename)
    if len(sf.shapes()) == 0:
        raise ValueError('The Shapefile conversion did not create a '
                        'valid shapefile object. Try to clean your '
                        'data or try another file.')
    return ("Successfully converted {0} to Shapefile and output to"
            " disk at {1}".format(file,out_filename))



# Convert each yearly wildfire KMZ file to a geopandas GeoDataFrame
# (output='gpd'). The files are looked up relative to the current working
# directory. Each result is bound to a module-level name iYY, where YY is
# the two-digit suffix of the file name (presumably the year: the i89 and
# i12 previews below show 1989 and 2012 dates).

# suffixes 18 down to 11
i18 =keyholemarkup2x('incendis18.kmz',output='gpd')
i17 =keyholemarkup2x('incendis17.kmz',output='gpd')
i16 =keyholemarkup2x('incendis16.kmz',output='gpd')
i15 =keyholemarkup2x('incendis15.kmz',output='gpd')
i14 =keyholemarkup2x('incendis14.kmz',output='gpd')
i13 =keyholemarkup2x('incendis13.kmz',output='gpd')
i12 =keyholemarkup2x('incendis12.kmz',output='gpd')
i11 =keyholemarkup2x('incendis11.kmz',output='gpd')

# suffixes 10 down to 00
i10 =keyholemarkup2x('incendis10.kmz',output='gpd')
i09 =keyholemarkup2x('incendis09.kmz',output='gpd')
i08 =keyholemarkup2x('incendis08.kmz',output='gpd')
i07 =keyholemarkup2x('incendis07.kmz',output='gpd')
i06 =keyholemarkup2x('incendis06.kmz',output='gpd')
i05 =keyholemarkup2x('incendis05.kmz',output='gpd')
i04 =keyholemarkup2x('incendis04.kmz',output='gpd')
i03 =keyholemarkup2x('incendis03.kmz',output='gpd')
i02 =keyholemarkup2x('incendis02.kmz',output='gpd')
i01 =keyholemarkup2x('incendis01.kmz',output='gpd')
i00 =keyholemarkup2x('incendis00.kmz',output='gpd')


# suffixes 99 down to 90
i99 =keyholemarkup2x('incendis99.kmz',output='gpd')
i98 =keyholemarkup2x('incendis98.kmz',output='gpd')
i97 =keyholemarkup2x('incendis97.kmz',output='gpd')
i96 =keyholemarkup2x('incendis96.kmz',output='gpd')
i95 =keyholemarkup2x('incendis95.kmz',output='gpd')
i94 =keyholemarkup2x('incendis94.kmz',output='gpd')
i93 =keyholemarkup2x('incendis93.kmz',output='gpd')
i92 =keyholemarkup2x('incendis92.kmz',output='gpd')
i91 =keyholemarkup2x('incendis91.kmz',output='gpd')
i90 =keyholemarkup2x('incendis90.kmz',output='gpd')

# suffixes 89 down to 86
i89 =keyholemarkup2x('incendis89.kmz',output='gpd')
i88 =keyholemarkup2x('incendis88.kmz',output='gpd')
i87 =keyholemarkup2x('incendis87.kmz',output='gpd')
i86 =keyholemarkup2x('incendis86.kmz',output='gpd')
# plot this new file, use %matplotlib inline if you are in a notebook
%matplotlib inline
# a.plot()
# convert to shapefile
#a = keyholemarkup2x('DC_Quadrants.kml',output='shp')


In [57]:
i89.head()

Unnamed: 0,Snippet,styleUrl,SimpleData,SchemaData,ExtendedData,coordinates,LinearRing,outerBoundaryIs,innerBoundaryIs,Polygon,geometry
B89003011,,#FEATURES,15/07/89SUBIRATS2B89003011,,,"1.83081618958132,41.4200320812271,0 1.83082587...",,,,,"POLYGON Z ((1.83082 41.42003 0.00000, 1.83083 ..."
B89003022,,#FEATURES,31/07/89FONT-RUBÍ2B89003022,,,"1.59514676035698,41.4446887104627,0 1.59406967...",,,,,"POLYGON Z ((1.59515 41.44469 0.00000, 1.59407 ..."
B89003034,,#FEATURES,31/07/89FONT-RUBÍ2B89003034,,,"1.61432756061525,41.4546490659079,0 1.61396847...",,,,,"POLYGON Z ((1.61433 41.45465 0.00000, 1.61397 ..."
B89007007,,#FEATURES,27/06/89TALAMANCA2B89007007,,,"1.9318026312584,41.7265814755081,0 1.931798903...",,,,,"POLYGON Z ((1.93180 41.72658 0.00000, 1.93180 ..."
B89007008,,#FEATURES,28/06/89SANT MATEU DE BAGES2B89007008,,,"1.72497666702823,41.83435626139,0 1.7249588015...",,,,,"POLYGON Z ((1.72498 41.83436 0.00000, 1.72496 ..."


In [38]:
i12.head()

Unnamed: 0,Snippet,styleUrl,SimpleData,SchemaData,ExtendedData,coordinates,LinearRing,outerBoundaryIs,Polygon,innerBoundaryIs,geometry
2012080307,,#FEATURES,201208030724/06/12BARCELONA2,,,"2.16788763649868,41.451348362032,0 2.166494983...",,,,,"POLYGON Z ((2.16789 41.45135 0.00000, 2.16649 ..."
2012080308,,#FEATURES,201208030802/03/12TORRELLES DE FOIX2,,,"1.60627700576115,41.4177178463954,0 1.60662129...",,,,,"POLYGON Z ((1.60628 41.41772 0.00000, 1.60662 ..."
2012080309,,#FEATURES,201208030927/06/12PUJALT2,,,"1.42956851569687,41.702703282055,0 1.428911310...",,,,,"POLYGON Z ((1.42957 41.70270 0.00000, 1.42891 ..."
2012080310,,#FEATURES,201208031017/07/12CALDERS2,,,"1.9765237043329,41.779178788201,0 1.9755090590...",,,,,"POLYGON Z ((1.97652 41.77918 0.00000, 1.97551 ..."
2012080312,,#FEATURES,201208031229/06/12ELS PRATS DE REI2,,,"1.55546003767277,41.7382301638804,0 1.55522464...",,,,,"POLYGON Z ((1.55546 41.73823 0.00000, 1.55522 ..."


# Row Counter
Count how many wildfire records there are in total across all the yearly files.

In [1]:

# Running tally of rows over every yearly frame; the cumulative count is
# printed after each frame, so the last number printed is the grand total.
yearly_frames = [
    i18, i17, i16, i15, i14, i13, i12, i11,
    i10, i09, i08, i07, i06, i05, i04, i03, i02, i01, i00,
    i99, i98, i97, i96, i95, i94, i93, i92, i91, i90,
    i89, i88, i87, i86,
]
row_counter = 0
for df in yearly_frames:
    row_counter = row_counter + len(df)
    print(row_counter)

NameError: name 'i18' is not defined