In [1]:
# This notebook filters the gas stations located inside Mexico City, using a
# state-boundary shapefile (.shp) and the gas prices provided by the CRE.

In [2]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import earthpy as et
from shapely.geometry import Point, Polygon

In [3]:
def extract_stations(polygon,stations):
    """Return the gas stations whose coordinates fall inside ``polygon``.

    Parameters
    ----------
    polygon : shapely geometry
        Area to test against; it is passed to ``Point.within``.
    stations : pandas.DataFrame
        Complete list of gas stations. Must contain the ``"Longitud"`` and
        ``"Latitud"`` columns; its index may hold any labels.

    Returns
    -------
    list of pandas.Series
        One row of ``stations`` per station located inside the polygon, in
        the order the rows appear in ``stations``. Empty when no station
        matches.
    """
    longitudes = stations["Longitud"].to_numpy()
    latitudes = stations["Latitud"].to_numpy()

    stations_inside_polygon = []
    # Walk the rows strictly by position: reading coordinates by label
    # (``.loc[i]``) while collecting rows by position (``.iloc[i]``) only
    # agrees when the index happens to be the default 0..n-1 range.
    for position, (longitude, latitude) in enumerate(zip(longitudes, latitudes)):
        if Point(longitude, latitude).within(polygon):
            stations_inside_polygon.append(stations.iloc[position])

    return stations_inside_polygon

In [4]:
# Load the two inputs: the shapefile with the state polygons and the Excel
# workbook with the gas-station prices. Later cells read the 'ESTADO' and
# 'geometry' columns of `mapa` and the 'Longitud', 'Latitud', 'regular' and
# 'premium' columns of `stations`.
mapa = gpd.read_file('México_Estados.shp')
stations = pd.read_excel('prices_places_gas_stations.xlsx')

In [5]:
# Preview the first rows of the state polygons to check the available columns.
mapa.head()

Unnamed: 0,CODIGO,ESTADO,geometry
0,MX02,Baja California,"MULTIPOLYGON (((-113.13972 29.01778, -113.2405..."
1,MX03,Baja California Sur,"MULTIPOLYGON (((-111.20612 25.80278, -111.2302..."
2,MX18,Nayarit,"MULTIPOLYGON (((-106.62108 21.56531, -106.6475..."
3,MX14,Jalisco,"POLYGON ((-101.52490 21.85664, -101.58830 21.7..."
4,MX01,Aguascalientes,"POLYGON ((-101.84620 22.01176, -101.96530 21.8..."


In [6]:
# Template for the prices dataframe: a single all-NaN placeholder row carrying
# the expected column names. The cell that assembles the per-state filtering
# results starts from this frame.
prices_mexico = pd.DataFrame(
    columns=['place_id', 'name', 'cre_id', 'Longitud', 'Latitud', 'regular', 'premium'],
    index=range(1),
)

In [7]:
# Uses extract_stations to obtain, state by state, the gas stations located
# inside each state polygon, cleans the placeholder prices and assembles all
# the per-state results into prices_mexico.
state_frames = []
for _, state_row in mapa.iterrows():
    local_prices = extract_stations(state_row['geometry'], stations)
    if not local_prices:
        # No station falls inside this state: an empty frame has no
        # 'Unnamed: 0' or price columns, so there is nothing to clean.
        continue

    # 'Unnamed: 0' is dropped when present; it is not part of the result.
    local_prices_df = pd.DataFrame(local_prices).drop(columns=['Unnamed: 0'], errors='ignore')
    local_prices_df['State'] = state_row['ESTADO']

    # Prices equal to 0 (regular) or to 0, 0.01 or 1 (premium) are replaced
    # by the median price of the stations in the same state.
    # NOTE(review): the median is computed with those placeholder values still
    # included — confirm this is intended.
    local_prices_df['regular'] = local_prices_df['regular'].replace(0, local_prices_df['regular'].median())
    local_prices_df['premium'] = local_prices_df['premium'].replace([0, 0.01, 1], local_prices_df['premium'].median())
    state_frames.append(local_prices_df)

if not state_frames:
    raise ValueError(
        "No gas station falls inside any state polygon; "
        "check the 'Longitud'/'Latitud' columns and the shapefile."
    )

# Concatenate once, outside the loop. The placeholder row of the template is
# never added, so no rows have to be dropped afterwards: dropping index label 0
# would also delete a real station whose original row label is 0.
all_prices = pd.concat(state_frames)

# Keep the column order of the existing prices_mexico frame first, followed by
# any additional columns (e.g. 'State'). This also makes the cell re-runnable.
column_order = list(dict.fromkeys([*prices_mexico.columns, *all_prices.columns]))
prices_mexico = all_prices.reindex(columns=column_order)

In [8]:
# Summarize the assembled dataframe: number of rows, columns, non-null counts and dtypes.
prices_mexico.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12476 entries, 5 to 12632
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   place_id  12476 non-null  object 
 1   name      12476 non-null  object 
 2   cre_id    12476 non-null  object 
 3   Longitud  12476 non-null  float64
 4   Latitud   12476 non-null  float64
 5   regular   12476 non-null  float64
 6   premium   12476 non-null  float64
 7   State     12476 non-null  object 
dtypes: float64(4), object(4)
memory usage: 877.2+ KB


In [9]:
# Preview the first rows of the per-state results, including the added 'State' column.
prices_mexico.head()

Unnamed: 0,place_id,name,cre_id,Longitud,Latitud,regular,premium,State
5,2044,"ESTACION RAEL, S. DE R.L. DE C.V.",PL/842/EXP/ES/2015,-117.0715,32.51342,19.99,22.79,Baja California
8,2047,"ESTACION RAEL, S. DE R.L. DE C.V.",PL/860/EXP/ES/2015,-116.6079,31.86546,20.49,22.49,Baja California
14,2053,"CIRCULO DOS, S.A. DE C.V.",PL/635/EXP/ES/2015,-117.0271,32.5279,20.39,22.99,Baja California
23,2062,Becktrop Operadora SA de CV,PL/708/EXP/ES/2015,-117.0249,32.45102,19.39,22.49,Baja California
25,2064,"ESTACION LA VICTORIA, S.A. DE C.V.",PL/636/EXP/ES/2015,-117.0466,32.53171,19.99,22.99,Baja California


In [13]:
# Keep only the stations whose 'State' is 'Distrito Federal' (Mexico City)
# and export them to an Excel workbook.
prices_cdmx = prices_mexico.loc[prices_mexico['State'] == 'Distrito Federal']
prices_cdmx.to_excel('prices_cdmx.xlsx')