In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib as plt
import math
import itertools
import statsmodels.api as sm

In [2]:
# Function that calculates the distnce between two points in km, using geocoordinates.
def euc_distance(lat1, lon1, lat2, lon2):
    p = math.pi/180
    a = 0.5 - math.cos((lat2-lat1)*p)/2 + math.cos(lat1*p) * math.cos(lat2*p) * (1-math.cos((lon2-lon1)*p))/2
    d = 12742 * math.asin(math.sqrt(a))
    return  d

In [10]:
#Function that calculates the potential savings of driving to a diferent gas station. 
#Calculates the savings for all gas stations within the specified radius.
def get_savings(stations,reference_distance,mileage,liters,fuel_type):
    total_savings = []
    j = 1
    for i in range(len(stations)):
        for j in range(i+1,len(stations)):
            distance = euc_distance(stations.iloc[i,4],stations.iloc[i,3],
                                     stations.iloc[j,4],stations.iloc[i,3])
            if distance <= reference_distance and stations.iloc[j][fuel_type]<stations.iloc[i][fuel_type]:
                price_difference = stations.iloc[i][fuel_type] - stations.iloc[j][fuel_type]
                savings = price_difference * liters - (2*(stations.iloc[j][fuel_type]*distance/mileage))
                total_savings.append({'gas_1': stations.iloc[i]["place_id"],
                                      'price_1': stations.iloc[i][fuel_type],                                      
                                      'gas_2': stations.iloc[j]["place_id"],
                                      'price_2': stations.iloc[j][fuel_type],
                                      'distance': distance,
                                      'savings': savings,})
            else:
                pass
    return total_savings

In [4]:
#Import gas prices
gas_stations = pd.read_excel('prices_cdmx.xlsx', index_col=0)

In [5]:
gas_stations.head()

Unnamed: 0,place_id,name,cre_id,Longitud,Latitud,regular,premium,State
32,2071,"Gasomac, S.A. de C.V.",PL/650/EXP/ES/2015,-99.13844,19.3231,20.99,23.59,Distrito Federal
33,2072,"Autoconsumo los Pinos, S.A. de C.V.",PL/866/EXP/ES/2015,-99.16505,19.30205,20.79,22.49,Distrito Federal
114,2153,"Inmobiliaria RAVS, S.A. de C.V.",PL/695/EXP/ES/2015,-99.08196,19.37504,21.19,22.99,Distrito Federal
118,2157,"Gasolinería México - Ajusco 1, S.A. de C.V.",PL/821/EXP/ES/2015,-99.19692,19.2289,20.19,21.99,Distrito Federal
228,2267,"PETROMAX, S.A. DE C.V.",PL/537/EXP/ES/2015,-99.10912,19.47922,20.99,22.99,Distrito Federal


In [12]:
#This codes runs the "get_savings" function for all gas stations in a radius between 4km and 10km. Only the gas stations
#changes that resulted in net savings will be added to the results dataframe. This function assumes the car has a mileage of
#14 km per liter and it is fueling 40 liters.
results = []
for radious in np.arange(4.0,10.0,.1):
    total_savings = get_savings(gas_stations,radious,14,40,"regular")
    savings_df = pd.DataFrame(total_savings)
    successes = savings_df[savings_df['savings'] > 0].sort_values('savings',ascending=True)
    number_successes = len(successes)
    successes_perc = number_successes/len(savings_df)
    percentil_2 = savings_df['savings'].quantile(0.025)
    percentil_97 = savings_df['savings'].quantile(0.975)
    savings_analysis = {'radio': radious, 'number_experiments': len(savings_df), 'successes': number_successes,
                        'successes_perc': successes_perc, 'percentil_2.5': percentil_2, 'percentil_97.5': percentil_97,
                       'average_savings': savings_df['savings'].mean()}
    results.append(savings_analysis)    

In [14]:
results_df = pd.DataFrame(results)

In [15]:
results_df.head()

Unnamed: 0,radio,number_experiments,successes,successes_perc,percentil_2.5,percentil_97.5,average_savings
0,4.0,8630,7718,0.894322,-5.442652,71.787565,23.986252
1,4.1,8825,7871,0.891898,-5.702559,71.555979,23.897411
2,4.2,9048,8039,0.888484,-6.030789,71.348514,23.786538
3,4.3,9264,8201,0.885255,-6.17171,71.55911,23.755691
4,4.4,9465,8356,0.882831,-6.334657,71.545384,23.76688


In [19]:
total_savings['savings'].hist(bins=100)

TypeError: list indices must be integers or slices, not str

In [170]:
#exporting results to excel
compilacion_resultados_df.to_excel('compilacion_experimentos_gasolina_2.xlsx')

In [144]:
#Use the get_savings function, using 
total_savings = get_savings(gas_stations,3,15,40,"regular")

In [70]:
total_savings_df = pd.DataFrame(total_savings)