In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

In [2]:
# Read the cleaned measurements file, then show the frame's shape followed by
# its first five rows (the last expression is rendered as the cell output).
data = pd.read_csv(filepath_or_buffer='measurements_clean.csv')
display(data.shape)
data.head(n=5)

(388, 16)

Unnamed: 0,distance,consume,speed,temp_inside,temp_outside,specials,gas_type,ac,rain,sun,refill_liters,refill_gas,fuel_price_liter,fuel_consume_km,trip_cost,cost_km
0,28.0,5.0,26,21.5,12,other,E10,0,0,0,45.0,E10,1.38,1.4,1.932,0.069
1,12.0,4.2,30,21.5,13,other,E10,0,0,0,37.115385,E10,1.38,0.504,0.69552,0.05796
2,11.2,5.5,38,21.5,15,other,E10,0,0,0,37.115385,E10,1.38,0.616,0.85008,0.0759
3,12.9,3.9,36,21.5,14,other,E10,0,0,0,37.115385,E10,1.38,0.5031,0.694278,0.05382
4,18.5,4.5,46,21.5,15,other,E10,0,0,0,37.115385,E10,1.38,0.8325,1.14885,0.0621


+ **HYPOTHESIS TEST**

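+ Null hypothesis (H0): E10 gasoline does not perform better than SP98, i.e. the mean cost per kilometer (`cost_km`) of E10 is greater than or equal to that of SP98
+ Alternative hypothesis (H1): E10 gasoline performs better than SP98, i.e. the mean cost per kilometer (`cost_km`) of E10 is lower than that of SP98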

In [15]:
# Welch's t-test (unequal variances) on cost per kilometer: E10 vs. SP98.
#
# The hypothesis being tested is directional: "E10 performs better" means the
# mean cost_km of E10 is LOWER than that of SP98. A two-sided p-value would also
# fall below alpha if E10 were the more expensive fuel, so the one-sided
# alternative is requested explicitly.
E10_data = data[data['gas_type'] == 'E10']
SP98_data = data[data['gas_type'] == 'SP98']

# alternative='less' tests H1: mean(E10 cost_km) < mean(SP98 cost_km).
# (The `alternative` keyword requires SciPy >= 1.6.)
# The statistic is bound to t_stat rather than `_` so that IPython's
# "last output" variable is not shadowed.
t_stat, p_value = stats.ttest_ind(
    E10_data['cost_km'],
    SP98_data['cost_km'],
    equal_var=False,
    alternative='less',
)

alpha = 0.05  # significance level

if p_value < alpha:
    conclusion = "There is sufficient evidence to reject the null hypothesis (H0) and conclude that E10 gasoline has better performance than SP98 gasoline."
else:
    conclusion = "There is not enough evidence to reject the null hypothesis (H0) and conclude that E10 gasoline has better performance than SP98 gasoline."

print(conclusion)


There is sufficient evidence to reject the null hypothesis (H0) and conclude that E10 gasoline has better performance than SP98 gasoline.


In [16]:
# Display the Welch t-test statistic and its two-sided p-value for cost per
# kilometer, E10 vs. SP98.
E10_data = data[data['gas_type'] == 'E10']
SP98_data = data[data['gas_type'] == 'SP98']

# Bind the statistic to a real name: assigning to `_` shadows IPython's
# "last output" variable, and displaying `_` hides what the value means.
t_stat, p_value = stats.ttest_ind(E10_data['cost_km'], SP98_data['cost_km'], equal_var=False)
t_stat, p_value

(-2.3785667365686978, 0.017869136647470033)

+ **Compare the hypothesis with the previous analysis of columns**

In [11]:

# Per-fuel averages: mean cost per kilometer and mean trip distance.
per_gas = data.groupby('gas_type')
cost_km = per_gas['cost_km'].mean()
distance = per_gas['distance'].mean()

# Fuel with the lowest mean cost per km, and fuel with the highest mean distance.
best_cost_km, best_distance = cost_km.idxmin(), distance.idxmax()

# A fuel is reported only when it wins on both criteria; otherwise None
# (in which case the cell renders no output value).
best_gas_type = best_cost_km if best_cost_km == best_distance else None

print("Cost per KM:", cost_km, sep="\n")
print("\nAverage distance:", distance, sep="\n")
print("\nBest type of gasoline based on cost per kilometer and distance:")
best_gas_type

Cost per KM:
gas_type
E10     0.068051
SP98    0.071527
Name: cost_km, dtype: float64

Average distance:
gas_type
E10     21.096250
SP98    18.639912
Name: distance, dtype: float64

Best type of gasoline based on cost per kilometer and distance:


'E10'

# Conclusion: Considering the distance, the cost of gasoline, and the performance per kilometer, I conclude that E10 gasoline gives better results per kilometer driven than SP98.