In [49]:
import pandas as pd
import requests as r
from bs4 import BeautifulSoup as bs
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

In [50]:
import pandas as pd

# Gasoline prices data
gasoline_data = {
    'Year': [2018, 2019, 2020, 2021, 2022, 2023, 2024],
    'Jan': ['2,596', '2,352', '2,631', '2,391', '3,500', '3,555', '3,353'],
    'Feb': ['2,632', '2,412', '2,530', '2,559', '3,675', '3,622', '3,486'],
    'Mar': ['2,631', '2,620', '2,334', '2,856', '4,401', '3,660', '3,707'],
    'Apr': ['2,795', '2,894', '1,946', '2,907', '4,369', '3,839', None], 
    'May': ['2,963', '2,963', '1,946', '3,041', '4,695', '3,794', None], 
    'Jun': ['2,970', '2,814', '2,141', '3,245', '5,149', '3,821', None], 
    'Jul': ['2,930', '2,836', '2,243', '3,326', '4,768', '3,842', None], 
    'Aug': ['2,919', '2,716', '2,245', '3,351', '4,205', '4,064', None], 
    'Sep': ['2,930', '2,694', '2,260', '3,361', '3,990', '4,107', None], 
    'Oct': ['2,945', '2,741', '2,228', '3,477', '4,130', '3,910', None], 
    'Nov': ['2,733', '2,687', '2,159', '3,576', '3,958', '3,623', None], 
    'Dec': ['2,479', '2,652', '2,235', '3,505', '3,459', '3,411', None]
}

# Energy prices data
energy_data = {
    'Year': [2018, 2019, 2020, 2021, 2022, 2023, 2024],
    'Jan': ['0,135', '0,135', '0,134', '0,136', '0,147', '0,168', '0,173'],
    'Feb': ['0,135', '0,136', '0,134', '0,137', '0,148', '0,168', '0,173'],
    'Mar': ['0,135', '0,135', '0,134', '0,138', '0,150', '0,166', '0,174'],
    'Apr': ['0,134', '0,135', '0,133', '0,139', '0,151', '0,165', None], 
    'May': ['0,136', '0,136', '0,134', '0,140', '0,154', '0,165', None], 
    'Jun': ['0,139', '0,139', '0,137', '0,142', '0,160', '0,170', None], 
    'Jul': ['0,139', '0,140', '0,137', '0,143', '0,164', '0,169', None], 
    'Aug': ['0,139', '0,139', '0,137', '0,144', '0,167', '0,170', None], 
    'Sep': ['0,138', '0,139', '0,137', '0,144', '0,167', '0,171', None], 
    'Oct': ['0,136', '0,136', '0,135', '0,142', '0,166', '0,169', None], 
    'Nov': ['0,134', '0,133', '0,136', '0,142', '0,163', '0,168', None], 
    'Dec': ['0,135', '0,133', '0,136', '0,142', '0,165', '0,169', None]
}

# Create DataFrames
gasoline_df = pd.DataFrame(gasoline_data)
energy_df = pd.DataFrame(energy_data)

# Convert fuel prices to numeric
gasoline_df.iloc[:, 1:] = gasoline_df.iloc[:, 1:].apply(lambda x: x.str.replace(',', '').astype(float))
energy_df.iloc[:, 1:] = energy_df.iloc[:, 1:].apply(lambda x: x.str.replace(',', '').astype(float))

# Calculate mean of the last 3 years for each month
gasoline_mean = gasoline_df.iloc[-3:, 1:].mean()
energy_mean = energy_df.iloc[-3:, 1:].mean()

# Fill missing values with mean of last 3 years
gasoline_df.fillna(gasoline_mean, inplace=True)
energy_df.fillna(energy_mean, inplace=True)

# Round to two decimal places
gasoline_df.iloc[:, 1:] = gasoline_df.iloc[:, 1:].round(2)
energy_df.iloc[:, 1:] = energy_df.iloc[:, 1:].round(3)


gasoline_df

energy_df


Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
0,2018,135.0,135.0,135.0,134.0,136.0,139.0,139.0,139.0,138.0,136.0,134.0,135.0
1,2019,135.0,136.0,135.0,135.0,136.0,139.0,140.0,139.0,139.0,136.0,133.0,133.0
2,2020,134.0,134.0,134.0,133.0,134.0,137.0,137.0,137.0,137.0,135.0,136.0,136.0
3,2021,136.0,137.0,138.0,139.0,140.0,142.0,143.0,144.0,144.0,142.0,142.0,142.0
4,2022,147.0,148.0,150.0,151.0,154.0,160.0,164.0,167.0,167.0,166.0,163.0,165.0
5,2023,168.0,168.0,166.0,165.0,165.0,170.0,169.0,170.0,171.0,169.0,168.0,169.0
6,2024,173.0,173.0,174.0,158.0,159.5,165.0,166.5,168.5,169.0,167.5,165.5,167.0


In [51]:
# Merge DataFrames on the "Year" column
ene_gas_price_df = pd.merge(gasoline_df, energy_df, on='Year', suffixes=('_gasoline', '_energy'))
year = ene_gas_price_df.filter(like="Year")
ene_gas_price_gas = ene_gas_price_df.filter(like="gasoline").map(lambda x: (x/1000))
ene_gas_price_ene = ene_gas_price_df.filter(like="energy").map(lambda x: (x/100))
ene_gas_price_df = pd.concat([year, ene_gas_price_gas, ene_gas_price_ene], axis=1)

In [52]:
year

Unnamed: 0,Year
0,2018
1,2019
2,2020
3,2021
4,2022
5,2023
6,2024


In [53]:
ene_gas_price_df.to_csv("prices_df.csv", index=False)