In [28]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
from datetime import datetime, timedelta

# One Chrome session shared by every scrape_table() call below.
driver = webdriver.Chrome()

# The scrape loop starts at start_date and steps back one day at a time
# until it has processed end_date (inclusive), so start_date is the LATER day.
start_date = datetime(2024, 3, 14)  
end_date = datetime(2023, 9, 29)  

# Accumulator filled by scrape_table(): one list per scraped table row.
data = []

def scrape_table(date):
    """Scrape the price table for one day and append its rows to ``data``.

    Opens the price page, types ``date`` into the ``datePricing`` input,
    submits the form and reads every ``<tr>`` of the
    ``commodityPriceParticular`` table. Each accepted row is appended to the
    module-level ``data`` list as its five cell texts followed by
    ``date.date()``.

    Rows that do not contain exactly five ``<td>`` cells (header rows, and any
    placeholder/colspan row) are skipped, so every appended record lines up
    with the six column names used when the DataFrame is built.

    Scraping is best-effort: any exception is caught and printed so that one
    failing day does not abort the whole run.

    Args:
        date: ``datetime`` of the day to query.
    """
    # Commodity, Unit, Minimum, Maximum, Average -- the Date is added here.
    expected_cells = 5

    try:
        wait = WebDriverWait(driver, 10)

        driver.get("https://kalimatimarket.gov.np/price")
        date_input = wait.until(
            EC.presence_of_element_located((By.ID, "datePricing"))
        )

        date_input.clear()
        # NOTE(review): assumes the input accepts month-day-year keystrokes -- confirm.
        date_input.send_keys(date.strftime("%m-%d-%Y"))

        submit_button = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn-theme.comment-btn"))
        )
        submit_button.click()

        table = wait.until(
            EC.presence_of_element_located((By.ID, "commodityPriceParticular"))
        )

        for row in table.find_elements(By.TAG_NAME, "tr"):
            cells = [cell.text for cell in row.find_elements(By.TAG_NAME, "td")]
            if len(cells) != expected_cells:
                # Header rows have no <td>; anything else of the wrong width
                # would be misaligned against the DataFrame columns.
                continue
            data.append(cells + [date.date()])

    except Exception as e:
        print(f"Error scraping data for {date.date()}: {e}")

current_date = start_date
try:
    # Visit the /lang/en URL once before scraping (presumably switches the
    # site language to English for the rest of the session -- confirm).
    driver.get("https://kalimatimarket.gov.np/lang/en")

    # Walk backwards one day at a time, from start_date down to end_date inclusive.
    while current_date >= end_date:
        scrape_table(current_date)
        current_date -= timedelta(days=1)
finally:
    # Always release the browser, even if the run is interrupted or the
    # initial page load fails; otherwise the Chrome process is leaked.
    driver.quit()

# Create a DataFrame from the data
df = pd.DataFrame(data, columns=["Commodity", "Unit", "Minimum", "Maximum", "Average", "Date"])

# Print the DataFrame
print(df)

In [None]:
import pandas as pd

# Read the historical price export, then relabel its six columns.
df_train = pd.read_csv('kalimati-tarkari-prices-from-may-2021-to-september-2023.csv')
df_train.columns = [
    "Commodity",
    "Date",
    "Unit",
    "Minimum",
    "Maximum",
    "Average",
]
df_train

In [None]:
# Spot-check the raw Date value stored under index label 40000.
df_train.loc[40000, 'Date']

In [None]:
# Summary statistics of df_train, using pandas' default describe() settings.
df_train.describe()

In [None]:
# Keep only the columns needed for the analysis. An explicit .copy() makes
# filtered_df an independent frame: the following cells assign to its columns,
# and doing that on a bare column selection triggers SettingWithCopyWarning.
filtered_df = df_train[["Commodity", "Average", "Date"]].copy()
filtered_df

In [None]:
def convert_to_yyyy_mm_dd(date_str):
    """Return ``date_str`` re-formatted as a ``YYYY-MM-DD`` string.

    The value is first parsed with pandas' default date inference. If that
    attempt raises ``ValueError``, it is parsed again as an explicit
    day/month/year (``%d/%m/%Y``) date.
    """
    iso_format = '%Y-%m-%d'
    try:
        iso_text = pd.to_datetime(date_str).strftime(iso_format)
    except ValueError:
        day_first = pd.to_datetime(date_str, format='%d/%m/%Y')
        iso_text = day_first.strftime(iso_format)
    return iso_text

# Normalise every entry of the mixed-format Date column to a YYYY-MM-DD string
# by running each value through convert_to_yyyy_mm_dd.
normalized_dates = filtered_df['Date'].map(convert_to_yyyy_mm_dd)
filtered_df['Date'] = normalized_dates

In [None]:
# Display filtered_df to inspect the Date column after normalisation.
filtered_df

In [None]:
# Strip the 'Rs' marker and convert the prices to float.
# Going through astype(str) keeps this cell safe to re-run: values that are
# already floats (from a previous run) or NaN round-trip through their string
# form instead of raising AttributeError on a per-value .replace() call.
filtered_df['Average'] = (
    filtered_df['Average']
    .astype(str)
    .str.replace('Rs', '', regex=False)
    .str.strip()
    .astype(float)
)
filtered_df

In [None]:
# Inspect the Python type of the Date value stored under index label 0.
type(filtered_df.loc[0, 'Date'])

In [None]:
# Display the current state of filtered_df.
filtered_df

In [None]:
# Turn the Date column into pandas datetimes, then show the entry under
# index label 0 as a quick check.
date_column = pd.to_datetime(filtered_df['Date'])
filtered_df['Date'] = date_column
filtered_df.loc[0, 'Date']

In [None]:
import matplotlib.pyplot as plt

# Make sure Date holds datetimes before it is used as the x axis.
filtered_df['Date'] = pd.to_datetime(filtered_df['Date'])

# One line is drawn per commodity named here.
commodities = ['Tomato Small(Indian)']
for commodity in commodities:
    is_commodity = filtered_df['Commodity'] == commodity
    commodity_data = filtered_df[is_commodity]
    plt.plot(
        commodity_data['Date'],
        commodity_data['Average'],
        label=commodity,
    )

# Axis labelling and layout.
plt.title('Average Price of Commodities Over Time')
plt.xlabel('Date')
plt.ylabel('Average Price')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Keep only the rows of one commodity. The comparison must be made on the
# column itself so that it yields a per-row boolean mask; comparing the list
# ['Commodity'] to a string just evaluates to False and the lookup fails.
tomato_df = filtered_df[filtered_df['Commodity'] == "Tomato Big(Indian)"]
filtered_df.head()

In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# 80/20 chronological split by row position. The split point must come from
# the frame being split (filtered_df), not from the scraped `df` of the first
# cell, which has an unrelated length.
# NOTE(review): filtered_df still holds every commodity, so this fits a single
# ARIMA model across all of them in file order -- consider fitting on one
# commodity's date-sorted series instead.
train_size = int(len(filtered_df) * 0.8)
train, test = filtered_df.iloc[:train_size], filtered_df.iloc[train_size:]

model = ARIMA(train['Average'], order=(5,1,0))  # Example order, adjust as needed
model_fit = model.fit()

# Forecast one value per held-out row and score it against the actual prices.
forecast = model_fit.forecast(steps=len(test))

mse = mean_squared_error(test['Average'], forecast)
print('Mean Squared Error:', mse)

# NOTE(review): the model was fitted on `train` only, so these three steps
# start right after the training rows (they overlap the test period), not
# after the end of the full dataset.
future_forecast = model_fit.forecast(steps=3)
print('Forecasted Values for Next 3 Periods:', future_forecast)
