## Extracting Data

In [None]:
%pip install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

# Open the brand listing page in a Safari-driven browser session.
driver = webdriver.Safari()
driver.get("https://bama.ir/car/<BRAND_NAME>")
print(driver.title)

cars = []  # parsed ad records (dicts); filled by the parsing loop below
ads = []   # unique ad elements collected while scrolling

SCROLL_PAUSE_TIME = 2   # seconds to wait after each scroll
TARGET_AD_COUNT = 200   # stop scrolling once this many ads were collected
MAX_IDLE_SCROLLS = 3    # give up after this many scrolls in a row add nothing

# Element ids already collected; a set keeps the duplicate check O(1).
seen_ad_ids = set()
idle_scrolls = 0

# Stop when enough ads were gathered OR the page stopped yielding new ones.
# Checking only for "no ads on the page" would loop forever on a listing
# that holds fewer than TARGET_AD_COUNT ads.
while len(ads) < TARGET_AD_COUNT and idle_scrolls < MAX_IDLE_SCROLLS:
    # Jump to the bottom of the document, then pause before re-reading the
    # ad list (presumably the page loads more ads on scroll).
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE_TIME)

    fresh_ads = []
    for ad in driver.find_elements(By.CSS_SELECTOR, "div.bama-ad-holder"):
        if ad.id not in seen_ad_ids:
            seen_ad_ids.add(ad.id)
            fresh_ads.append(ad)

    if fresh_ads:
        ads.extend(fresh_ads)
        idle_scrolls = 0
    else:
        idle_scrolls += 1

from selenium.common.exceptions import WebDriverException

# Turn each collected ad element into a {"year", "usage", "model", "price"}
# record and append it to `cars`. Parsing is best-effort: an ad whose fields
# are missing or cannot be converted to numbers is skipped.
# NOTE(review): only the first 100 collected ads are parsed — confirm that
# this cap is intended.
for ad in ads[:100]:
    year = None
    usage = None
    model = None
    price = None

    try:
        detail_row = ad.find_element(By.CSS_SELECTOR, "div.bama-ad__detail-row")
        spans = detail_row.find_elements(By.TAG_NAME, "span")
        if len(spans) >= 3:
            year = int(spans[0].text.strip())
            usage_text = spans[1].text.strip()
            model = spans[2].text.strip()
            if usage_text == "صفر کیلومتر":
                # This label is mapped to a usage of 0.
                usage = 0
            elif "km" in usage_text:
                usage = int(usage_text.replace("km", "").replace(",", "").strip())
            # Any other usage text leaves `usage` as None so the ad is
            # skipped below; a raw string here would break numeric model
            # fitting later on.
    except (WebDriverException, ValueError):
        # Missing/stale element or non-numeric text: leave the fields that
        # were not parsed as None.
        pass

    try:
        price_text = ad.find_element(By.CSS_SELECTOR, "span.bama-ad__price").text.strip()
        price = int(price_text.replace(",", ""))
    except (WebDriverException, ValueError):
        # No price element, or the price is not a plain number.
        pass

    # Keep only fully parsed ads (usage may legitimately be 0, so compare
    # against None rather than relying on truthiness).
    if price is not None and year is not None and usage is not None:
        cars.append({
            "year": year,
            "usage": usage,
            "model": model,
            "price": price
        })

# Dump every parsed record, one per line, so the scrape can be inspected.
for record in cars:
    print(record)

# End the browser session now that scraping is done.
driver.quit()


## Build the model

In [None]:
%pip install pandas
import pandas as pd

# Load the scraped records (a list of dicts) into a table.
df = pd.DataFrame(cars)

# The target is the price column; the features are every remaining column.
y = df["price"]
x = df.drop(columns=["price"])



In [None]:
%pip install scikit-learn
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
# One-hot encode the "model" column; the other feature columns are left as-is.
x = pd.get_dummies(data=x, columns=["model"])


## Train the model and predict with a Random Forest Regressor

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The split is seeded with the same
# value as the estimator so that repeated runs on the same data produce the
# same predictions and metrics.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest on the training rows.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(x_train, y_train)

# Predicted prices for the held-out rows, in the same order as x_test.
y_pred = model.predict(x_test)

# Print each predicted price rounded to a whole number with thousands
# separators (e.g. 1234567 -> 1,234,567). The "," format specifier replaces
# the manual digit-grouping loop and also formats negative values correctly.
for p in y_pred:
    print(f"{int(round(p)):,}")

# Bare expression: as the last statement of a notebook cell it displays the
# held-out feature rows that the predictions above correspond to.
pd.DataFrame(x_test)
    

## Evaluate accuracy

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Score the held-out predictions against the true prices.
mae = mean_absolute_error(y_test, y_pred)  # mean absolute error
mse = mean_squared_error(y_test, y_pred)   # mean squared error
r2 = r2_score(y_test, y_pred)              # coefficient of determination

# Report each metric on its own line as "<label> <value>".
for label, score in (("MAE:", mae), ("MSE:", mse), ("R2 Score:", r2)):
    print(label, score)
