In [11]:
%load_ext autoreload
%autoreload 2

In [1]:
# MyRealty
from ConcreteScrapers.MyRealty.MyRealtyScrapingPipeline import MyRealtyScrapingPipeline

# Storage
from CSVStorage import CSVStorage

In [2]:
# Storage object that is handed to the MyRealty scraping pipeline in the next cell.
# NOTE(review): "apartments.csv" is presumably the output file path — confirm in CSVStorage.
storage = CSVStorage("apartments.csv")

In [3]:
# URL the MyRealty pipeline is pointed at; named so it is easy to spot and change.
myrealty_start_url = "https://myrealty.am/en/apartments-for-sale/7784"

# Build the pipeline from that URL and the storage object created earlier.
scraper_pipeline = MyRealtyScrapingPipeline(myrealty_start_url, storage)

# GeoService

In [24]:
from GeoFeaturesExtractor import GeoFeaturesExtractor
# Configure the extractor with the place categories to look for.
# NOTE(review): the second argument (1000) looks like a search radius — confirm its
# meaning and units in GeoFeaturesExtractor.
geoService = GeoFeaturesExtractor(
    [
        "supermarket",
        "restaurant",
        "store",
        "hospital",
        "subway_station",
        "school",
        "beauty_salon",
    ],
    1000,
)

# Resolve the address string to a (lat, lng) pair.
lat, lng = geoService.location("Yerevan/Erebuni/Khorenatsi Street (ER.)")

# The lookup takes the coordinates as a single "lat,lng" string.
amenities = geoService.find_nearby_places(f"{lat},{lng}")
print(amenities)

[Amenity: store, Latitude: 40.1503659, Longitude: 44.5231357, Amenity: restaurant, Latitude: 40.1416798, Longitude: 44.5222183, Amenity: school, Latitude: 40.14604459999999, Longitude: 44.5233545, Amenity: school, Latitude: 40.140908, Longitude: 44.5203911, Amenity: beauty_salon, Latitude: 40.1456645, Longitude: 44.52097089999999, Amenity: supermarket, Latitude: 40.1421264, Longitude: 44.5211657, Amenity: store, Latitude: 40.1421264, Longitude: 44.5211657, Amenity: store, Latitude: 40.1423248, Longitude: 44.5224935]


In [32]:
import re
from typing import Any, Callable, Optional

import requests
from bs4 import BeautifulSoup

In [33]:
# Listing page used for the manual scraping experiments in the cells below.
url = "https://bars.am/en/properties/standard/apartment/53896"

# Bound the wait: without a timeout, requests.get() can block indefinitely on a stalled server.
response = requests.get(url, timeout=30)

# Stop here on a bad or empty response. Parsing an error page would only make the
# later extraction cells fail with a misleading error far from the real cause.
if response.status_code != 200 or not response.text.strip():
    raise RuntimeError(f"Failed to fetch {url}: HTTP {response.status_code}")

# Parsed document shared by all the helper functions defined further down.
soup = BeautifulSoup(response.text, 'html.parser')

In [43]:
# Looks up the <strong> label for the apartment area.
# NOTE(review): `strong_tag` is not referenced anywhere else in the visible code —
# possibly leftover exploration; confirm before removing.
strong_tag = soup.find('strong', string='Apartment area (sq/m):')

def __get_quick_data(label: str, type_: Callable[[str], Any]) -> Any:
    """Read one labelled value from the listing page and convert it.

    Finds the ``<strong>`` element in the notebook-global ``soup`` whose text is
    exactly ``label``, joins the stripped strings of that element's parent,
    removes the label text, and passes what is left to ``type_``.

    Args:
        label: Exact label text to look up, e.g. ``"Floor:"``.
        type_: Callable applied to the remaining text (``int``, ``float``, ``str``, ...).

    Returns:
        The converted value, or ``None`` when the page has no ``<strong>`` with
        that label (matching how the other helpers in this cell report a
        missing element).

    Raises:
        Whatever ``type_`` raises when the text cannot be converted
        (for example ``ValueError`` from ``int``).
    """
    # `string=` is the current bs4 keyword; `text=` is only its legacy spelling.
    label_tag = soup.find('strong', string=label)
    if label_tag is None or label_tag.parent is None:
        return None

    # The parent holds both the label and its value; drop the label, keep the value.
    raw_text = ''.join(label_tag.parent.stripped_strings).replace(label, '').strip()
    return type_(raw_text)

def __get_id() -> Optional[str]:
    """Extract the listing code from the notebook-global ``soup``.

    Searches for a ``<div>`` whose string matches ``Code: <digits>`` and returns
    the digits.

    Returns:
        The captured digits as a string (no integer conversion is performed),
        or ``None`` when no matching ``<div>`` is found or its text does not
        match the pattern.
    """
    # Raw string: in a normal literal "\d" is an invalid escape sequence, which
    # newer Python versions warn about. Compiled once and reused for both lookups.
    code_pattern = re.compile(r"Code: (\d+)")

    div_tag = soup.find('div', string=code_pattern)
    if div_tag is None:
        return None

    match = code_pattern.search(div_tag.text)
    return match.group(1) if match else None
    
def __get_address() -> str:
    """Return the listing address from the notebook-global ``soup``.

    Reads the text of the ``<div id="listing-address-label">`` element, removes
    the map-marker icon markup string from it, and trims surrounding whitespace.
    Returns ``None`` when the element is not present.
    """
    address_div = soup.find('div', id="listing-address-label")

    # Nothing to extract if the address container is missing.
    if not address_div:
        return None

    return address_div.text.replace('<i class="fa fa-map-marker"></i>', '').strip()
    
def __get_price() -> int:
    """Return the listing price from the notebook-global ``soup`` as an integer.

    Reads the text of the ``<div class="price for-sale-2">`` element, removes the
    currency icon markup string, trims whitespace, drops thousands separators
    (commas) and converts the result with ``int``. Returns ``None`` when the
    element is not present.
    """
    price_div = soup.find('div', class_="price for-sale-2")

    # Nothing to extract if the price container is missing.
    if not price_div:
        return None

    without_icon = price_div.text.replace('<i class="fa fa-usd"></i>', '')
    digits_only = without_icon.strip().replace(",", "")
    return int(digits_only)

In [44]:
# Pull each labelled field from the parsed page, converting it with the type
# given as the second argument.
area = __get_quick_data("Apartment area (sq/m):", float)
# "Floor:" and "Floors:" are two different labels: the first is stored as
# `floor`, the second as `storeys`.
floor = __get_quick_data("Floor:", int)
storeys = __get_quick_data("Floors:", int)
rooms = __get_quick_data("Number of rooms:", int)
bedrooms = __get_quick_data("Number of bedrooms:", int)
bathrooms = __get_quick_data("Number of bathrooms:", int)
ceiling_height = __get_quick_data("Ceiling height (m):", float) 
# Free-text fields are kept as strings.
building_type = __get_quick_data("Building Type:", str)
condition = __get_quick_data("Condition:", str)
# These three come from dedicated elements, each with its own helper.
id_ = __get_id()
address = __get_address()
price = __get_price()

In [45]:
# Text of the <span> inside every <li class="amenities-item">.
# Items without a <span> are skipped instead of raising AttributeError.
# `find_all` is the current bs4 name; `findAll` is only its legacy alias.
# NOTE: this rebinds `amenities`, which the GeoService cell also assigns.
amenities = [
    item.span.text
    for item in soup.find_all('li', class_='amenities-item')
    if item.span is not None
]


In [46]:
# Bare last expression: the notebook displays the current value of `amenities`.
amenities

['Building security',
 'Heating system',
 'Open balcony',
 'Washing machine',
 'Kitchen stove',
 'Fridge',
 'Built-in furniture',
 'Kitchen furniture',
 'Transport availability']

In [56]:
# `src=True` selects only <img> tags that actually carry a src attribute, so
# indexing img['src'] cannot raise KeyError on tags without one.
image_links = [img['src'] for img in soup.find_all('img', src=True)]

# Keep listing photos only: the path must contain "uploads/listing-pics/" and
# no underscore.
# NOTE(review): the underscore filter presumably drops thumbnail/resized
# variants — confirm against the site's file naming.
final_links = [
    link
    for link in image_links
    if "uploads/listing-pics/" in link and "_" not in link
]
final_links

['https://bars.am/uploads/listing-pics/1967/60499b3a.jpg',
 'https://bars.am/uploads/listing-pics/1967/a083ca25.jpg',
 'https://bars.am/uploads/listing-pics/1967/e4207240.jpg',
 'https://bars.am/uploads/listing-pics/1967/fcf69ca5.jpg',
 'https://bars.am/uploads/listing-pics/1967/9a68a109.jpg',
 'https://bars.am/uploads/listing-pics/1967/404f56d9.jpg']

In [74]:
from ConcreteScrapers.Bars.BarsApartmentScraper import BarsApartmentScraper
# Same listing URL as the manual exploration cells, now run through the packaged scraper class.
scraper = BarsApartmentScraper("https://bars.am/en/properties/standard/apartment/53896")
# NOTE(review): scrape() presumably fetches and parses the page so that values()
# has data to return — confirm in BarsApartmentScraper.
scraper.scrape()
# Last expression of the cell: displays the dict returned by values()
# (keys such as 'id', 'price', 'facilities', 'location', 'area', ...).
scraper.values()

{'id': '53896',
 'price': 75000,
 'facilities': ['Building security',
  'Heating system',
  'Open balcony',
  'Washing machine',
  'Kitchen stove',
  'Fridge',
  'Built-in furniture',
  'Kitchen furniture',
  'Transport availability'],
 'location': 'Yerevan/Erebuni/Khorenatsi Street (ER.)',
 'area': 78.0,
 'rooms': 3,
 'floor': 2,
 'storeys': 4,
 'bedrooms': 2,
 'bathrooms': 1,
 'ceiling_height': 3.2,
 'building_type': 'Stalinka c/o',
 'condition': 'Old major overhaul'}