In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import sys
import logging
from logging import Logger
sys.path.append("..")

# Display configuration: show every column of wide frames and print floats
# with eight decimal places.
pd.options.display.max_columns = None
pd.options.display.float_format = "{:.8f}".format

In [2]:
# Reference landmarks keyed by a short name; each value is a pair of floats.
# NOTE(review): the pairs look like (latitude, longitude) — confirm against
# what GeoService expects.
significant_places = dict(
    opera=(40.18600, 44.51509),
    malibu=(40.18306, 44.50799),
    aua=(40.19243, 44.50446),
    zvartnoc=(40.1523, 44.4005),
)

In [3]:
# We have two datasets that contain addresses and one that contains coordinates.
# We need coordinates for all of them.
# First, we will write a service that converts an address to a coordinate.
# Then we will write a service that uses those coordinates to geocode.

In [63]:

def format_address(address, mapping=None):
    """Normalise a raw listing address before it is matched against street names.

    If the address has three or more ``", "``-separated parts, only the last
    part is kept; otherwise the whole address is kept. Every key of the
    replacement mapping that occurs in the result is then replaced by its
    value, in the mapping's iteration order (so earlier replacements can
    affect later ones).

    Args:
        address: Raw address. Non-string values (for example the NaN that
            pandas produces for an empty CSV cell) are returned unchanged
            instead of raising ``AttributeError``.
        mapping: Optional dict of ``substring -> replacement``. When omitted,
            the notebook-level ``myrealty_mapping`` is used; it is expected to
            be defined elsewhere in the notebook before this function is called.

    Returns:
        The normalised address string, or ``address`` itself when it is not a
        string.
    """
    if not isinstance(address, str):
        return address

    if mapping is None:
        # Resolved at call time so the function can be defined before the
        # global mapping exists.
        mapping = myrealty_mapping

    parts = address.split(", ")
    result = parts[-1] if len(parts) >= 3 else address

    for old, new in mapping.items():
        result = result.replace(old, new)
    return result


In [64]:
import ast
from fuzzywuzzy import fuzz

class AddressToCoordinateConverter:
    """Fuzzy-matches free-text addresses against a street table to get coordinates.

    The street table is read from a CSV file. The code reads two of its
    columns: ``name_en`` (the text each query is compared with) and
    ``coordinates`` (a string that is parsed as a Python literal).
    """

    def __init__(self, streets_csv_path):
        """Load the street table from ``streets_csv_path`` with ``pd.read_csv``."""
        self.streets = pd.read_csv(streets_csv_path)

    def convert(self, address):
        """Return the parsed coordinates of the street that best matches ``address``.

        Returns ``None`` when ``address`` is not a string, when no street
        matches, or when the stored coordinates cannot be parsed.
        """
        return self.get_top_matched_coordinate(df=self.streets, query=address)

    def get_top_matched_coordinate(self, df, query, debug=False):
        """Find the row of ``df`` whose ``name_en`` is most similar to ``query``.

        Similarity is ``fuzz.ratio`` on the lower-cased strings. Only rows with
        a score strictly greater than 0 are considered, and the first row
        wins when several rows share the best score.

        Args:
            df: DataFrame with ``name_en`` and ``coordinates`` columns.
            query: Address text to look up.
            debug: When True, return the whole matched row instead of the
                parsed coordinates.

        Returns:
            The parsed ``coordinates`` value of the best row (or the row itself
            when ``debug`` is True). ``None`` when ``query`` is not a string or
            no row scores above 0.
        """
        # Missing addresses arrive as NaN/None from pandas; there is nothing to match.
        if not isinstance(query, str):
            return None

        needle = query.lower()  # hoisted: identical for every row
        best_score = 0
        best_position = None

        for position, name in enumerate(df["name_en"]):
            # Skip rows without a usable name (e.g. NaN) instead of crashing.
            if not isinstance(name, str):
                continue
            score = fuzz.ratio(needle, name.lower())
            if score > best_score:
                best_score = score
                best_position = position

        if best_position is None:
            return None

        top_row = df.iloc[best_position]
        if debug:
            return top_row
        # The coordinates are stored as text in the CSV, so parse them back.
        return self.convert_to_tuple(top_row["coordinates"])

    def convert_to_tuple(self, cell):
        """Parse ``cell`` as a Python literal; return ``None`` if it cannot be parsed.

        ``ast.literal_eval`` only evaluates literals, so arbitrary code in the
        CSV is not executed. Note that the result is whatever literal the text
        denotes; it is not checked to be a tuple.
        """
        try:
            return ast.literal_eval(cell)
        except (ValueError, SyntaxError, TypeError):
            # Not a valid literal (including non-string cells such as NaN).
            return None

# Converting physical address into coordinates

In [65]:
# LONG RUN: each listing address is compared against every row of the street table.
myrealty = pd.read_csv("../data/myrealty_apartments.csv")
# Normalise the raw address text first, then look up a coordinate for it.
myrealty = myrealty.assign(location=myrealty["location"].apply(format_address))
converter = AddressToCoordinateConverter("streets.csv")
myrealty = myrealty.assign(coordinates=myrealty["location"].apply(converter.convert))

# Extracting distances to significant locations

In [60]:
import pandas as pd
from Services import GeoService

class MapFeatureAggregator:
    """Builds per-row map features from a column of coordinates.

    Every lookup is delegated to the ``geo_service`` given to the constructor;
    this class only iterates over the rows and assembles the answers into
    DataFrames. The returned frames use a fresh 0..n-1 index, one row per input
    row in input order.
    """

    def __init__(self, geo_service: "GeoService"):
        # The annotation is a string so that defining the class does not
        # require GeoService to be importable at that moment.
        self.geo_service = geo_service

    def significant_distances(self, data, location_col: str):
        """Collect ``geo_service.distance_to_significant`` for every row.

        Args:
            data: DataFrame holding the coordinates.
            location_col: Name of the column in ``data`` with the coordinates.

        Returns:
            pd.DataFrame built from the list of per-row results.
        """
        return pd.DataFrame(
            [
                self.geo_service.distance_to_significant(coordinate)
                for coordinate in data[location_col]
            ]
        )

    def amenities_count(self, data, location_col: str):
        """Count amenities by type around every coordinate.

        For each row, ``geo_service.get_amenities_from_point`` is called and the
        values of its ``amenity`` column are counted. Each amenity type becomes
        a column (named as described in ``add_row_from_dict_with_zeros``);
        types absent for a row get 0.

        Args:
            data: DataFrame holding the coordinates.
            location_col: Name of the column in ``data`` with the coordinates.

        Returns:
            pd.DataFrame with one row per input row and one column per amenity
            type, columns ordered by first appearance.
        """
        columns = []  # column names in order of first appearance
        rows = []
        for coordinate in data[location_col]:
            amenities_df = self.geo_service.get_amenities_from_point(coordinate)
            counts = amenities_df["amenity"].value_counts().to_dict()
            row = self._prefix_new_columns(columns, counts)
            columns += [col for col in row if col not in columns]
            rows.append(row)

        # Build the frame once instead of concatenating inside the loop.
        return pd.DataFrame(
            {col: [row.get(col, 0) for row in rows] for col in columns},
            index=pd.RangeIndex(len(rows)),
        )

    @staticmethod
    def _prefix_new_columns(existing_columns, data_dict):
        """Rename every key that is not already a column to ``'L_' + key``."""
        return {
            (key if key in existing_columns else "L_" + key): value
            for key, value in data_dict.items()
        }

    def add_row_from_dict_with_zeros(self, df, data_dict):
        """
        Returns a copy of the dataframe with one row appended from a dictionary.

        A key that is not already a column name is stored under 'L_' + key;
        columns that do not exist yet are added. Existing rows get 0 in newly
        added columns, and the new row gets 0 in every column the dictionary
        does not provide. The dataframe passed in is not modified.

        Args:
        df (pd.DataFrame): The dataframe to add the row to.
        data_dict (dict): The dictionary containing the data to add.

        Returns:
        pd.DataFrame: The updated dataframe, re-indexed from 0.
        """
        row = self._prefix_new_columns(df.columns, data_dict)
        columns = list(df.columns) + [col for col in row if col not in df.columns]

        # index=[0] guarantees exactly one row even when there are no columns.
        new_row = pd.DataFrame({col: [row.get(col, 0)] for col in columns}, index=[0])

        # With no existing rows the new row is the whole result; this also
        # avoids concatenating with an empty frame.
        if len(df) == 0:
            return new_row

        # reindex returns a new frame, so the caller's dataframe stays untouched.
        widened = df.reindex(columns=columns, fill_value=0)
        return pd.concat([widened, new_row], ignore_index=True)

In [61]:
# Build the geo service around the reference landmarks; `radius` is passed
# through as-is (NOTE(review): presumably metres — confirm in GeoService).
geo_service = GeoService(
    significant_places,
    radius = 300
)
# MapFeatureAggregator is the class defined in this notebook; the previously
# used name `ApartmentGeoServiceFeaturer` is not defined here and only
# resolved through leftover kernel state.
featurer = MapFeatureAggregator(geo_service)
significant_distances = featurer.significant_distances(myrealty, "coordinates")
amenities = featurer.amenities_count(myrealty, "coordinates")