In [8]:
import pandas as pd
import requests
import time
import os
import sys

In [10]:
# API key read from the 'API' environment variable; Zomato.get_zomato_data sends
# it as the 'user-key' request header. Raises KeyError when the variable is unset.
API = os.environ['API']

In [11]:
class City:
  """Scrape 99acres listings for one city and enrich them with Zomato data.

  Constructing an instance runs the whole pipeline:

  1. collect property IDs from the 99acres search endpoint (``ID_list``),
  2. fetch one detail record per ID through ``NineNineAcres`` (``output``),
  3. pass ``output`` through ``Zomato`` and save the result to
     ``raw_dataset_zomato.csv`` in the current working directory.
  """

  # Same value as the page_size query parameter sent to the search endpoint;
  # at most this many IDs are taken from each page.
  PAGE_SIZE = 30
  # Seconds to wait for the search endpoint before treating a page as failed.
  REQUEST_TIMEOUT = 30

  def __init__(self, city=None, page=None):
    """Validate the arguments and run the full scrape.

    Args:
      city: integer city ID placed in the ``city`` query parameter.
      page: integer number of result pages to request.

    Raises:
      SystemExit: when an argument is missing or is not exactly an ``int``.
    """
    if city is None or page is None:
      sys.exit('No city ID or page limit provided')
    if type(city) is not int or type(page) is not int:
      sys.exit('Provide an integer value')
    self.city_id = city
    self.page_limit = page
    self.get_property_list()
    print(len(self.ID_list))
    self.output = pd.DataFrame()
    self.get_data_from_99acres()
    self.get_data_from_Zomato()

  def get_property_list(self):
    """Fill ``self.ID_list`` with the property IDs of pages 1..page_limit.

    Paging stops at the first page that cannot be fetched, is not valid JSON,
    or does not report status code 1; IDs collected so far are kept.
    """
    self.ID_list = []
    for page_number in range(1, self.page_limit + 1):
      url = f"https://www.99acres.com/api-aggregator/srp/search?property_type=1&area_unit=1&platform=DESKTOP&moduleName=GRAILS_SRP&workflow=GRAILS_SRP&page_size={self.PAGE_SIZE}&page={page_number}&city={self.city_id}&preference=S&res_com=R&seoUrlType=DEFAULT"
      try:
        payload = requests.get(url, timeout=self.REQUEST_TIMEOUT).json()
        listings = payload['properties'] if payload['status']['code'] == 1 else None
      except (requests.RequestException, ValueError, KeyError, TypeError):
        # Connection problems, a non-JSON body or an unexpected payload shape
        # are all reported the same way as a bad status code.
        listings = None
      if listings is None:
        print("You are either having internet issues else 99acres throwing ReCaptcha at ypou. Try to slow down the API request and try later.")
        break
      self.ID_list.extend(entry['PROP_ID'] for entry in listings[:self.PAGE_SIZE])

  def get_data_from_99acres(self):
    """Fetch one record per ID in ``ID_list`` and add the rows to ``output``.

    IDs whose lookup fails (``NineNineAcres.get_data`` returns ``None``) are
    skipped. Rows are gathered in a list and converted to a DataFrame once,
    instead of growing the frame row by row.
    """
    print('Fetching data from 99acres')
    records = []
    for property_id in self.ID_list:
      record = NineNineAcres(property_id).get_data()
      if record is not None:
        records.append(record)
    if records:
      fetched = pd.DataFrame(records)
      if self.output.empty:
        self.output = fetched
      else:
        self.output = pd.concat([self.output, fetched], ignore_index=True)

  def get_data_from_Zomato(self):
    """Enrich ``output`` through ``Zomato`` and write it to raw_dataset_zomato.csv."""
    modified_property_data = Zomato(self.output)
    zomato_df = modified_property_data.get_data()
    zomato_df.to_csv('raw_dataset_zomato.csv')
    print('Saved dataset')

  def get_df(self):
    """Return the ``output`` DataFrame."""
    return self.output

In [12]:
class NineNineAcres:
  """Fetch one 99acres listing and flatten it into plain record dicts."""

  # Seconds to pause before every request.
  REQUEST_DELAY = 0.5
  # Seconds to wait for the detail endpoint before treating the ID as failed.
  REQUEST_TIMEOUT = 30

  def __init__(self, id=None):
    """Store the property ID.

    Raises:
      SystemExit: when no ID is given.
    """
    if id is None:
      sys.exit('No property ID provided')
    self.property_ID = id

  def get_data(self):
    """Return a single record dict for the property, or ``None`` on failure.

    When the listing has several floor plans, the record of the LAST floor
    plan is returned; use ``get_all_data`` to obtain one record per floor plan.
    """
    records = self.get_all_data()
    return records[-1] if records else None

  def get_all_data(self):
    """Return a list with one record per floor plan, or ``[]`` on failure.

    A listing without floor plans yields exactly one record built from the
    top-level property fields. Only ordinary errors are reported and
    swallowed; ``KeyboardInterrupt`` and ``SystemExit`` propagate so a long
    scrape can still be stopped.
    """
    try:
      return self._fetch_records()
    except Exception:
      print(f"ID - {self.property_ID} did not work.")
      return []

  def _fetch_records(self):
    """Download the listing and build its records; raises on any problem."""
    url = f"https://www.99acres.com/api-aggregator/psuedoData?prop_id={self.property_ID}&stage=load"
    time.sleep(self.REQUEST_DELAY)
    response = requests.get(url, timeout=self.REQUEST_TIMEOUT).json()
    prop_data = response['propertyDetails']['prop_data']

    # Fields shared by every record of this listing. The 'lattitude' spelling
    # is the column name that Zomato reads, so it must stay as it is.
    shared = {
      'id': prop_data['Building_Id'],
      'lattitude': prop_data['Latitude'],
      'longitude': prop_data['Longitude'],
      'post_date': prop_data['Modify_Date'],
      'availability_date': prop_data['Availability_Info']['Availability_Date'],
      'owner_name': prop_data['company_label'],
      'property_name': prop_data['Prop_Name'],
      'locality_name': prop_data['Locality_Name'],
      'base_price': prop_data['Price_Per_Unit_Area'],
    }

    floor_plans = response.get('floorPlan')
    if not floor_plans:
      # Missing or empty floor-plan section: use the listing-level fields.
      return [dict(shared, **self._listing_fields(prop_data))]
    return [dict(shared, **self._floor_fields(floor)) for floor in floor_plans]

  @staticmethod
  def _listing_fields(prop_data):
    """Size/price/room fields taken from the listing itself."""
    return {
      'area_sqft': prop_data['area_dets'].split(' ')[0],
      'price': prop_data['Price'],
      'bedrooms': prop_data['Bedroom_Num'],
      'kitchens': '',
      'bathroom': prop_data['Bathroom_Num'],
      'balcony': prop_data['Balcony_Num'],
      'living': '',
    }

  @staticmethod
  def _floor_fields(floor):
    """Size/price/room fields taken from one floor-plan entry."""
    details = floor['unitDetailSectionParts']
    return {
      'area_sqft': floor['SALEABLE_AREA_SQFT'],
      'price': floor['NEW_BOOKING_PRICE']['min'],
      'bedrooms': details['bedrooms'],
      'kitchens': details['kitchens'],
      'bathroom': details['bathroom'],
      'balcony': details['balcony'],
      'living': details['living'],
    }


In [13]:
class Zomato:
  """Add Zomato geocode statistics to a DataFrame of property records.

  The frame passed in must have 'lattitude' and 'longitude' columns. It is
  modified in place: the columns 'zomato loc', 'zomato popularity',
  'zomato price' and 'zomato rating' are added to it.
  """

  # Seconds to wait for the geocode endpoint before giving up on a coordinate.
  REQUEST_TIMEOUT = 30

  def __init__(self, output=None):
    """Store ``output`` and immediately run the de-duplicate/fetch/merge steps."""
    self.df = output
    self.prepare_unique_coords()
    self.get_zomato_data()
    self.merge_df()

  def prepare_unique_coords(self):
    """Set ``unique_coords`` to the distinct [lattitude, longitude] pairs.

    Pairs are kept in first-seen order so repeated runs query the API in the
    same order.
    """
    print('Prepearing Unique Coords')
    pairs = zip(self.df['lattitude'].tolist(), self.df['longitude'].tolist())
    # dict.fromkeys de-duplicates hashable tuples while preserving order.
    self.unique_coords = [list(pair) for pair in dict.fromkeys(pairs)]

  def get_zomato_data(self):
    """Query the geocode endpoint once per unique coordinate.

    ``zomato_data`` becomes a list of
    ``[lat, lon, location title, popularity, mean price range, mean rating]``
    entries; the two means are 0 when no nearby restaurants are returned.
    Coordinates whose lookup fails are reported and left out. Only ordinary
    errors are swallowed; ``KeyboardInterrupt``/``SystemExit`` propagate.
    """
    print('Fetching Zomato Data')
    headers = {'accept': 'application/json', 'user-key': API}
    zomatolist = []
    for lat, lon in self.unique_coords:
      try:
        response = requests.get(
          f"https://developers.zomato.com/api/v2.1/geocode?lat={lat}&lon={lon}",
          headers=headers,
          timeout=self.REQUEST_TIMEOUT,
        ).json()
        loc = response['location']['title']
        popularity = response["popularity"]["popularity"]
        nearby = response["nearby_restaurants"]
        if len(nearby) != 0:
          zomato_price = sum(res["restaurant"]["price_range"] for res in nearby) / len(nearby)
          zomato_rating = sum(float(res["restaurant"]["user_rating"]["aggregate_rating"]) for res in nearby) / len(nearby)
        else:
          zomato_price = 0
          zomato_rating = 0
        zomatolist.append([lat, lon, loc, popularity, zomato_price, zomato_rating])
      except Exception:
        print(f"Coordinate - {lat},{lon} did not work.")
    self.zomato_data = zomatolist

  def merge_df(self):
    """Attach the fetched Zomato values to every row of ``self.df``.

    Rows whose coordinate has no entry in ``zomato_data`` get missing values.
    Matching uses one dictionary lookup per row instead of scanning
    ``zomato_data`` for every row and every column.
    """
    print('Merging data')
    by_coord = {}
    for item in self.zomato_data:
      # setdefault keeps the first entry for a coordinate.
      by_coord.setdefault((item[0], item[1]), item)
    row_coords = zip(self.df['lattitude'].tolist(), self.df['longitude'].tolist())
    matches = [by_coord.get(pair) for pair in row_coords]

    def column(position, convert=None):
      values = []
      for match in matches:
        if match is None:
          values.append(None)
        elif convert is None:
          values.append(match[position])
        else:
          values.append(convert(match[position]))
      return values

    self.df['zomato loc'] = column(2)
    self.df['zomato popularity'] = column(3, float)
    self.df['zomato price'] = column(4, float)
    self.df['zomato rating'] = column(5, float)

  def get_data(self):
    """Return the enriched DataFrame (the same object that was passed in)."""
    print('Returning Modified DataFrame')
    return self.df

In [14]:
# Run the pipeline for one city: City(city_id, page_limit).
# Constructing a City immediately scrapes 99acres (30 listings are requested per
# page, so 75 pages yield up to 2250 IDs), queries Zomato, and writes
# 'raw_dataset_zomato.csv' -- each run overwrites that file.
# Earlier runs for other city IDs, kept for reference:
# kolkata = City(25, 75)
# Pune = City(19, 75)
# Bangalore = City(20, 75)
Hyderabad = City(38, 75)

2250
Fetching data from 99acres
ID - Q52379246 did not work.
ID - T52064882 did not work.
ID - T52064884 did not work.
ID - Z52994056 did not work.
Prepearing Unique Coords
Fetching Zomato Data
Coordinate - 17.4876857,78.5483395 did not work.
Coordinate - 17.355763748902,78.558018542826 did not work.
Coordinate - 17.394,78.482 did not work.
Coordinate - 17.526098662168,78.394606970251 did not work.
Coordinate - 17.382017,78.4286 did not work.
Coordinate - 17.556501,78.388516 did not work.
Coordinate - 17.49618,78.41985 did not work.
Coordinate - 17.4921,78.34253 did not work.
Coordinate - 17.362733,78.415756 did not work.
Coordinate - 26.883708,80.982772 did not work.
Coordinate - 17.491903461814,78.354371488094 did not work.
Coordinate - 17.467071,78.370218 did not work.
Coordinate - 17.445968,78.361101 did not work.
Coordinate - 17.505811,78.539781 did not work.
Coordinate - 17.467407,78.476143 did not work.
Coordinate - 17.466716,78.493442 did not work.
Coordinate - 17.359177,78.374