### Importing Libraries

In [38]:
import pandas as pd
from dotenv import load_dotenv
import os
import requests
from pprint import pprint
import psycopg2
import json

### Data Extraction

In [39]:
# Load environment variables from the .env file so credentials stay out of the notebook
load_dotenv()

# Retrieve the RapidAPI credentials
rapidapi_key = os.getenv('RAPIDAPI_KEY')
rapidapi_host = os.getenv('RAPIDAPI_HOST')

# Fail fast with a clear message instead of sending an unauthenticated request
if not rapidapi_key or not rapidapi_host:
    raise RuntimeError(
        "RAPIDAPI_KEY and RAPIDAPI_HOST must be set in the environment or the .env file"
    )

# Define the headers using the variables
headers = {
    'x-rapidapi-key': rapidapi_key,
    'x-rapidapi-host': rapidapi_host,
}

# API request details
url = "https://realty-mole-property-api.p.rapidapi.com/randomProperties"
querystring = {"limit": "500"}

# Make the API request; the timeout keeps a stalled connection from hanging the kernel
response = requests.get(url, headers=headers, params=querystring, timeout=30)

# Surface HTTP errors (bad key, quota exceeded, ...) here rather than letting
# an error payload flow into later cells as if it were property data
response.raise_for_status()

# Preview a single record instead of dumping the whole payload into the output
preview = response.json()
pprint(preview[:1] if isinstance(preview, list) else preview)

[{'addressLine1': '1042 Riverside Dr',
  'assessorID': '264-54-019',
  'bathrooms': 1,
  'bedrooms': 2,
  'city': 'San Jose',
  'county': 'Santa Clara',
  'features': {'fireplace': True,
               'floorCount': 1,
               'garage': True,
               'garageType': 'Garage',
               'roomCount': 5,
               'unitCount': 1},
  'formattedAddress': '1042 Riverside Dr, San Jose, CA 95125',
  'id': '1042-Riverside-Dr,-San-Jose,-CA-95125',
  'lastSaleDate': '2021-04-02T00:00:00.000Z',
  'lastSalePrice': 999000,
  'latitude': 37.312376,
  'legalDescription': 'TR 29 LOT 10',
  'longitude': -121.900219,
  'lotSize': 4945,
  'owner': {'mailingAddress': {'addressLine1': '1042 Riverside Dr',
                               'city': 'San Jose',
                               'formattedAddress': '1042 Riverside Dr, San '
                                                   'Jose, CA 95125',
                               'id': '1042-Riverside-Dr,-San-Jose,-CA-95125',
          

In [41]:
# Parse the API payload fetched in the extraction cell
data = response.json()

file_name = 'real_estate.json'

# Persist the raw payload so the extract can be inspected or reloaded later
with open(file_name, 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=4)

# Load the saved snapshot into a DataFrame, reusing file_name so the path
# written and the path read cannot drift apart.
# NOTE(review): pd.read_json infers dtypes, and zipCode ends up as int64 in
# the .info() output — an integer column cannot keep a leading zero, so any
# ZIP such as "02134" would be stored as 2134. Consider
# pd.read_json(file_name, dtype={'zipCode': str}) once downstream cells are
# confirmed to accept string ZIP codes.
real_estate_df = pd.read_json(file_name)
real_estate_df.head()


Unnamed: 0,bathrooms,bedrooms,squareFootage,county,propertyType,addressLine1,city,state,zipCode,formattedAddress,...,lotSize,taxAssessment,propertyTaxes,lastSalePrice,lastSaleDate,owner,id,longitude,latitude,addressLine2
0,1.0,2.0,930.0,Santa Clara,Single Family,1042 Riverside Dr,San Jose,CA,95125,"1042 Riverside Dr, San Jose, CA 95125",...,4945.0,"{'2023': {'value': 1039359, 'land': 936360, 'i...",{'2023': {'total': 13416}},999000.0,2021-04-02T00:00:00.000Z,"{'names': ['Allison J Brooks', 'WILLIAM PETERM...","1042-Riverside-Dr,-San-Jose,-CA-95125",-121.900219,37.312376,
1,5.0,6.0,3318.0,Albemarle,Single Family,6909 Windmere Ln,Crozet,VA,22932,"6909 Windmere Ln, Crozet, VA 22932",...,20081.0,"{'2021': {'value': 674100, 'land': 225700, 'im...","{'2020': {'total': 5801}, '2024': {'total': 83...",675000.0,2016-06-03T00:00:00.000Z,"{'names': ['Patrick R Latimer', 'KATHERINE A L...","6909-Windmere-Ln,-Crozet,-VA-22932",-78.716506,38.067079,
2,1.5,3.0,1452.0,Lake,Single Family,38320 N Chicago Ave,Wadsworth,IL,60083,"38320 N Chicago Ave, Wadsworth, IL 60083",...,13504.0,"{'2022': {'value': 49413, 'land': 5739, 'impro...",{'2022': {'total': 2127}},,,"{'names': ['TRUST 38320'], 'mailingAddress': {...","38320-N-Chicago-Ave,-Wadsworth,-IL-60083",-87.924063,42.418909,
3,3.0,3.0,1518.0,Miami-Dade,Condo,2050 Sw 122nd Ave,Miami,FL,33175,"2050 Sw 122nd Ave, Apt 22, Miami, FL 33175",...,,"{'2020': {'value': 195000}, '2021': {'value': ...","{'2020': {'total': 3467}, '2022': {'total': 43...",310000.0,2022-01-14T00:00:00.000Z,"{'names': ['Carlos Cortes Bayardo', 'Cecilia C...","2050-Sw-122nd-Ave,-Apt-22,-Miami,-FL-33175",-80.391202,25.74855,Apt 22
4,1.0,2.0,1117.0,Kern,Land,3008 Crestline Rd,Bakersfield,CA,93306,"3008 Crestline Rd, Bakersfield, CA 93306",...,7405.0,"{'2019': {'value': 36250, 'land': 9640, 'impro...","{'2019': {'total': 1126}, '2023': {'total': 13...",,,"{'names': ['PATRICIA TOPPING'], 'mailingAddres...","3008-Crestline-Rd,-Bakersfield,-CA-93306",-118.951116,35.378973,


In [42]:
# Show every column label of the loaded DataFrame
real_estate_df.columns

Index(['bathrooms', 'bedrooms', 'squareFootage', 'county', 'propertyType',
       'addressLine1', 'city', 'state', 'zipCode', 'formattedAddress',
       'yearBuilt', 'features', 'assessorID', 'legalDescription',
       'subdivision', 'zoning', 'ownerOccupied', 'lotSize', 'taxAssessment',
       'propertyTaxes', 'lastSalePrice', 'lastSaleDate', 'owner', 'id',
       'longitude', 'latitude', 'addressLine2'],
      dtype='object')

In [43]:
# Summarize each column's dtype and non-null count to spot fields with missing values
real_estate_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 27 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   bathrooms         375 non-null    float64
 1   bedrooms          344 non-null    float64
 2   squareFootage     387 non-null    float64
 3   county            499 non-null    object 
 4   propertyType      410 non-null    object 
 5   addressLine1      500 non-null    object 
 6   city              500 non-null    object 
 7   state             500 non-null    object 
 8   zipCode           500 non-null    int64  
 9   formattedAddress  500 non-null    object 
 10  yearBuilt         369 non-null    float64
 11  features          452 non-null    object 
 12  assessorID        354 non-null    object 
 13  legalDescription  347 non-null    object 
 14  subdivision       312 non-null    object 
 15  zoning            180 non-null    object 
 16  ownerOccupied     326 non-null    float64
 1

### Transformation Layer