### Importing Libraries

In [1]:
import pandas as pd
from dotenv import load_dotenv
import os
import requests
from pprint import pprint
import psycopg2
import json

### Data Extraction

In [2]:
# Load environment variables from the .env file
load_dotenv()

# Retrieve the RapidAPI credentials and fail fast when either one is missing,
# so a configuration problem is reported here rather than as an API error.
rapidapi_key = os.getenv('RAPIDAPI_KEY')
rapidapi_host = os.getenv('RAPIDAPI_HOST')
if not rapidapi_key or not rapidapi_host:
    raise RuntimeError(
        'RAPIDAPI_KEY and RAPIDAPI_HOST must be set (for example in the .env file).'
    )

# Define the headers using the variables
headers = {
    'x-rapidapi-key': rapidapi_key,
    'x-rapidapi-host': rapidapi_host,
}

# API request details
url = "https://realty-mole-property-api.p.rapidapi.com/randomProperties"
querystring = {"limit": "500"}

# Make the API request. The timeout keeps the cell from hanging indefinitely,
# and raise_for_status() turns HTTP error responses into an exception instead
# of letting an error body flow into the later cells as if it were data.
response = requests.get(url, headers=headers, params=querystring, timeout=30)
response.raise_for_status()

# Preview a single record instead of the whole payload: the full response is
# large and each record carries owner names and mailing addresses.
payload = response.json()
pprint(payload[:1] if isinstance(payload, list) else payload)

[{'addressLine1': '3157 Rosebay Ln',
  'assessorID': 'B01-56J- -G-00106',
  'bathrooms': 2,
  'bedrooms': 3,
  'city': 'Bartlett',
  'county': 'Shelby',
  'features': {'architectureType': 'Conventional',
               'cooling': True,
               'coolingType': 'Central',
               'exteriorType': 'Brick Veneer',
               'fireplace': True,
               'fireplaceType': 'Single',
               'floorCount': 1,
               'garage': True,
               'garageType': 'Carport',
               'heating': True,
               'heatingType': 'Central',
               'roofType': 'Composition Shingle',
               'roomCount': 7,
               'unitCount': 1},
  'formattedAddress': '3157 Rosebay Ln, Bartlett, TN 38134',
  'id': '3157-Rosebay-Ln,-Bartlett,-TN-38134',
  'latitude': 35.21184,
  'legalDescription': 'WS ROSEBAY LANE',
  'longitude': -89.866508,
  'lotSize': 9975,
  'owner': {'mailingAddress': {'addressLine1': '3548 Skylight Dr E',
                       

In [3]:
# Decode the API response body once so it can be written to disk.
data = response.json()

# Keep a raw snapshot of the response on disk (pretty-printed with a 4-space
# indent), then build the working DataFrame by reading that snapshot back.
file_name = 'real_estate.json'

with open(file_name, 'w') as file:
    file.write(json.dumps(data, indent=4))

real_estate_df = pd.read_json(file_name)
real_estate_df.head()


Unnamed: 0,addressLine1,city,state,zipCode,formattedAddress,bedrooms,squareFootage,yearBuilt,features,county,...,taxAssessment,propertyTaxes,owner,id,longitude,latitude,ownerOccupied,lastSalePrice,lastSaleDate,addressLine2
0,3157 Rosebay Ln,Bartlett,TN,38134,"3157 Rosebay Ln, Bartlett, TN 38134",3.0,1761.0,1973.0,"{'architectureType': 'Conventional', 'cooling'...",Shelby,...,"{'2023': {'value': 42150, 'land': 7675, 'impro...",{'2022': {'total': 1429}},"{'names': ['BRANNICK DAISY L TRUST ONE A'], 'm...","3157-Rosebay-Ln,-Bartlett,-TN-38134",-89.866508,35.21184,,,,
1,410 Silver Pine Trl,Roswell,GA,30076,"410 Silver Pine Trl, Roswell, GA 30076",4.0,2664.0,1979.0,"{'architectureType': 'Colonial', 'cooling': Tr...",Fulton,...,"{'2022': {'value': 154960, 'land': 29040, 'imp...","{'2022': {'total': 4374}, '2023': {'total': 48...","{'names': ['GERKER FAMILY TRUST', 'GERKER JANE...","410-Silver-Pine-Trl,-Roswell,-GA-30076",-84.301676,34.0045,1.0,,,
2,9803 Peddlers Way,Orlando,FL,32817,"9803 Peddlers Way, Orlando, FL 32817",3.0,1440.0,1986.0,"{'cooling': True, 'coolingType': 'Central', 'e...",Orange,...,"{'2021': {'value': 102525}, '2022': {'value': ...","{'2022': {'total': 1406}, '2023': {'total': 14...","{'names': ['RICHARD A SEFICK', 'BERENICE W SEF...","9803-Peddlers-Way,-Orlando,-FL-32817",-81.249395,28.591534,1.0,84500.0,1986-10-01T00:00:00.000Z,
3,2140 Hammerle St,West Linn,OR,97068,"2140 Hammerle St, West Linn, OR 97068",2.0,2526.0,,{},Clackamas,...,,,,"2140-Hammerle-St,-West-Linn,-OR-97068",-122.625382,45.370764,,,,
4,2124 Landside Dr,Valrico,FL,33594,"2124 Landside Dr, Valrico, FL 33594",3.0,2104.0,2014.0,"{'architectureType': 'Contemporary', 'cooling'...",Hillsborough,...,"{'2021': {'value': 212818}, '2022': {'value': ...","{'2022': {'total': 3823}, '2023': {'total': 40...","{'names': ['Giovanni S Solorzano', 'Claudia So...","2124-Landside-Dr,-Valrico,-FL-33594",-82.250749,27.962256,1.0,244900.0,2014-05-07T00:00:00.000Z,


In [4]:
# List the column labels of the frame loaded from the saved API response.
real_estate_df.columns

Index(['addressLine1', 'city', 'state', 'zipCode', 'formattedAddress',
       'bedrooms', 'squareFootage', 'yearBuilt', 'features', 'county',
       'assessorID', 'legalDescription', 'subdivision', 'zoning', 'bathrooms',
       'lotSize', 'propertyType', 'taxAssessment', 'propertyTaxes', 'owner',
       'id', 'longitude', 'latitude', 'ownerOccupied', 'lastSalePrice',
       'lastSaleDate', 'addressLine2'],
      dtype='object')

In [5]:
# Show dtypes and non-null counts per column to see where values are missing.
real_estate_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 27 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   addressLine1      500 non-null    object 
 1   city              500 non-null    object 
 2   state             500 non-null    object 
 3   zipCode           500 non-null    int64  
 4   formattedAddress  500 non-null    object 
 5   bedrooms          322 non-null    float64
 6   squareFootage     380 non-null    float64
 7   yearBuilt         363 non-null    float64
 8   features          451 non-null    object 
 9   county            500 non-null    object 
 10  assessorID        343 non-null    object 
 11  legalDescription  335 non-null    object 
 12  subdivision       293 non-null    object 
 13  zoning            180 non-null    object 
 14  bathrooms         364 non-null    float64
 15  lotSize           341 non-null    float64
 16  propertyType      401 non-null    object 
 1

### Transformation Layer

In [6]:
# Missing numeric values are replaced with 0.0 ...
numeric_defaults = dict.fromkeys(
    ['bathrooms', 'bedrooms', 'squareFootage', 'yearBuilt',
     'ownerOccupied', 'lotSize', 'lastSalePrice'],
    0.0,
)

# ... and every other missing value with the placeholder text 'Unknown'.
# Note that this includes the columns that otherwise hold nested dictionaries
# (features, taxAssessment, propertyTaxes, owner).
text_defaults = dict.fromkeys(
    ['county', 'propertyType', 'features', 'assessorID', 'legalDescription',
     'subdivision', 'zoning', 'taxAssessment', 'lastSaleDate', 'propertyTaxes',
     'addressLine2', 'owner'],
    'Unknown',
)

# Apply both sets of defaults to the frame in place.
real_estate_df.fillna({**numeric_defaults, **text_defaults}, inplace = True)

In [7]:
# Re-check the non-null counts now that the fillna defaults have been applied.
real_estate_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 27 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   addressLine1      500 non-null    object 
 1   city              500 non-null    object 
 2   state             500 non-null    object 
 3   zipCode           500 non-null    int64  
 4   formattedAddress  500 non-null    object 
 5   bedrooms          500 non-null    float64
 6   squareFootage     500 non-null    float64
 7   yearBuilt         500 non-null    float64
 8   features          500 non-null    object 
 9   county            500 non-null    object 
 10  assessorID        500 non-null    object 
 11  legalDescription  500 non-null    object 
 12  subdivision       500 non-null    object 
 13  zoning            500 non-null    object 
 14  bathrooms         500 non-null    float64
 15  lotSize           500 non-null    float64
 16  propertyType      500 non-null    object 
 1

In [8]:
# Address Table 
# Location columns that make up the address table, in their final order.
address_columns = ['addressLine1', 'city', 'state', 'zipCode', 'formattedAddress',
                   'county', 'longitude', 'latitude', 'addressLine2']

# First value handed out for the address_id surrogate key.
ADDRESS_ID_START = 29

# One address row per source record, on a fresh 0..n-1 index.
Address_Table = real_estate_df.loc[:, address_columns].copy().reset_index(drop = True)

# Sequential surrogate primary key: ADDRESS_ID_START, ADDRESS_ID_START + 1, ...
Address_Table['address_id'] = range(ADDRESS_ID_START, ADDRESS_ID_START + len(Address_Table))

# Put the key first, followed by the address columns.
Address_Table = Address_Table[['address_id'] + address_columns]

Address_Table.head()

Unnamed: 0,address_id,addressLine1,city,state,zipCode,formattedAddress,county,longitude,latitude,addressLine2
0,29,3157 Rosebay Ln,Bartlett,TN,38134,"3157 Rosebay Ln, Bartlett, TN 38134",Shelby,-89.866508,35.21184,Unknown
1,30,410 Silver Pine Trl,Roswell,GA,30076,"410 Silver Pine Trl, Roswell, GA 30076",Fulton,-84.301676,34.0045,Unknown
2,31,9803 Peddlers Way,Orlando,FL,32817,"9803 Peddlers Way, Orlando, FL 32817",Orange,-81.249395,28.591534,Unknown
3,32,2140 Hammerle St,West Linn,OR,97068,"2140 Hammerle St, West Linn, OR 97068",Clackamas,-122.625382,45.370764,Unknown
4,33,2124 Landside Dr,Valrico,FL,33594,"2124 Landside Dr, Valrico, FL 33594",Hillsborough,-82.250749,27.962256,Unknown


In [9]:
# Property_info Table 


# Ensure Address_Table and real_estate_df have a common column to merge on, like 'addressLine1'
# Descriptive property columns carried over from the raw frame.
property_columns = ['assessorID', 'yearBuilt', 'legalDescription', 'ownerOccupied',
                    'propertyType', 'owner', 'id', 'subdivision', 'zoning']

# Address_Table is built row-for-row from real_estate_df, so the address key is
# attached by position. Joining on addressLine1/city/state/zipCode instead
# would multiply rows whenever two records share those values (for example
# records that differ only in addressLine2) and break the key numbering below.
if len(Address_Table) != len(real_estate_df):
    raise ValueError(
        'Address_Table must have exactly one row per real_estate_df row '
        f'({len(Address_Table)} != {len(real_estate_df)}).'
    )

Property_info_Table = real_estate_df[property_columns].copy().reset_index(drop=True)

# Foreign key to Address_Table, taken from the row at the same position.
Property_info_Table['address_id'] = Address_Table['address_id'].to_numpy()

# Sequential surrogate primary key for the property rows, starting at 13.
Property_info_Table['property_id'] = range(13, len(Property_info_Table) + 13)

# Keys first, then the descriptive columns.
Property_info_Table = Property_info_Table[['property_id', 'address_id'] + property_columns]

# Display the first few rows
Property_info_Table.head()






Unnamed: 0,property_id,address_id,assessorID,yearBuilt,legalDescription,ownerOccupied,propertyType,owner,id,subdivision,zoning
0,13,29,B01-56J- -G-00106,1973.0,WS ROSEBAY LANE,0.0,Single Family,"{'names': ['BRANNICK DAISY L TRUST ONE A'], 'm...","3157-Rosebay-Ln,-Bartlett,-TN-38134",WILDWOOD MANOR REV SEC A,RS-10
1,14,30,12 267106760531,1979.0,ANNEXATION KITCHEN RENOVATION NVC,1.0,Single Family,"{'names': ['GERKER FAMILY TRUST', 'GERKER JANE...","410-Silver-Pine-Trl,-Roswell,-GA-30076",WOODFIELD AT MARTINS LANDING,R1
2,15,31,07-22-31-3800-02-520,1986.0,HUNTERS TRACE UNIT 3 14/78 LOT 252,1.0,Single Family,"{'names': ['RICHARD A SEFICK', 'BERENICE W SEF...","9803-Peddlers-Way,-Orlando,-FL-32817",HUNTERS TRACE UT 3,R-1A
3,16,32,Unknown,0.0,Unknown,0.0,Single Family,Unknown,"2140-Hammerle-St,-West-Linn,-OR-97068",Unknown,Unknown
4,17,33,U1829219S5000001000070,2014.0,BONVIDA LOT 7 BLOCK 1,1.0,Single Family,"{'names': ['Giovanni S Solorzano', 'Claudia So...","2124-Landside-Dr,-Valrico,-FL-33594",BONVIDA,PD


In [10]:
# Remove the dictionary text format style in the owner column
# Convert the owner column to text. Each nested dictionary becomes its Python
# string representation (single-quoted keys), so the column holds one string
# per row.
Property_info_Table = Property_info_Table.astype({'owner': str})

# Display the first few rows
Property_info_Table.head()

Unnamed: 0,property_id,address_id,assessorID,yearBuilt,legalDescription,ownerOccupied,propertyType,owner,id,subdivision,zoning
0,13,29,B01-56J- -G-00106,1973.0,WS ROSEBAY LANE,0.0,Single Family,"{'names': ['BRANNICK DAISY L TRUST ONE A'], 'm...","3157-Rosebay-Ln,-Bartlett,-TN-38134",WILDWOOD MANOR REV SEC A,RS-10
1,14,30,12 267106760531,1979.0,ANNEXATION KITCHEN RENOVATION NVC,1.0,Single Family,"{'names': ['GERKER FAMILY TRUST', 'GERKER JANE...","410-Silver-Pine-Trl,-Roswell,-GA-30076",WOODFIELD AT MARTINS LANDING,R1
2,15,31,07-22-31-3800-02-520,1986.0,HUNTERS TRACE UNIT 3 14/78 LOT 252,1.0,Single Family,"{'names': ['RICHARD A SEFICK', 'BERENICE W SEF...","9803-Peddlers-Way,-Orlando,-FL-32817",HUNTERS TRACE UT 3,R-1A
3,16,32,Unknown,0.0,Unknown,0.0,Single Family,Unknown,"2140-Hammerle-St,-West-Linn,-OR-97068",Unknown,Unknown
4,17,33,U1829219S5000001000070,2014.0,BONVIDA LOT 7 BLOCK 1,1.0,Single Family,"{'names': ['Giovanni S Solorzano', 'Claudia So...","2124-Landside-Dr,-Valrico,-FL-33594",BONVIDA,PD


In [11]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only adds a stray "None" to the output.
Property_info_Table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   property_id       500 non-null    int64  
 1   address_id        500 non-null    int64  
 2   assessorID        500 non-null    object 
 3   yearBuilt         500 non-null    float64
 4   legalDescription  500 non-null    object 
 5   ownerOccupied     500 non-null    float64
 6   propertyType      500 non-null    object 
 7   owner             500 non-null    object 
 8   id                500 non-null    object 
 9   subdivision       500 non-null    object 
 10  zoning            500 non-null    object 
dtypes: float64(2), int64(2), object(7)
memory usage: 43.1+ KB


None

In [12]:
# Tax Table

# Ensure Property_info_Table and real_estate_df have a common column to merge on.
# Property_info_Table is derived row-for-row from real_estate_df, so the
# property key is attached by position. Joining on yearBuilt/propertyType/id
# is unnecessary and would multiply rows if those values ever repeat.
if len(Property_info_Table) != len(real_estate_df):
    raise ValueError(
        'Property_info_Table must have exactly one row per real_estate_df row '
        f'({len(Property_info_Table)} != {len(real_estate_df)}).'
    )

# Tax columns for every property, on a fresh 0..n-1 index.
Tax_Table = real_estate_df[['taxAssessment', 'propertyTaxes']].copy().reset_index(drop=True)

# Foreign key to Property_info_Table, taken from the row at the same position.
Tax_Table['property_id'] = Property_info_Table['property_id'].to_numpy()

# Sequential surrogate primary key for the tax rows, starting at 5.
Tax_Table['tax_id'] = range(5, len(Tax_Table) + 5)

Tax_Table = Tax_Table[['tax_id', 'property_id', 'taxAssessment', 'propertyTaxes']]


# Display the first few rows
Tax_Table.head()




Unnamed: 0,tax_id,property_id,taxAssessment,propertyTaxes
0,5,13,"{'2023': {'value': 42150, 'land': 7675, 'impro...",{'2022': {'total': 1429}}
1,6,14,"{'2022': {'value': 154960, 'land': 29040, 'imp...","{'2022': {'total': 4374}, '2023': {'total': 48..."
2,7,15,"{'2021': {'value': 102525}, '2022': {'value': ...","{'2022': {'total': 1406}, '2023': {'total': 14..."
3,8,16,Unknown,Unknown
4,9,17,"{'2021': {'value': 212818}, '2022': {'value': ...","{'2022': {'total': 3823}, '2023': {'total': 40..."


In [13]:
# Remove the dictionary text format style in the owner column
# Convert both tax columns to text. Each nested per-year dictionary becomes its
# Python string representation, so every cell holds a single string.
Tax_Table = Tax_Table.astype({'taxAssessment': str, 'propertyTaxes': str})

# Display the first few rows
Tax_Table.head()

Unnamed: 0,tax_id,property_id,taxAssessment,propertyTaxes
0,5,13,"{'2023': {'value': 42150, 'land': 7675, 'impro...",{'2022': {'total': 1429}}
1,6,14,"{'2022': {'value': 154960, 'land': 29040, 'imp...","{'2022': {'total': 4374}, '2023': {'total': 48..."
2,7,15,"{'2021': {'value': 102525}, '2022': {'value': ...","{'2022': {'total': 1406}, '2023': {'total': 14..."
3,8,16,Unknown,Unknown
4,9,17,"{'2021': {'value': 212818}, '2022': {'value': ...","{'2022': {'total': 3823}, '2023': {'total': 40..."


In [14]:
# Sales Table

# Ensure Property_info_Table and real_estate_df have a common column to merge on.
# Property_info_Table is derived row-for-row from real_estate_df, so the
# property key is attached by position. Joining on yearBuilt/propertyType/id
# is unnecessary and would multiply rows if those values ever repeat.
if len(Property_info_Table) != len(real_estate_df):
    raise ValueError(
        'Property_info_Table must have exactly one row per real_estate_df row '
        f'({len(Property_info_Table)} != {len(real_estate_df)}).'
    )

# Last-sale columns for every property, on a fresh 0..n-1 index.
Sales_Table = real_estate_df[['lastSalePrice', 'lastSaleDate']].copy().reset_index(drop=True)

# Foreign key to Property_info_Table, taken from the row at the same position.
Sales_Table['property_id'] = Property_info_Table['property_id'].to_numpy()

# Sequential surrogate primary key for the sales rows, starting at 55.
Sales_Table['sales_id'] = range(55, len(Sales_Table) + 55)

Sales_Table = Sales_Table[['sales_id', 'property_id', 'lastSalePrice', 'lastSaleDate']].copy()


# Display the first few rows
Sales_Table.head()




Unnamed: 0,sales_id,property_id,lastSalePrice,lastSaleDate
0,55,13,0.0,Unknown
1,56,14,0.0,Unknown
2,57,15,84500.0,1986-10-01T00:00:00.000Z
3,58,16,0.0,Unknown
4,59,17,244900.0,2014-05-07T00:00:00.000Z


In [15]:
# Features Table

# Ensure Property_info_Table and real_estate_df have a common column to merge on.
# Property_info_Table is derived row-for-row from real_estate_df, so the
# property key is attached by position. Joining on yearBuilt/propertyType/id
# is unnecessary and would multiply rows if those values ever repeat.
if len(Property_info_Table) != len(real_estate_df):
    raise ValueError(
        'Property_info_Table must have exactly one row per real_estate_df row '
        f'({len(Property_info_Table)} != {len(real_estate_df)}).'
    )

# Size and feature columns for every property, on a fresh 0..n-1 index.
Features_Table = real_estate_df[
    ['bedrooms', 'squareFootage', 'bathrooms', 'lotSize', 'features']
].copy().reset_index(drop=True)

# Foreign key to Property_info_Table, taken from the row at the same position.
Features_Table['property_id'] = Property_info_Table['property_id'].to_numpy()

# Sequential surrogate primary key for the feature rows, starting at 79.
# The range is sized from this table itself, not from Sales_Table, so the cell
# no longer depends on the sales cell having run or on the two lengths matching.
Features_Table['features_id'] = range(79, len(Features_Table) + 79)

Features_Table = Features_Table[['features_id','property_id', 'bedrooms', 'squareFootage', 'bathrooms', 'lotSize', 'features']].copy()


# Display the first few rows
Features_Table.head()




Unnamed: 0,features_id,property_id,bedrooms,squareFootage,bathrooms,lotSize,features
0,79,13,3.0,1761.0,2.0,9975.0,"{'architectureType': 'Conventional', 'cooling'..."
1,80,14,4.0,2664.0,2.5,11587.0,"{'architectureType': 'Colonial', 'cooling': Tr..."
2,81,15,3.0,1440.0,2.0,7506.0,"{'cooling': True, 'coolingType': 'Central', 'e..."
3,82,16,2.0,2526.0,2.0,0.0,{}
4,83,17,3.0,2104.0,2.0,8995.0,"{'architectureType': 'Contemporary', 'cooling'..."


In [16]:
# Remove the dictionary text format style in the owner column
# Convert the features column to text. Each nested dictionary becomes its
# Python string representation, so every cell holds a single string.
Features_Table = Features_Table.astype({'features': str})

# Display the first few rows
Features_Table.head()

Unnamed: 0,features_id,property_id,bedrooms,squareFootage,bathrooms,lotSize,features
0,79,13,3.0,1761.0,2.0,9975.0,"{'architectureType': 'Conventional', 'cooling'..."
1,80,14,4.0,2664.0,2.5,11587.0,"{'architectureType': 'Colonial', 'cooling': Tr..."
2,81,15,3.0,1440.0,2.0,7506.0,"{'cooling': True, 'coolingType': 'Central', 'e..."
3,82,16,2.0,2526.0,2.0,0.0,{}
4,83,17,3.0,2104.0,2.0,8995.0,"{'architectureType': 'Contemporary', 'cooling'..."


In [17]:
# List the raw frame's column labels again before moving on to the load step.
real_estate_df.columns

Index(['addressLine1', 'city', 'state', 'zipCode', 'formattedAddress',
       'bedrooms', 'squareFootage', 'yearBuilt', 'features', 'county',
       'assessorID', 'legalDescription', 'subdivision', 'zoning', 'bathrooms',
       'lotSize', 'propertyType', 'taxAssessment', 'propertyTaxes', 'owner',
       'id', 'longitude', 'latitude', 'ownerOccupied', 'lastSalePrice',
       'lastSaleDate', 'addressLine2'],
      dtype='object')

### Loading Layer

In [18]:

# Load environment variables from the .env file.
# override=True lets the .env values take precedence over variables that are
# already exported by the shell. Without it, a pre-existing USER variable
# (commonly the OS login name on Unix-like systems) would be used instead of
# the database user configured in .env.
load_dotenv(override=True)


# Develop a function to get the Database connections
def get_db_connection():
    """Open a new PostgreSQL connection using settings from the environment.

    Reads the HOST, DATABASE, USER and PASSWORD environment variables and
    passes them to psycopg2.connect.

    Returns:
        The connection object returned by psycopg2.connect. The caller is
        responsible for closing it.
    """
    connection = psycopg2.connect(
        # Retrieve environment variables
        host = os.getenv('HOST'),
        database = os.getenv('DATABASE'),
        user = os.getenv('USER'),
        password = os.getenv('PASSWORD')
    )

    return connection

# Connect once to confirm the settings work, then close straight away: the
# later cells open their own connections, so keeping this one open would only
# leave an idle session behind when `conn` is reassigned.
conn = get_db_connection()
conn.close()


In [19]:
# Create a function that sets up the schema and tables
def create_tables():
    """Create the Zuma_Realtors schema and (re)create its five tables.

    Any existing Address, Property_info, Tax, Sales and Features tables in the
    schema are dropped (with CASCADE) and created again, so previously loaded
    rows are discarded. The statements run in a single transaction: it is
    committed when everything succeeds and rolled back if any statement fails.
    The connection is closed in both cases.
    """
    create_table_query = '''
                            CREATE SCHEMA IF NOT EXISTS Zuma_Realtors;

                            DROP TABLE IF EXISTS Zuma_Realtors.Features_Table CASCADE;
                            DROP TABLE IF EXISTS Zuma_Realtors.Sales_Table CASCADE;
                            DROP TABLE IF EXISTS Zuma_Realtors.Property_info_Table CASCADE;
                            DROP TABLE IF EXISTS Zuma_Realtors.Tax_Table CASCADE;
                            DROP TABLE IF EXISTS Zuma_Realtors.Address_Table CASCADE;

                            CREATE TABLE Zuma_Realtors.Address_Table (
                                address_id INTEGER PRIMARY KEY,
                                addressLine1 VARCHAR(100000),
                                city VARCHAR(100000),
                                state VARCHAR(100000),
                                zipCode INTEGER,
                                formattedAddress VARCHAR(100000),
                                county VARCHAR(100000),
                                longitude FLOAT,
                                latitude FLOAT,
                                addressLine2 VARCHAR(100000)
                            );


                            CREATE TABLE Zuma_Realtors.Property_info_Table (
                                property_id INTEGER PRIMARY KEY,
                                address_id INTEGER,
                                assessorID VARCHAR(100000),
                                yearBuilt FLOAT,
                                legalDescription VARCHAR(100000),
                                ownerOccupied FLOAT,
                                propertyType VARCHAR(100000),
                                owner VARCHAR(100000),
                                id VARCHAR(100000),
                                subdivision VARCHAR(100000),
                                zoning VARCHAR(100000),
                                FOREIGN KEY (address_id) REFERENCES Zuma_Realtors.Address_Table(address_id)
                            );



                            CREATE TABLE Zuma_Realtors.Tax_Table (
                                tax_id INTEGER PRIMARY KEY,
                                property_id INTEGER,
                                taxAssessment VARCHAR(100000),
                                propertyTaxes VARCHAR(100000),
                                FOREIGN KEY (property_id) REFERENCES Zuma_Realtors.Property_info_Table(property_id)
                            );
 
                            

                             CREATE TABLE Zuma_Realtors.Sales_Table (
                                sales_id INTEGER PRIMARY KEY,
                                property_id INTEGER,
                                lastSalePrice FLOAT,
                                lastSaleDate VARCHAR(100000),
                                FOREIGN KEY (property_id) REFERENCES Zuma_Realtors.Property_info_Table(property_id)
                            );



                            CREATE TABLE Zuma_Realtors.Features_Table (
                                features_id INTEGER PRIMARY KEY,
                                property_id INTEGER,
                                bedrooms FLOAT,
                                squareFootage FLOAT,
                                bathrooms FLOAT,
                                lotSize FLOAT,
                                features VARCHAR(100000),
                                FOREIGN KEY (property_id) REFERENCES Zuma_Realtors.Property_info_Table(property_id)
                            );
                            '''
    conn = get_db_connection()
    try:
        # `with conn` commits on success and rolls back on an exception;
        # `with conn.cursor()` closes the cursor when the block ends.
        with conn, conn.cursor() as cursor:
            cursor.execute(create_table_query)
    finally:
        # Leaving the `with conn` block does not close the connection, so it
        # is closed here whether or not the statements succeeded.
        conn.close()

# Run the schema setup; this drops and recreates the Zuma_Realtors tables.
create_tables()

In [20]:
### Loading the Data into the db Tables

def _insert_rows(cursor, table, columns, frame):
    """Insert every row of `frame` into Zuma_Realtors.<table>.

    Args:
        cursor: Open database cursor used to run the INSERT statements.
        table: Table name inside the Zuma_Realtors schema.
        columns: Column names to insert, which must also be columns of `frame`.
        frame: DataFrame holding the rows to insert.
    """
    placeholders = ', '.join(['%s'] * len(columns))
    insert_sql = (
        f"INSERT INTO Zuma_Realtors.{table} ({', '.join(columns)}) "
        f"VALUES ({placeholders})"
    )
    # Select the columns explicitly so the values always line up with the
    # column list in the statement, whatever the frame's own column order is.
    rows = list(frame[columns].itertuples(index=False, name=None))
    cursor.executemany(insert_sql, rows)


# Tables in load order: Address_Table first, then Property_info_Table (which
# references it), then the three tables that reference Property_info_Table.
load_plan = [
    ('Address_Table', Address_Table,
     ['address_id', 'addressLine1', 'city', 'state', 'zipCode',
      'formattedAddress', 'county', 'longitude', 'latitude', 'addressLine2']),
    ('Property_info_Table', Property_info_Table,
     ['property_id', 'address_id', 'assessorID', 'yearBuilt', 'legalDescription',
      'ownerOccupied', 'propertyType', 'owner', 'id', 'subdivision', 'zoning']),
    ('Tax_Table', Tax_Table,
     ['tax_id', 'property_id', 'taxAssessment', 'propertyTaxes']),
    ('Sales_Table', Sales_Table,
     ['sales_id', 'property_id', 'lastSalePrice', 'lastSaleDate']),
    ('Features_Table', Features_Table,
     ['features_id', 'property_id', 'bedrooms', 'squareFootage', 'bathrooms',
      'lotSize', 'features']),
]

conn = get_db_connection()
try:
    # All five tables are loaded in one transaction: `with conn` commits when
    # every insert succeeds and rolls back if any of them raises, and the
    # cursor is closed when the block ends.
    with conn, conn.cursor() as cursor:
        for table, frame, columns in load_plan:
            _insert_rows(cursor, table, columns, frame)
finally:
    # Close Connection, whether or not the load succeeded.
    conn.close()

In [22]:
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() only adds a stray "None" to the output.
Property_info_Table.info()
display(Property_info_Table.columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   property_id       500 non-null    int64  
 1   address_id        500 non-null    int64  
 2   assessorID        500 non-null    object 
 3   yearBuilt         500 non-null    float64
 4   legalDescription  500 non-null    object 
 5   ownerOccupied     500 non-null    float64
 6   propertyType      500 non-null    object 
 7   owner             500 non-null    object 
 8   id                500 non-null    object 
 9   subdivision       500 non-null    object 
 10  zoning            500 non-null    object 
dtypes: float64(2), int64(2), object(7)
memory usage: 43.1+ KB


None

Index(['property_id', 'address_id', 'assessorID', 'yearBuilt',
       'legalDescription', 'ownerOccupied', 'propertyType', 'owner', 'id',
       'subdivision', 'zoning'],
      dtype='object')

In [23]:
# Preview the first rows of the features table.
Features_Table.head()

Unnamed: 0,features_id,property_id,bedrooms,squareFootage,bathrooms,lotSize,features
0,79,13,3.0,1761.0,2.0,9975.0,"{'architectureType': 'Conventional', 'cooling'..."
1,80,14,4.0,2664.0,2.5,11587.0,"{'architectureType': 'Colonial', 'cooling': Tr..."
2,81,15,3.0,1440.0,2.0,7506.0,"{'cooling': True, 'coolingType': 'Central', 'e..."
3,82,16,2.0,2526.0,2.0,0.0,{}
4,83,17,3.0,2104.0,2.0,8995.0,"{'architectureType': 'Contemporary', 'cooling'..."
