In [71]:
import csv
import pandas as pd
import numpy as np
import statsmodels.api as sm

    
def calculate_property_indices(env_path, year=2022):
    desired_districts = ['gasabo', 'kicukiro', 'nyarugenge']
    load_dotenv(env_path)
    database_host = config("DATABASE_HOST")
    database_name = config("DATABASE_NAME")
    database_user = config("DATABASE_USER")
    database_password = config("DATABASE_PASSWORD")

    # Establish a connection to the PostgreSQL database
    conn = psycopg2.connect(
        host=database_host,
        dbname=database_name,
        user=database_user,
        password=database_password
    )

    # Create a cursor object to interact with the database
    cur = conn.cursor()
    # Create an engine for future db operations
    engine = create_engine(f"postgresql+psycopg2://{database_user}:{database_password}@{database_host}/{database_name}")

    # Function to fetch data
    def fetch_data(query):
        cur.execute(query)
        result = cur.fetchall()
        columns = [desc[0] for desc in cur.description]
        return pd.DataFrame(result, columns=columns)
    
    # Fetch data
    df_apart = fetch_data("SELECT * FROM apartment_for_sale")
    df_house = fetch_data("SELECT * FROM house_for_sale")

    # Apply price filtering
    for idx, price in enumerate(df_apart['price']):
        if price < 90000000:
            df_apart.at[idx, 'price'] = 90000000
        elif price > 480000000:
            df_apart.at[idx, 'price'] = 480000000

    # Data preprocessing
    df_apart['district'] = df_apart['district'].str.lower().str.strip()
    df_apart = df_apart.dropna(subset=['date'])
    df_apart['date'] = pd.to_datetime(df_apart['date'])
    df_apart['quarter'] = df_apart['date'].dt.quarter
    desired_districts = ['gasabo', 'kicukiro', 'nyarugenge']
    df_apart_2022 = df_apart[(df_apart['district'].isin(desired_districts)) & (df_apart['date'].dt.year == 2022)]

    # Convert price and landsize into log
    converted_df_apart_2022 = df_apart_2022.copy()
    converted_df_apart_2022 = converted_df_apart_2022[(converted_df_apart_2022["price"] != 0) & (converted_df_apart_2022["landsize"] != 0)]
    converted_df_apart_2022[["price", "landsize"]] = np.log(df_apart_2022[["price", "landsize"]])

    # Create dummies for quarters
    quarters_dummies = pd.get_dummies(converted_df_apart_2022['quarter'], prefix='2022Q')
    quarters_dummies.columns = [f'2022Q{col.split("_")[1]}' for col in quarters_dummies.columns]
    converted_df_apart_2022 = pd.concat([converted_df_apart_2022, quarters_dummies], axis=1)

    # Select necessary columns
    selected_columns = ["quarter", "landsize", "price", "2022Q1", "2022Q2", "2022Q3", "2022Q4"]
    tdhm_df = converted_df_apart_2022[selected_columns]

    # Compute coefficients for apartments
    X = tdhm_df[["2022Q1", "2022Q2", "2022Q3", "2022Q4"]]
    X = sm.add_constant(X)
    y = tdhm_df["price"]
    model = sm.OLS(y, X).fit()
    coefficients = model.params
    exponential_coefficients = np.exp(coefficients)
    sum_prices_apart = tdhm_df['price'].sum()


    # Apply price filtering for houses
    for idx, price in enumerate(df_house['price']):
        if price < 10000000:
            df_house.at[idx, 'price'] = 10000000
        elif price > 250000000:
            df_house.at[idx, 'price'] = 250000000

    # Data preprocessing for houses
    df_house['district'] = df_house['district'].str.lower().str.strip()
    df_house = df_house.dropna(subset=['date'])
    df_house['date'] = pd.to_datetime(df_house['date'])
    df_house['quarter'] = df_house['date'].dt.quarter
    df_house_2022 = df_house[(df_house['district'].isin(desired_districts)) & (df_house['date'].dt.year == 2022)]

    # Convert price and landsize into log for houses
    converted_df_house_2022 = df_house_2022.copy()
    converted_df_house_2022 = converted_df_house_2022[(converted_df_house_2022["price"] != 0) & (converted_df_house_2022["landsize"] != 0)]
    converted_df_house_2022[["price", "landsize"]] = np.log(converted_df_house_2022[["price", "landsize"]])

    # Create dummies for quarters for houses
    house_quarters_dummies = pd.get_dummies(converted_df_house_2022['quarter'], prefix='2022Q')
    house_quarters_dummies.columns = [f'2022Q{int(col.split("_")[1].split(".")[0])}' for col in house_quarters_dummies.columns]
    converted_df_house_2022 = pd.concat([converted_df_house_2022, house_quarters_dummies], axis=1)

    # Select necessary columns for houses
    house_selected_columns = ["quarter", "landsize", "price", "2022Q1", "2022Q2", "2022Q3", "2022Q4"]
    house_tdhm_df = converted_df_house_2022[house_selected_columns]

    # Compute coefficients for houses
    X_h = house_tdhm_df[["2022Q1", "2022Q2", "2022Q3", "2022Q4"]]
    X_h = sm.add_constant(X_h)
    y_h = house_tdhm_df["price"]
    model_house = sm.OLS(y_h, X_h).fit()
    house_coefficients = model_house.params
    house_exponential_coefficients = np.exp(house_coefficients)
    sum_prices_house = house_tdhm_df['price'].sum()

    # Compute weights
    Total_property_prices = sum_prices_house + sum_prices_apart
    weight_house = sum_prices_house / Total_property_prices
    weight_apart = sum_prices_apart / Total_property_prices

    # Calculate property aggregate index values for each quarter
    quarters = ['2022Q1', '2022Q2', '2022Q3', '2022Q4']
    property_aggregate_indices = []

    for quarter_str in quarters:
        if quarter_str in exponential_coefficients and quarter_str in house_exponential_coefficients:
            property_aggregate_index = (exponential_coefficients[quarter_str] * weight_apart) + (house_exponential_coefficients[quarter_str] * weight_house)
            property_aggregate_indices.append(property_aggregate_index)
        else:
            print(f"No data available for {quarter_str}")

    data_aggr = list(zip(quarters, property_aggregate_indices))

    # Prepare data for CSV export
    quarters = ['const', '2022Q1', '2022Q2', '2022Q3', '2022Q4']
    prop_data_apart = pd.DataFrame(list(zip(quarters, exponential_coefficients.values)), columns=['Quarter', 'Apartments'])
    prop_data_apart = prop_data_apart.drop(0)
    prop_df_apart = prop_data_apart.reset_index(drop=True)

    prop_data_house = pd.DataFrame(list(zip(quarters, house_exponential_coefficients.values)), columns=['Quarter', 'Houses'])
    prop_data_house = prop_data_house.drop(0)
    prop_df_house = prop_data_house.reset_index(drop=True)

    prop_df_aggr = pd.DataFrame(data_aggr, columns=['Quarter', 'Aggregated Index'])

    prop_df_merged = pd.merge(prop_df_house, prop_df_apart, on='Quarter')
    prop_df_merged = pd.merge(prop_df_merged, prop_df_aggr, on='Quarter')
    prop_df_merged = prop_df_merged.round(2)

    csv_file = "TDHM_Indices.csv"
    sql_table = "TDHM_Indices"

    prop_df_merged.to_csv(csv_file, index=False)

    try:
        prop_df_merged.to_sql(sql_table, engine, if_exists='replace', index=False)
        print(f"Data has been saved to {csv_file} and {sql_table} DB table.")
    except Exception as e:
        print(f"An error occurred while saving data to the SQL table: {str(e)}")
    cur.close()
    conn.close()

In [72]:
env_path = os.path.abspath('.env')
calculate_property_indices(env_path, year=2022)

  result = func(self.values, **kwargs)


Data has been saved to TDHM_Indices.csv and TDHM_Indices DB table.
