In [1]:
import sys
!{sys.executable} -m pip install google-cloud-bigquery



In [2]:
!pip install prettytable



In [3]:
!pip install db-dtypes



In [4]:
!pip install pandas-gbq



In [5]:
from google.cloud import bigquery


# Create a BigQuery client
client = bigquery.Client(project="degroup11")

In [6]:
from prettytable import PrettyTable
import pandas as pd
from prettytable import PrettyTable

In [7]:
# step 1 with table as output

QUERY = ('''
    SELECT 
    ind.gross_salary,
    ind.alimony_amount,
    ind.has_spouse,
    sp.gross_salary AS sp_gross_salary,
    sp.alimony_amount AS sp_alimony_amount,
    CAST(
        IF(ind.has_spouse = FALSE, 
            ind.gross_salary - 12 * ind.alimony_amount, 
            IF(ind.gross_salary > sp.gross_salary, 
            ind.gross_salary - 12 * ind.alimony_amount + 1/3 * (sp.gross_salary - 12 * sp.alimony_amount),
            sp.gross_salary - 12 * sp.alimony_amount + 1/3 * (ind.gross_salary - 12 * ind.alimony_amount))
        ) AS INT64
    ) AS house_spendable_income
FROM 
    degroup11.group11dataset.individuals ind
LEFT JOIN 
    degroup11.group11dataset.spouse sp
ON 
    ind.spouse_ID = sp.spouse_ID
    ''')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish

# Get column names
columns = [field.name for field in query_job.result().schema]

# Create a PrettyTable
table = PrettyTable(columns)

# Add rows to the table
for row in rows:
    table.add_row([row[column] for column in columns])

# Print the table
print(table[:10])

+--------------+----------------+------------+-----------------+-------------------+------------------------+
| gross_salary | alimony_amount | has_spouse | sp_gross_salary | sp_alimony_amount | house_spendable_income |
+--------------+----------------+------------+-----------------+-------------------+------------------------+
|    75526     |       0        |    True    |      59122      |         0         |         95233          |
|    134600    |      1200      |   False    |      101412     |         0         |         120200         |
|    144798    |       0        |    True    |      149733     |         0         |         197999         |
|    150580    |       0        |    True    |      47582      |         0         |         166441         |
|    186763    |       0        |   False    |      181201     |         0         |         186763         |
|    59116     |       0        |    True    |      38986      |         0         |         72111          |
|    18988

In [8]:
# step 1 with pandas dataframe as output

client = bigquery.Client()

QUERY = ('''
    SELECT 
    ind.gross_salary,
    ind.alimony_amount,
    ind.has_spouse,
    sp.gross_salary AS sp_gross_salary,
    sp.alimony_amount AS sp_alimony_amount,
    CAST(
        IF(ind.has_spouse = FALSE, 
            ind.gross_salary - 12 * ind.alimony_amount, 
            IF(ind.gross_salary > sp.gross_salary, 
            ind.gross_salary - 12 * ind.alimony_amount + 1/3 * (sp.gross_salary - 12 * sp.alimony_amount),
            sp.gross_salary - 12 * sp.alimony_amount + 1/3 * (ind.gross_salary - 12 * ind.alimony_amount))
        ) AS INT64
    ) AS house_spendable_income
FROM 
    degroup11.group11dataset.individuals ind
LEFT JOIN 
    degroup11.group11dataset.spouse sp
ON 
    ind.spouse_ID = sp.spouse_ID
''')

query_job = client.query(QUERY)  # API request

# Use list comprehension to convert rows to a list
rows = [row for row in query_job.result()]

# Get column names
columns = [field.name for field in query_job.result().schema]

# Create a DataFrame manually
result_df = pd.DataFrame(data=[list(row.values()) for row in rows], columns=columns)

# Now you can access and manipulate the data using Pandas functions
result_df

Unnamed: 0,gross_salary,alimony_amount,has_spouse,sp_gross_salary,sp_alimony_amount,house_spendable_income
0,75526,0,True,59122,0,95233
1,134600,1200,False,101412,0,120200
2,144798,0,True,149733,0,197999
3,150580,0,True,47582,0,166441
4,186763,0,False,181201,0,186763
...,...,...,...,...,...,...
995,76791,1415,False,101711,0,59811
996,111108,0,False,38953,0,111108
997,51688,0,True,47980,0,67681
998,193117,0,True,183747,0,254366


In [9]:
#step 2

#year_int = 4.58%
#month_int = 0.0037
#intialised in step 5

In [10]:
# start step 3

#Read table toetsinkomen

client = bigquery.Client()

QUERY = ('''
SELECT 
    *
FROM 
    degroup11.group11dataset.toetsinkomen
''')

query_job = client.query(QUERY)  # API request

# Use list comprehension to convert rows to a list
rows = [row for row in query_job.result()]

# Get column names
columns = [field.name for field in query_job.result().schema]

# Create a DataFrame manually
rente_df = pd.DataFrame(data=[list(row.values()) for row in rows], columns=columns)

# Now you can access and manipulate the data using Pandas functions
rente_df

Unnamed: 0,Toetsinkomen,Toetsrente
0,0,17.5
1,19500,17.5
2,20000,18.5
3,20050,19.5
4,21000,20.5
5,21500,21.0
6,22000,22.0
7,22500,23.5
8,23000,24.5
9,23500,25.0


In [11]:
#Add column toetsrente

result_df['Toetsrente'] = None  # Initialize the new column

# Iterate through each row in result_df
for index, row in result_df.iterrows():
    # Find the corresponding row in rente_df based on the condition
    corresponding_row = rente_df[rente_df['Toetsinkomen'] < row['house_spendable_income']].iloc[-1]
    
    # Update the 'toetsrente' column in result_df with the value from rente_df
    result_df.at[index, 'Toetsrente'] = corresponding_row['Toetsrente']

# Now result_df has a new column 'toetsrente' with the desired values
result_df

Unnamed: 0,gross_salary,alimony_amount,has_spouse,sp_gross_salary,sp_alimony_amount,house_spendable_income,Toetsrente
0,75526,0,True,59122,0,95233,33.5
1,134600,1200,False,101412,0,120200,34.5
2,144798,0,True,149733,0,197999,34.5
3,150580,0,True,47582,0,166441,34.5
4,186763,0,False,181201,0,186763,34.5
...,...,...,...,...,...,...,...
995,76791,1415,False,101711,0,59811,28.5
996,111108,0,False,38953,0,111108,34.5
997,51688,0,True,47980,0,67681,30.0
998,193117,0,True,183747,0,254366,34.5


In [12]:
#Finalise step 3

result_df['mortgage_year'] = result_df['house_spendable_income'] * result_df['Toetsrente'] * 0.01
result_df['mortgage_month'] = result_df['mortgage_year'] / 12
result_df

Unnamed: 0,gross_salary,alimony_amount,has_spouse,sp_gross_salary,sp_alimony_amount,house_spendable_income,Toetsrente,mortgage_year,mortgage_month
0,75526,0,True,59122,0,95233,33.5,31903.055,2658.587917
1,134600,1200,False,101412,0,120200,34.5,41469.0,3455.75
2,144798,0,True,149733,0,197999,34.5,68309.655,5692.47125
3,150580,0,True,47582,0,166441,34.5,57422.145,4785.17875
4,186763,0,False,181201,0,186763,34.5,64433.235,5369.43625
...,...,...,...,...,...,...,...,...,...
995,76791,1415,False,101711,0,59811,28.5,17046.135,1420.51125
996,111108,0,False,38953,0,111108,34.5,38332.26,3194.355
997,51688,0,True,47980,0,67681,30.0,20304.3,1692.025
998,193117,0,True,183747,0,254366,34.5,87756.27,7313.0225


In [19]:
#Step5
months = 360
monthly_int_rate = 0.0037

def mortgagefun(pay, m, int):
    return pay * ((1-((1+int)**-m))/int)

result_df['possible_mortgage'] = result_df['mortgage_month'].apply(mortgagefun, args=(months, monthly_int_rate))
individuals_df = result_df
individuals_df

Unnamed: 0,gross_salary,alimony_amount,has_spouse,sp_gross_salary,sp_alimony_amount,house_spendable_income,Toetsrente,mortgage_year,mortgage_month,possible_mortgage
0,75526,0,True,59122,0,95233,33.5,31903.055,2658.587917,5.284134e+05
1,134600,1200,False,101412,0,120200,34.5,41469.0,3455.75,6.868551e+05
2,144798,0,True,149733,0,197999,34.5,68309.655,5692.47125,1.131419e+06
3,150580,0,True,47582,0,166441,34.5,57422.145,4785.17875,9.510886e+05
4,186763,0,False,181201,0,186763,34.5,64433.235,5369.43625,1.067214e+06
...,...,...,...,...,...,...,...,...,...,...
995,76791,1415,False,101711,0,59811,28.5,17046.135,1420.51125,2.823368e+05
996,111108,0,False,38953,0,111108,34.5,38332.26,3194.355,6.349010e+05
997,51688,0,True,47980,0,67681,30.0,20304.3,1692.025,3.363021e+05
998,193117,0,True,183747,0,254366,34.5,87756.27,7313.0225,1.453516e+06


In [14]:
##############################################3

In [15]:
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client()

QUERY = '''
    SELECT * FROM `degroup11.group11dataset.house_pricing`
'''

query_job = client.query(QUERY)  # API request

# Use list comprehension to convert rows to a list
rows = [row for row in query_job.result()]

# Get column names
columns = [field.name for field in query_job.result().schema]

# Create a DataFrame manually
houses_df = pd.DataFrame(data=[list(row.values()) for row in rows], columns=columns)

# Now you can access and manipulate the data using Pandas functions
houses_df[['Address','City','Price','Availability']]

Unnamed: 0,Address,City,Price,Availability
0,Wolfpad 66,Alkmaar,905000.0,False
1,Vikinghof 1,Hoofddorp,745000.0,False
2,Hoofdgracht 13,Naarden,1795000.0,True
3,Prins Constantijnlaan 2,Badhoevedorp,945000.0,True
4,Theo Hendriksehof 2,Beverwijk,500000.0,False
...,...,...,...,...
5550,Het Wargaren 24,Lith,525000.0,True
5551,Plein-Zuid 7,Aalten,575000.0,True
5552,Eibergsestraat 60,Haaksbergen,498500.0,False
5553,Heiweg 5,Herpen,650000.0,True


In [33]:
# Sort houses_df by 'Price' column
houses_df = houses_df.sort_values(by='Price')

def find_closest_houses(mortgage_amount):
    return list(houses_df[houses_df['Price'] <= mortgage_amount]['Address'].head(5))


In [34]:
individuals_df['possible_mortgage'][0]

528413.4211009981

In [35]:
find_closest_houses(individuals_df['possible_mortgage'][0])

['Stationsstraat 83',
 'Dentgenbachweg 11',
 'Dwarslaan 8',
 'Marijkestraat 6',
 'Aalbersestraat 6']