In [None]:
import os
import json
import requests
import time 
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime

from functions import load_api_key, point_plotting, plotting

# Show every column when a DataFrame is rendered (None removes the column cap).
pd.set_option('display.max_columns', None)

# Reload imported modules automatically before each cell runs, so edits to the
# local `functions` module are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Rental records; later cells read the `year`, `town` and `monthly_rent` columns.
rental_csv_path = 'inputs/rental_with_coordinates.csv'
rental_data_df = pd.read_csv(rental_csv_path)

In [None]:
# Load the taxable-individuals table and keep assessment years 2020-2024
# (Series.between is inclusive on both ends by default).
income_raw_df = pd.read_csv('inputs/TaxableIndividualsbyAssessedIncomeGroup.csv')

# Filter first, then cast the year to int. `.assign` returns a new frame, so the
# cast is never written onto a slice of the raw table; assigning a column
# directly on the boolean-filtered result is the pattern that triggers
# pandas' SettingWithCopyWarning.
income_df = (
    income_raw_df[income_raw_df['year_of_assessment'].between(2020, 2024)]
    .assign(year_of_assessment=lambda frame: frame['year_of_assessment'].astype(int))
)

In [None]:
# Show the year-filtered income table as the cell output.
income_df

# Number of taxpayers

In [None]:
# One row per assessment year, one column per income group holding that group's
# summed taxpayer count, plus a `total_taxpayer` row total and the `year`.
#
# Built with groupby/unstack instead of transposing a per-year frame inside a
# loop: the transpose mixed the group labels with the counts and left every
# count column as object dtype, and concatenating inside the loop gave every
# row the same index label (0).
taxpayer_years = [2020, 2021, 2022, 2023]

taxpayer_counts = (
    income_df[income_df['year_of_assessment'].isin(taxpayer_years)]
    .groupby(['year_of_assessment', 'assessed_income_group'])['number_of_taxpayers']
    .sum()
    .unstack('assessed_income_group')   # income groups become columns, sorted by label
    .add_suffix('_taxpayer')
)
taxpayer_counts.columns.name = None

# The row total must be computed before `year` is added, otherwise the year
# itself would be summed into the total.
taxpayer_counts['total_taxpayer'] = taxpayer_counts.sum(axis=1)
taxpayer_counts['year'] = taxpayer_counts.index

total_income_ppl_df = taxpayer_counts.reset_index(drop=True)

In [None]:
# Reshape wide -> long: one row per (year, category) with the count in
# `number_of_people`; `year` is kept as the identifier column.
collapsed_total_income_ppl_df = total_income_ppl_df.melt(
    id_vars=["year"],
    var_name="category",
    value_name="number_of_people",
)

In [None]:
# Category order handed to `plotting`: income brackets from lowest to highest,
# each carrying the `_taxpayer` suffix used by the melted column names.
order = [
    bracket + '_taxpayer'
    for bracket in (
        '20,000 & below', '20,001 - 25,000', '25,001 - 30,000', '30,001 - 40,000',
        '40,001 - 50,000', '50,001 - 60,000', '60,001 - 70,000', '70,001 - 80,000',
        '80,001 - 100,000', '100,001 - 150,000', '150,001 - 200,000',
        '200,001 - 300,000', '300,001 - 400,000', '400,001 - 500,000',
        '500,001 - 1,000,000', '1,000,001 & above',
    )
]

# Leave out the per-year grand total so only the individual brackets are plotted.
is_bracket_row = collapsed_total_income_ppl_df['category'].ne('total_taxpayer')
df = collapsed_total_income_ppl_df[is_bracket_row]

plotting(
    'count of income ppl vs time',
    'barplot',
    df,
    'category',
    'number_of_people',
    'year',
    desired_order=order,
)

In [None]:
# Year-over-year percentage change in taxpayer count for each income bracket.
#
# The input is derived from `collapsed_total_income_ppl_df` directly rather than
# from the scratch variable `df`: `df` is rebound by other cells in this
# notebook, so relying on it makes this cell depend on execution order.
rate_change_income_ppl_df = (
    collapsed_total_income_ppl_df[collapsed_total_income_ppl_df['category'] != 'total_taxpayer']
    # Make sure the counts are a numeric dtype so the arithmetic below is vectorised.
    .assign(number_of_people=lambda frame: pd.to_numeric(frame['number_of_people']))
    .sort_values(by=['category', 'year'])
    .reset_index(drop=True)
)

# Within each bracket the rows are sorted by year, so shifting by one row gives
# the count of the previous available year (NaN for the first year).
rate_change_income_ppl_df['previous_year_number'] = (
    rate_change_income_ppl_df.groupby('category')['number_of_people'].shift(1)
)
rate_change_income_ppl_df['ppl_increase_perc'] = (
    (rate_change_income_ppl_df['number_of_people'] - rate_change_income_ppl_df['previous_year_number'])
    / rate_change_income_ppl_df['previous_year_number']
    * 100
)

# The first year of every bracket has no predecessor and is dropped here.
rate_change_income_ppl_df = rate_change_income_ppl_df.dropna()

In [None]:
# Mean monthly rent for every (year, town) pair, ordered by town then year so
# that consecutive rows inside a town are consecutive available years.
rent_by_town_year = (
    rental_data_df
    .groupby(['year', 'town'])['monthly_rent']
    .mean()
    .reset_index()
    .sort_values(by=['town', 'year'])
    .reset_index(drop=True)
)

# Previous row's mean rent within the same town (NaN for a town's first year).
prior_rent = rent_by_town_year.groupby('town')['monthly_rent'].shift(1)

# Percentage change against the previous year; rows without a predecessor
# contain NaN and are removed by dropna().
mean_rental_data_df = rent_by_town_year.assign(
    previous_year_number=prior_rent,
    rent_increase_perc=(rent_by_town_year['monthly_rent'] - prior_rent) / prior_rent * 100,
).dropna()

In [None]:
# Show the per-town, per-year rent change table as the cell output.
mean_rental_data_df

In [None]:
# Bar chart of the yearly rent change using the town, rent_increase_perc and
# year columns (no explicit category order is passed here).
plotting(
    'Percentage of change in annual mean rent',
    'barplot',
    mean_rental_data_df,
    'town',
    'rent_increase_perc',
    'year',
)

In [None]:
# Category order handed to `plotting`: income brackets from lowest to highest,
# each carrying the `_taxpayer` suffix used by the melted column names.
order = [
    bracket + '_taxpayer'
    for bracket in (
        '20,000 & below', '20,001 - 25,000', '25,001 - 30,000', '30,001 - 40,000',
        '40,001 - 50,000', '50,001 - 60,000', '60,001 - 70,000', '70,001 - 80,000',
        '80,001 - 100,000', '100,001 - 150,000', '150,001 - 200,000',
        '200,001 - 300,000', '300,001 - 400,000', '400,001 - 500,000',
        '500,001 - 1,000,000', '1,000,001 & above',
    )
]

# Pass the order by keyword, as the other `plotting` calls in this notebook do,
# so the call does not depend on the position of `desired_order` in the
# helper's signature.
# NOTE(review): assumes the 7th positional parameter of `plotting` is
# `desired_order` - confirm against functions.py.
plotting(
    'Percentage of change in people',
    'barplot',
    rate_change_income_ppl_df,
    'category',
    'ppl_increase_perc',
    'year',
    desired_order=order,
)

# Total income

In [None]:
# One row per assessment year, one column per income group holding that group's
# summed assessable income, plus a `total_income` row total and the `year`.
#
# Built with groupby/unstack instead of transposing a per-year frame inside a
# loop: the transpose mixed the group labels with the amounts and left every
# income column as object dtype, and concatenating inside the loop gave every
# row the same index label (0).
income_years = [2020, 2021, 2022, 2023]

income_sums = (
    income_df[income_df['year_of_assessment'].isin(income_years)]
    .groupby(['year_of_assessment', 'assessed_income_group'])['assessable_income']
    .sum()
    .unstack('assessed_income_group')   # income groups become columns, sorted by label
    .add_suffix('_income')
)
income_sums.columns.name = None

# The row total must be computed before `year` is added, otherwise the year
# itself would be summed into the total.
income_sums['total_income'] = income_sums.sum(axis=1)
income_sums['year'] = income_sums.index

total_income_df = income_sums.reset_index(drop=True)

In [None]:
# Show the per-year income table as the cell output.
total_income_df

In [None]:
# Reshape wide -> long: one row per (year, category) with the amount in
# `income_sum`; `year` is kept as the identifier column.
collapsed_total_income_df = total_income_df.melt(
    id_vars=["year"],
    var_name="category",
    value_name="income_sum",
)

In [None]:
# Category order handed to `plotting`: income brackets from lowest to highest,
# each carrying the `_income` suffix used by the melted column names.
order = [
    bracket + '_income'
    for bracket in (
        '20,000 & below', '20,001 - 25,000', '25,001 - 30,000', '30,001 - 40,000',
        '40,001 - 50,000', '50,001 - 60,000', '60,001 - 70,000', '70,001 - 80,000',
        '80,001 - 100,000', '100,001 - 150,000', '150,001 - 200,000',
        '200,001 - 300,000', '300,001 - 400,000', '400,001 - 500,000',
        '500,001 - 1,000,000', '1,000,001 & above',
    )
]

# Leave out the per-year grand total so only the individual brackets are plotted.
is_income_bracket_row = collapsed_total_income_df['category'].ne('total_income')
df = collapsed_total_income_df[is_income_bracket_row]

plotting(
    'income vs time',
    'barplot',
    df,
    'category',
    'income_sum',
    'year',
    desired_order=order,
)

In [None]:
# Attach the per-year income columns to every rental record (left join on
# `year`, so rental rows without a matching income year are kept with NaN).
#
# This cell rebinds `rental_data_df`, so running it a second time would merge
# the income columns again and produce `_x`/`_y` duplicates. Dropping any
# income columns left by a previous run first makes the cell safe to re-run.
income_columns = [column for column in total_income_df.columns if column != 'year']
rental_data_df = (
    rental_data_df
    .drop(columns=income_columns, errors='ignore')
    # many_to_one: fail loudly if `total_income_df` ever holds more than one
    # row per year, which would silently duplicate rental rows.
    .merge(total_income_df, on='year', how='left', validate='many_to_one')
)
rental_data_df.head()