In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

full_df = pd.read_csv('brooklyn_2022_sales.csv')
high_low_df = pd.read_csv('highest_three_lowest_four.csv')
batt_park_df = pd.read_csv('Batt_park_2022_sales.csv')

frames = [full_df, high_low_df, batt_park_df]
full_df = pd.concat(frames)
full_df.shape

(17758, 13)

In [2]:
#create full DF and combined columns for first and second mtgs into one colums, added a count column
full_df = full_df.fillna(0)
full_df['Amount Mortgaged'] = full_df['1st Amount'] + full_df['2nd Amount']
full_df['Count'] = 1
full_df = full_df.loc[:, ['Count', 'ZIP', 'Type', 'Owner Occ?', 'Purchase Down %', 
                                 'Purchase Amt', 'Amount Mortgaged']]



full_df.tail()

Unnamed: 0,Count,ZIP,Type,Owner Occ?,Purchase Down %,Purchase Amt,Amount Mortgaged
34,1,10282.0,CND,1.0,100.0,2795000.0,1100000.0
35,1,10282.0,CND,0.0,100.0,2102686.0,1652000.0
36,1,10282.0,CND,1.0,52.0,2700000.0,1300000.0
37,1,10282.0,CND,1.0,100.0,2800000.0,0.0
38,1,0.0,The information contained in this report is su...,0.0,0.0,0.0,0.0


In [3]:
#calculate how much mortgage tax was collected.
# for mortgages less than $500,000, tax is 1.8% for mortgages of $500,000, 1.95% 
#calculate the mansion tax as 1% for sales over $1,000,000

full_df['Mtg Tx Pd'] = np.where(full_df['Amount Mortgaged'] >= 500000, 
                                 full_df['Amount Mortgaged'] * 0.0195, full_df['Amount Mortgaged'] * 0.018)
full_df['Mansion Tax Pd'] = np.where(full_df['Purchase Amt'] >= 1000000, full_df['Purchase Amt'] * 0.01, 0)
full_df.head()

Unnamed: 0,Count,ZIP,Type,Owner Occ?,Purchase Down %,Purchase Amt,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd
0,1,11201.0,CND,0.0,33.0,4600000.0,3062500.0,59718.75,46000.0
1,1,11201.0,CND,0.0,30.0,4550000.0,3185000.0,62107.5,45500.0
2,1,11201.0,CND,0.0,69.0,2620000.0,825084.0,16089.138,26200.0
3,1,11201.0,CND,1.0,100.0,2700000.0,0.0,0.0,27000.0
4,1,11201.0,CND,0.0,100.0,4740000.0,0.0,0.0,47400.0


In [4]:
#created two dataframes, one for investor purchases and the other for primary residences
investor_df = full_df[(full_df['Owner Occ?'] ==1)]
owner_occ_df = full_df[(full_df['Owner Occ?'] == 0)]


In [5]:
#calculate new sales tax info for private residences, 0.9% for purchases under $1mil and 
#calculate new sales tax for investment purchases, 4.4357%
owner_occ_df['New Sales Tax Amount'] = np.where(owner_occ_df['Purchase Amt'] >= 1000000, owner_occ_df['Purchase Amt'] * 0.009,
                                owner_occ_df['Purchase Amt'] * 0.014)
investor_df['New Sales Tax Amount'] = investor_df['Purchase Amt'] * 0.04438

owner_occ_df.tail()

Unnamed: 0,Count,ZIP,Type,Owner Occ?,Purchase Down %,Purchase Amt,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount
27,1,10282.0,CND,0.0,100.0,6212500.0,0.0,0.0,62125.0,55912.5
28,1,10282.0,CND,0.0,0.0,2127839.0,1680000.0,32760.0,21278.39,19150.551
33,1,10282.0,CND,0.0,21.0,3253308.0,2556000.0,49842.0,32533.08,29279.772
35,1,10282.0,CND,0.0,100.0,2102686.0,1652000.0,32214.0,21026.86,18924.174
38,1,0.0,The information contained in this report is su...,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
#show the total mtg tax and mansion taxes collected and compare to what our hypothetical tax would have raised

total_mtg_tax_pd = owner_occ_df['Mtg Tx Pd'].sum()
total_mansion_tax_pd = owner_occ_df['Mansion Tax Pd'].sum()
total_est_tax = owner_occ_df['New Sales Tax Amount'].sum()
total_mtg_tax_inves_pd = investor_df['Mtg Tx Pd'].sum()
total_mansion_tax_inves_pd = investor_df['Mansion Tax Pd'].sum()
total_est_inves_tax = investor_df['New Sales Tax Amount'].sum()
print(f"The total mtg tax paid in bklyn by individuals purchasing primary residences in 2022 was ${total_mtg_tax_pd:,.0f} and the total mansion tax collected was ${total_mansion_tax_pd:,.0f}.")
print(f'The combined mtg and mansion tax was {total_mtg_tax_pd + total_mansion_tax_pd:,.0f}, our hypothetical sales tax would have raised ${total_est_tax:,.0f}')
print(f"The total mtg tax paid in bklyn by investors in 2022 was ${total_mtg_tax_inves_pd:,.0f} and the total mansion tax collected was ${total_mansion_tax_inves_pd:,.0f}.")
print(f'The combined mtg and mansion tax was {total_mtg_tax_inves_pd + total_mansion_tax_inves_pd:,.0f}, our hypothetical sales tax on investments would have raised ${total_est_inves_tax:,.0f}')
print(f'The total tax raised in 2022 was {total_mtg_tax_pd + total_mansion_tax_pd + total_mtg_tax_inves_pd + total_mansion_tax_inves_pd:,.0f} and the total amount that would have been raised would have been ${total_est_tax + total_est_inves_tax:,.0f}')

The total mtg tax paid in bklyn by individuals purchasing primary residences in 2022 was $94,933,458 and the total mansion tax collected was $74,625,891.
The combined mtg and mansion tax was 169,559,348, our hypothetical sales tax would have raised $112,560,703
The total mtg tax paid in bklyn by investors in 2022 was $114,969,829 and the total mansion tax collected was $73,171,797.
The combined mtg and mansion tax was 188,141,626, our hypothetical sales tax on investments would have raised $514,771,421
The total tax raised in 2022 was 357,700,974 and the total amount that would have been raised would have been $627,332,124


In [7]:
owner_occ_by_zip = owner_occ_df.groupby(['ZIP']).agg({'Count': 'sum', 'Purchase Amt': 'sum', 
                                                            'Purchase Down %': 'mean',
                                                            'Amount Mortgaged': 'sum', 
                                                            'Mtg Tx Pd': 'sum', 'Mansion Tax Pd': 'sum', 
                                                            'New Sales Tax Amount': 'sum'})
owner_occ_by_zip.head()

Unnamed: 0_level_0,Count,Purchase Amt,Purchase Down %,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount
ZIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,3,0.0,0.0,0.0,0.0,0.0,0.0
10007.0,76,248142473.0,77.078947,52482210.0,1022412.0,2434724.73,2256632.257
10013.0,167,691526687.0,76.497006,167530103.0,3264338.0,6841706.88,6260520.178
10282.0,19,51679833.0,65.631579,18227281.0,354963.6,516798.33,465118.497
10453.0,8,4950000.0,17.625,5542498.0,106458.7,0.0,69300.0


In [8]:
investor_by_zip = investor_df.groupby(['ZIP']).agg({'Count': 'sum', 'Purchase Amt': 'sum', 
                                                            'Purchase Down %': 'mean',
                                                            'Amount Mortgaged': 'sum', 
                                                            'Mtg Tx Pd': 'sum', 'Mansion Tax Pd': 'sum', 
                                                            'New Sales Tax Amount': 'sum'})
investor_by_zip.shape

(46, 7)

In [9]:
owner_occ_by_zip['Average Purchase $'] = owner_occ_by_zip['Purchase Amt']/owner_occ_by_zip['Count']
owner_occ_by_zip['Average Mtg'] = owner_occ_by_zip['Amount Mortgaged']/owner_occ_by_zip['Count']
owner_occ_by_zip['Ave Hypothetical Tax'] = owner_occ_by_zip['New Sales Tax Amount']/owner_occ_by_zip['Count']
pd.options.display.float_format = '{:.1f}'.format

owner_occ_by_zip.head()

Unnamed: 0_level_0,Count,Purchase Amt,Purchase Down %,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount,Average Purchase $,Average Mtg,Ave Hypothetical Tax
ZIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10007.0,76,248142473.0,77.1,52482210.0,1022411.8,2434724.7,2256632.3,3265032.5,690555.4,29692.5
10013.0,167,691526687.0,76.5,167530103.0,3264338.1,6841706.9,6260520.2,4140878.4,1003174.3,37488.1
10282.0,19,51679833.0,65.6,18227281.0,354963.6,516798.3,465118.5,2719991.2,959330.6,24479.9
10453.0,8,4950000.0,17.6,5542498.0,106458.7,0.0,69300.0,618750.0,692812.2,8662.5


In [10]:
investor_by_zip['Average Purchase $'] = investor_by_zip['Purchase Amt']/investor_by_zip['Count']
investor_by_zip['Average Mtg'] = investor_by_zip['Amount Mortgaged']/investor_by_zip['Count']
investor_by_zip['Ave Hypothetical Tax'] = investor_by_zip['New Sales Tax Amount']/investor_by_zip['Count']
pd.options.display.float_format = '{:.1f}'.format

investor_by_zip.head()

Unnamed: 0_level_0,Count,Purchase Amt,Purchase Down %,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount,Average Purchase $,Average Mtg,Ave Hypothetical Tax
ZIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10007.0,207,720989921.0,63.9,330202974.0,6436566.0,7175799.2,31997532.7,3483043.1,1595183.4,154577.5
10013.0,506,2000907314.0,65.7,751889374.0,14656298.0,19773756.3,88800266.6,3954362.3,1485947.4,175494.6
10282.0,19,56754683.0,63.3,20007189.0,389464.3,567546.8,2518772.8,2987088.6,1053009.9,132567.0
10453.0,15,8470800.0,30.5,7823035.0,147037.7,0.0,375934.1,564720.0,521535.7,25062.3
10457.0,50,24377949.0,18.5,21581746.0,406340.6,21510.0,1081893.4,487559.0,431634.9,21637.9


In [11]:
frames = [owner_occ_df, investor_df]
calculated_full_df = pd.concat(frames)
calculated_full_df.shape

(17758, 10)

In [12]:
calculated_full_df.to_csv('Sales_info_with_taxes.csv')