In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

full_bklyn_df = pd.read_csv('brooklyn_2022_sales.csv')
full_bklyn_df.shape

(15555, 13)

In [2]:
#create full DF and combined columns for first and second mtgs into one colums, 
#added a column for count to track how many get combined later into groupbys and

full_bklyn_df = full_bklyn_df.fillna(0)
full_bklyn_df['Amount Mortgaged'] = full_bklyn_df['1st Amount'] + full_bklyn_df['2nd Amount']
full_bklyn_df['Zip Count'] = full_bklyn_df['ZIP'].map(full_bklyn_df['ZIP'].value_counts())
full_bklyn_df.tail()

Unnamed: 0,Type,Address,ZIP,Purchase Amt,Purchase Down %,1st Amount,2nd Amount,CLTV %,Purchase Date,Purchase Type,City,Owner Occ?,Listed for Sale?,Amount Mortgaged,Zip Count
15550,CND,127 PUTNAM AVE 1B,11238.0,1190000.0,25.0,892500.0,0.0,72.0,6/1/2023,Market,BROOKLYN,0.0,0.0,892500.0,709
15551,CND,108 JACKSON ST 1B,11211.0,1430000.0,20.0,1144000.0,0.0,81.0,10/26/2023,Market,BROOKLYN,0.0,0.0,1144000.0,551
15552,CND,2222 OCEAN AVE 2F,11229.0,380000.0,100.0,0.0,0.0,0.0,11/14/2023,Market,BROOKLYN,0.0,0.0,0.0,952
15553,CND,2218 OCEAN AVE 5E,11229.0,315000.0,100.0,0.0,0.0,0.0,4/25/2023,Market,BROOKLYN,0.0,0.0,0.0,952
15554,The information contained in this report is su...,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,1


In [3]:
#filtered DF to only relevant columns
bklyn_df = full_bklyn_df.loc[:, ['ZIP', 'Zip Count', 'Type', 'Owner Occ?', 'Purchase Down %', 
                                 'Purchase Amt', 'Amount Mortgaged']]
bklyn_df.head()

Unnamed: 0,ZIP,Zip Count,Type,Owner Occ?,Purchase Down %,Purchase Amt,Amount Mortgaged
0,11201.0,1187,CND,0.0,33.0,4600000.0,3062500.0
1,11201.0,1187,CND,0.0,30.0,4550000.0,3185000.0
2,11201.0,1187,CND,0.0,69.0,2620000.0,825084.0
3,11201.0,1187,CND,1.0,100.0,2700000.0,0.0
4,11201.0,1187,CND,0.0,100.0,4740000.0,0.0


In [4]:
#calculate the mortage tax as 1.8% for mortgages less than $500,000 and 1.95% for mortgages of $500,000 or more
#calculate the mansion tax as 1% for sales over $1,000,000
# calculate the hypotehtical sales tax if the tax rate as 2/3 or the mortgage tax (1.19%) for purchases less than
# $1,000,000 and add an additional 0.5% (1.69%)for purchases over $1,000,000

bklyn_df['Mtg Tx Pd'] = np.where(bklyn_df['Amount Mortgaged'] >= 500000, 
                                 bklyn_df['Amount Mortgaged'] * 0.0195, bklyn_df['Amount Mortgaged'] * 0.018)
bklyn_df['Mansion Tax Pd'] = np.where(bklyn_df['Purchase Amt'] >= 1000000, bklyn_df['Purchase Amt'] * 0.01, 0)
bklyn_df['New Sales Tax Amount'] = np.where(bklyn_df['Purchase Amt'] >= 1000000, bklyn_df['Purchase Amt'] * 0.0169,
                                            bklyn_df['Purchase Amt'] * 0.0169)

bklyn_df.head()

Unnamed: 0,ZIP,Zip Count,Type,Owner Occ?,Purchase Down %,Purchase Amt,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount
0,11201.0,1187,CND,0.0,33.0,4600000.0,3062500.0,59718.75,46000.0,77740.0
1,11201.0,1187,CND,0.0,30.0,4550000.0,3185000.0,62107.5,45500.0,76895.0
2,11201.0,1187,CND,0.0,69.0,2620000.0,825084.0,16089.138,26200.0,44278.0
3,11201.0,1187,CND,1.0,100.0,2700000.0,0.0,0.0,27000.0,45630.0
4,11201.0,1187,CND,0.0,100.0,4740000.0,0.0,0.0,47400.0,80106.0


In [5]:
#show the total mtg tax and mansion taxes collected and compare to what our hypothetical tax would have raised

total_mtg_tax_pd = bklyn_df['Mtg Tx Pd'].sum()
total_mansion_tax_pd = bklyn_df['Mansion Tax Pd'].sum()
total_est_tax = bklyn_df['New Sales Tax Amount'].sum()
print(f"The total mtg tax paid in bklyn in 2022 was ${total_mtg_tax_pd:,.0f} and the total mansion tax collected was ${total_mansion_tax_pd:,.0f}.")
print(f'The combined mtg and mansion tax was {total_mtg_tax_pd + total_mansion_tax_pd:,.0f}, our hypothetical sales tax would have raised ${total_est_tax:,.0f}')

The total mtg tax paid in bklyn in 2022 was $177,709,482 and the total mansion tax collected was $108,936,820.
The combined mtg and mansion tax was 286,646,302, our hypothetical sales tax would have raised $299,713,802


In [8]:
bklyn_by_zip_and_occ = bklyn_df.groupby(['ZIP', 'Owner Occ?']).agg({'Zip Count': 'count', 'Purchase Amt': 'sum', 
                                                            'Purchase Down %': 'mean',
                                                            'Amount Mortgaged': 'sum', 
                                                            'Mtg Tx Pd': 'sum', 'Mansion Tax Pd': 'sum', 
                                                            'New Sales Tax Amount': 'sum'})

bklyn_by_zip_and_occ.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Zip Count,Purchase Amt,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount
ZIP,Owner Occ?,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0.0,0.0,1,0.0,0.0,0.0,0.0,0.0
11201.0,0.0,677,1166733000.0,499361433.0,9694053.0,9711303.47,19717790.0
11201.0,1.0,510,989852500.0,424935315.0,8249097.0,8558505.11,16728510.0
11203.0,0.0,162,91241270.0,77032678.0,1459914.0,0.0,1541977.0
11203.0,1.0,222,123863000.0,106287037.0,2019364.0,13750.0,2093284.0


In [9]:
bklyn_by_zip = bklyn_df.groupby(['ZIP']).agg({'Zip Count': 'count', 'Purchase Amt': 'sum', 
                                                            'Purchase Down %': 'mean',
                                                            'Amount Mortgaged': 'sum', 
                                                            'Mtg Tx Pd': 'sum', 'Mansion Tax Pd': 'sum', 
                                                            'New Sales Tax Amount': 'sum'})
bklyn_by_zip.shape

(40, 7)

In [11]:
bklyn_by_zip['Average Purchase $'] = bklyn_by_zip['Purchase Amt']/bklyn_by_zip['Zip Count']
bklyn_by_zip['Average Mtg'] = bklyn_by_zip['Amount Mortgaged']/bklyn_by_zip['Zip Count']
bklyn_by_zip['Ave Hypothetical Tax'] = bklyn_by_zip['New Sales Tax Amount']/bklyn_by_zip['Zip Count']
pd.options.display.float_format = '{:.1f}'.format

bklyn_by_zip.head(20)

Unnamed: 0_level_0,Zip Count,Purchase Amt,Purchase Down %,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount,Average Purchase $,Average Mtg,Ave Hypothetical Tax
ZIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11201.0,1187,2156585841.0,54.5,924296748.0,17943149.6,18269808.6,36446300.7,1816837.3,778683.0,30704.5
11203.0,384,215104236.0,18.1,183319715.0,3479278.5,13750.0,3635261.6,560167.3,477395.1,9466.8
11204.0,313,244119904.0,48.2,137348534.0,2618449.1,493305.0,4125626.4,779935.8,438813.2,13180.9
11205.0,243,433696771.0,53.3,222852091.0,4319524.6,3374536.1,7329475.4,1784760.4,917086.8,30162.5
11206.0,173,125072210.0,48.0,76289290.0,1459413.8,215840.0,2113720.3,722960.8,440978.6,12218.0
11207.0,213,139825408.0,26.4,170771902.0,3285258.0,460100.0,2363049.4,656457.3,801746.0,11094.1
11208.0,184,97591593.0,27.9,89297188.0,1691983.4,19000.0,1649297.9,530389.1,485310.8,8963.6
11209.0,578,664303580.0,46.2,359218161.0,6950245.9,4450420.8,11226730.5,1149314.2,621484.7,19423.4
11210.0,523,540909987.0,31.9,349899548.0,6747389.9,2682005.5,9141378.8,1034244.7,669024.0,17478.7
