In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

batt_park = pd.read_csv('Batt_park_2022_sales.csv')
other_three_df = pd.read_csv('highest_three_lowest_four.csv')

frames = [batt_park, other_three_df]
highest_and_lowest = pd.concat(frames)

In [2]:
#create full DF and combined columns for first and second mtgs into one colums, 
#added a column for count to track how many get combined later into groupbys and

highest_and_lowest = highest_and_lowest.fillna(0)
highest_and_lowest['Amount Mortgaged'] = highest_and_lowest['1st Amount'] + highest_and_lowest['2nd Amount']
highest_and_lowest['Zip Count'] = highest_and_lowest['ZIP'].map(highest_and_lowest['ZIP'].value_counts())
highest_and_lowest.tail()

Unnamed: 0,Type,Address,ZIP,Purchase Amt,Purchase Down %,1st Amount,2nd Amount,CLTV %,Purchase Date,Purchase Type,City,Owner Occ?,Listed for Sale?,Amount Mortgaged,Zip Count
2159,CND,10 GREENE ST 3,10013.0,4950000.0,37.0,3100000.0,0.0,59.0,11/22/2023,Market,NEW YORK,0.0,0.0,3100000.0,673
2160,CND,570 BROOME ST 9C,10013.0,1287500.0,20.0,1030000.0,0.0,73.0,11/15/2021,Market,NEW YORK,1.0,0.0,1030000.0,673
2161,CND,14427 SANFORD AVE 2N,11355.0,777500.0,100.0,0.0,0.0,0.0,4/21/2023,Market,FLUSHING,0.0,0.0,0.0,852
2162,CND,465 WASHINGTON ST 7,10013.0,4150000.0,100.0,0.0,0.0,0.0,8/23/2023,Market,NEW YORK,0.0,0.0,0.0,673
2163,The information contained in this report is su...,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0.0,0.0,2


In [3]:
#filtered DF to only relevant columns
High_low_df = highest_and_lowest.loc[:, ['ZIP', 'Zip Count', 'Type', 'Owner Occ?', 'Purchase Down %', 
                                 'Purchase Amt', 'Amount Mortgaged']]
High_low_df.head()

Unnamed: 0,ZIP,Zip Count,Type,Owner Occ?,Purchase Down %,Purchase Amt,Amount Mortgaged
0,10282.0,38,CND,1.0,100.0,3380000.0,0.0
1,10282.0,38,CND,0.0,82.0,1678000.0,312260.0
2,10282.0,38,CND,0.0,100.0,1225000.0,0.0
3,10282.0,38,CND,0.0,100.0,1905000.0,0.0
4,10282.0,38,CND,1.0,41.0,1765000.0,1050000.0


In [4]:
#calculate the mortage tax as 1.8% for mortgages less than $500,000 and 1.95% for mortgages of $500,000 or more
#calculate the mansion tax as 1% for sales over $1,000,000
# calculate the hypotehtical sales tax if the tax rate as 2/3 or the mortgage tax (1.19%) for purchases less than
# $1,000,000 and add an additional 0.5% (1.69%)for purchases over $1,000,000

High_low_df['Mtg Tx Pd'] = np.where(High_low_df['Amount Mortgaged'] >= 500000, 
                                 High_low_df['Amount Mortgaged'] * 0.0195, High_low_df['Amount Mortgaged'] * 0.018)
High_low_df['Mansion Tax Pd'] = np.where(High_low_df['Purchase Amt'] >= 1000000, High_low_df['Purchase Amt'] * 0.01, 0)
High_low_df['New Sales Tax Amount'] = np.where(High_low_df['Purchase Amt'] >= 1000000, High_low_df['Purchase Amt'] * 0.0169,
                                            High_low_df['Purchase Amt'] * 0.0169)

High_low_df.head()

Unnamed: 0,ZIP,Zip Count,Type,Owner Occ?,Purchase Down %,Purchase Amt,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount
0,10282.0,38,CND,1.0,100.0,3380000.0,0.0,0.0,33800.0,57122.0
1,10282.0,38,CND,0.0,82.0,1678000.0,312260.0,5620.68,16780.0,28358.2
2,10282.0,38,CND,0.0,100.0,1225000.0,0.0,0.0,12250.0,20702.5
3,10282.0,38,CND,0.0,100.0,1905000.0,0.0,0.0,19050.0,32194.5
4,10282.0,38,CND,1.0,41.0,1765000.0,1050000.0,20475.0,17650.0,29828.5


In [5]:
#show the total mtg tax and mansion taxes collected and compare to what our hypothetical tax would have raised

total_mtg_tax_pd = High_low_df['Mtg Tx Pd'].sum()
total_mansion_tax_pd = High_low_df['Mansion Tax Pd'].sum()
total_est_tax = High_low_df['New Sales Tax Amount'].sum()
print(f"The total mtg tax paid in these 4 zips in 2022 was ${total_mtg_tax_pd:,.0f} and the total mansion tax collected was ${total_mansion_tax_pd:,.0f}.")
print(f'The combined mtg and mansion tax was {total_mtg_tax_pd + total_mansion_tax_pd:,.0f}, our hypothetical sales tax would have raised ${total_est_tax:,.0f}')

The total mtg tax paid in these 4 zips in 2022 was $32,193,805 and the total mansion tax collected was $38,860,868.
The combined mtg and mansion tax was 71,054,672, our hypothetical sales tax would have raised $77,231,173


In [9]:
high_low_by_zip_and_occ = High_low_df.groupby(['ZIP', 'Owner Occ?']).agg({'Zip Count': 'count', 'Purchase Amt': 'sum', 
                                                            'Purchase Down %': 'mean',
                                                            'Amount Mortgaged': 'sum', 
                                                            'Mtg Tx Pd': 'sum', 'Mansion Tax Pd': 'sum', 
                                                            'New Sales Tax Amount': 'sum'})

high_low_by_zip_and_occ.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Zip Count,Purchase Amt,Purchase Down %,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount
ZIP,Owner Occ?,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0.0,0.0,2,0.0,0.0,0.0,0.0,0.0,0.0
10007.0,0.0,76,248142500.0,77.078947,52482210.0,1022412.0,2434724.73,4193608.0
10007.0,1.0,207,720989900.0,63.917874,330202974.0,6436566.0,7175799.21,12184730.0
10013.0,0.0,167,691526700.0,76.497006,167530103.0,3264338.0,6841706.88,11686800.0
10013.0,1.0,506,2000907000.0,65.745059,751889374.0,14656300.0,19773756.26,33815330.0


In [10]:
high_low_by_zip = High_low_df.groupby(['ZIP']).agg({'Zip Count': 'count', 'Purchase Amt': 'sum', 
                                                            'Purchase Down %': 'mean',
                                                            'Amount Mortgaged': 'sum', 
                                                            'Mtg Tx Pd': 'sum', 'Mansion Tax Pd': 'sum', 
                                                            'New Sales Tax Amount': 'sum'})
high_low_by_zip.shape

(8, 7)

In [13]:
high_low_by_zip['Average Purchase $'] = high_low_by_zip['Purchase Amt']/high_low_by_zip['Zip Count']
high_low_by_zip['Average Mtg'] = high_low_by_zip['Amount Mortgaged']/high_low_by_zip['Zip Count']
high_low_by_zip['Ave Hypothetical Tax'] = high_low_by_zip['New Sales Tax Amount']/high_low_by_zip['Zip Count']
pd.options.display.float_format = '{:.1f}'.format

high_low_by_zip.head(20)

Unnamed: 0_level_0,Zip Count,Purchase Amt,Purchase Down %,Amount Mortgaged,Mtg Tx Pd,Mansion Tax Pd,New Sales Tax Amount,Average Purchase $,Average Mtg,Ave Hypothetical Tax
ZIP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10007.0,283,969132394.0,67.5,382685184.0,7458977.8,9610523.9,16378337.5,3424496.1,1352244.5,57874.0
10013.0,673,2692434001.0,68.4,919419477.0,17920636.1,26615463.1,45502134.6,4000644.9,1366150.8,67610.9
10282.0,38,108434516.0,64.5,38234470.0,744427.9,1084345.2,1832543.3,2853539.9,1006170.3,48224.8
10453.0,23,13420800.0,26.0,13365533.0,253496.4,0.0,226811.5,583513.0,581110.1,9861.4
10457.0,61,30747049.0,21.8,26901099.0,505614.0,46010.0,519625.1,504050.0,441001.6,8518.4
11355.0,852,601502235.0,66.2,223353098.0,4182613.5,1273987.0,10165387.8,705988.5,262151.5,11931.2
11368.0,271,154220886.0,57.9,61247672.0,1128039.1,230538.4,2606333.0,569080.8,226006.2,9617.5
