In [59]:
from pyspark.sql.functions import mean, udf, col, round, max, greatest, count
from pyspark.sql.types import DoubleType
from pyspark.sql.functions import rank, col
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
%matplotlib inline

In [60]:
# ! curl -O https://raw.githubusercontent.com/NYCPlanning/db-pluto/master/pluto_build/data/dcp_zoning_maxfar.csv
# ! mv dcp_zoning_maxfar.csv data/dcp_zoning_maxfar.csv

In [61]:
df = spark.read.csv('data/pluto.csv', header=True)
new_far = spark.read.csv('data/dcp_zoning_maxfar.csv', header=True)
new_far = new_far.select([col(A).alias(A.lower()+'_new') for A in new_far.columns])
df = df.join(new_far, df['zonedist1'] == new_far['zonedist_new'])

In [62]:
df = df.select([col(A).alias(A.lower()) for A in df.columns])\
       .withColumn('maxfar', greatest(col('residfar'), col('commfar')))\
       .withColumn('pctunbuilt', ((col('maxfar') - col('builtfar'))/col('maxfar')))\
       .withColumn('maxfar_new', greatest(col('residfar_new'), col('commfar_new')))\
       .withColumn('pctunbuilt_new', ((col('maxfar_new') - col('builtfar'))/col('maxfar_new')))\
       .filter(col('unitsres').cast(DoubleType()) <= 6)\
       .filter(col('landmark').isNull())\
       .filter(col('irrlotcode') != 'Y')\
       .filter(~col ('bldgclass').like('M%'))\
       .filter(col('landuse') != '08')\
       .filter(col('easements').cast(DoubleType()) <= 0)\

In [63]:
# Using new Far values: 
df.groupBy('borough')\
  .agg(count(col('pctunbuilt_new') >= 0.5).alias('softsites'))\
  .show()

+-------+---------+
|borough|softsites|
+-------+---------+
|     MN|     3770|
|     BX|      959|
|     QN|     2803|
|     BK|     3474|
|     SI|      639|
+-------+---------+



In [64]:
# Using old Far values: 
df.groupBy('borough')\
  .agg(count(col('pctunbuilt') >= 0.5).alias('softsites'))\
  .show()

+-------+---------+
|borough|softsites|
+-------+---------+
|     MN|    14245|
|     BX|    64049|
|     QN|   268392|
|     BK|   229183|
|     SI|    95226|
+-------+---------+

