**Goal**  
Clean and explore the PLUTO tax lot data so it is ready to link to restaurants and to describe neighborhood property characteristics.

**Plan**
1. Load the PLUTO CSV from `data/raw`.
2. Keep only the columns we need.
3. Convert numeric fields (assessed value, year built) to the right types and handle missing values.
4. Check basic distributions for borough, land use, building class, assessed value, and year built.
5. Filter or flag obvious outliers or unusable rows if needed.
6. Save a cleaned version to `data/processed`.
7. Write a short summary.

In [5]:
import pandas as pd

# Location of the raw PLUTO export, relative to this notebook's directory.
pluto_path = "../data/raw/Primary_Land_Use_Tax_Lot_Output_(PLUTO)_20251202.csv"

# Parse the whole file in one pass (low_memory=False) so pandas infers a
# single dtype per column from all rows. The default chunked inference can
# produce mixed-type columns and emit a DtypeWarning on a wide file like this.
pluto = pd.read_csv(pluto_path, low_memory=False)

  pluto = pd.read_csv(pluto_path)


In [6]:
# Preview the first 20 rows of the raw table to sanity-check the load.
pluto.head(n=20)

Unnamed: 0,borough,Tax block,Tax lot,community board,census tract 2010,cb2010,schooldist,council district,postcode,firecomp,...,bctcb2020,geom,basempdate,dcasdate,edesigdate,landmkdate,masdate,polidate,rpaddate,zoningdate
0,BX,2869,47,205.0,243.0,3000.0,10.0,14.0,10453.0,E075,...,20243000000.0,,,,,,,,,
1,MN,675,39,104.0,99.0,1017.0,2.0,3.0,10001.0,E034,...,10099020000.0,,,,,,,,,
2,MN,698,54,104.0,99.0,1030.0,2.0,3.0,10001.0,E003,...,10099020000.0,,,,,,,,,
3,MN,698,56,104.0,99.0,1030.0,2.0,3.0,10001.0,E003,...,10099020000.0,,,,,,,,,
4,MN,698,28,104.0,99.0,1030.0,2.0,3.0,10001.0,E003,...,10099020000.0,,,,,,,,,
5,MN,698,37,104.0,99.0,1030.0,2.0,3.0,10001.0,E003,...,10099020000.0,,,,,,,,,
6,MN,698,35,104.0,99.0,1030.0,2.0,3.0,10001.0,E003,...,10099020000.0,,,,,,,,,
7,MN,698,47,104.0,99.0,1030.0,2.0,3.0,10001.0,E003,...,10099020000.0,,,,,,,,,
8,MN,1306,42,106.0,100.0,2004.0,2.0,4.0,10022.0,E008,...,10100000000.0,,,,,,,,,
9,MN,1304,26,106.0,100.0,2006.0,2.0,4.0,10017.0,L002,...,10100000000.0,,,,,,,,,


In [7]:
# List every column name in the raw table so the ones to keep can be chosen.
list(pluto.columns)

['borough',
 'Tax block',
 'Tax lot',
 'community board',
 'census tract 2010',
 'cb2010',
 'schooldist',
 'council district',
 'postcode',
 'firecomp',
 'policeprct',
 'healtharea',
 'sanitboro',
 'sanitsub',
 'address',
 'zonedist1',
 'zonedist2',
 'zonedist3',
 'zonedist4',
 'overlay1',
 'overlay2',
 'spdist1',
 'spdist2',
 'spdist3',
 'ltdheight',
 'splitzone',
 'bldgclass',
 'landuse',
 'easements',
 'ownertype',
 'ownername',
 'lotarea',
 'bldgarea',
 'comarea',
 'resarea',
 'officearea',
 'retailarea',
 'garagearea',
 'strgearea',
 'factryarea',
 'otherarea',
 'areasource',
 'numbldgs',
 'numfloors',
 'unitsres',
 'unitstotal',
 'lotfront',
 'lotdepth',
 'bldgfront',
 'bldgdepth',
 'ext',
 'proxcode',
 'irrlotcode',
 'lottype',
 'bsmtcode',
 'assessland',
 'assesstot',
 'exempttot',
 'yearbuilt',
 'yearalter1',
 'yearalter2',
 'histdist',
 'landmark',
 'builtfar',
 'residfar',
 'commfar',
 'facilfar',
 'borocode',
 'BBL',
 'condono',
 'tract2010',
 'xcoord',
 'ycoord',
 'latitud

In [None]:
# Columns retained from the raw PLUTO table, in the order they should appear
# in the trimmed frame.
keep_cols = [
    # -- lot identifiers --
    "BBL",         # unique tax lot ID
    "borough",     # borough abbreviation (e.g. BX, MN)
    "Tax block",   # tax block number within borough
    "Tax lot",     # tax lot number within block
    # -- address --
    "postcode",    # ZIP code
    "address",     # street address
    # -- use and classification --
    "landuse",     # land use code
    "bldgclass",   # building class code
    # -- size --
    "lotarea",     # area of the tax lot (sq ft)
    "bldgarea",    # total building floor area (sq ft)
    "unitsres",    # number of residential units
    "unitstotal",  # total units (res + others)
    # -- age and value --
    "yearbuilt",   # year the main building was built
    "assesstot",   # total assessed value of land + building
    # -- coordinates --
    "latitude",    # latitude of the lot
    "longitude",   # longitude of the lot
]

# Take an independent copy so later edits never write back into `pluto`.
pluto_small = pluto.loc[:, keep_cols].copy()



Unnamed: 0,BBL,borough,Tax block,Tax lot,postcode,address,landuse,bldgclass,lotarea,bldgarea,unitsres,unitstotal,yearbuilt,assesstot,latitude,longitude
0,2028690047,BX,2869,47,10453.0,89 WEST TREMONT AVENUE,3.0,D1,19146,36708,80,80,2003.0,915750.0,40.850594,-73.912743
1,1006750039,MN,675,39,10001.0,606 WEST 30TH ST,4.0,D7,17281,313013,277,278,2020.0,49067100.0,40.753382,-74.004717
2,1006980054,MN,698,54,10001.0,530 WEST 27 STREET,5.0,J5,3839,22524,0,6,1920.0,4349700.0,40.75061,-74.003865
3,1006980056,MN,698,56,10001.0,534 WEST 27 STREET,5.0,J5,1918,11583,0,5,1920.0,2505150.0,40.750648,-74.003959
4,1006980028,MN,698,28,10001.0,511 WEST 26 STREET,10.0,G6,12225,0,0,0,0.0,1191150.0,40.750129,-74.003136


In [9]:
# Preview the first 20 rows of the trimmed table to confirm the column subset.
pluto_small.head(n=20)

Unnamed: 0,BBL,borough,Tax block,Tax lot,postcode,address,landuse,bldgclass,lotarea,bldgarea,unitsres,unitstotal,yearbuilt,assesstot,latitude,longitude
0,2028690047,BX,2869,47,10453.0,89 WEST TREMONT AVENUE,3.0,D1,19146,36708,80,80,2003.0,915750.0,40.850594,-73.912743
1,1006750039,MN,675,39,10001.0,606 WEST 30TH ST,4.0,D7,17281,313013,277,278,2020.0,49067100.0,40.753382,-74.004717
2,1006980054,MN,698,54,10001.0,530 WEST 27 STREET,5.0,J5,3839,22524,0,6,1920.0,4349700.0,40.75061,-74.003865
3,1006980056,MN,698,56,10001.0,534 WEST 27 STREET,5.0,J5,1918,11583,0,5,1920.0,2505150.0,40.750648,-74.003959
4,1006980028,MN,698,28,10001.0,511 WEST 26 STREET,10.0,G6,12225,0,0,0,0.0,1191150.0,40.750129,-74.003136
5,1006980037,MN,698,37,10001.0,289 10 AVENUE,5.0,K2,4937,8700,0,1,1946.0,2703150.0,40.750088,-74.002826
6,1006980035,MN,698,35,10001.0,285 10 AVENUE,4.0,K4,3160,4344,0,4,1930.0,1097100.0,40.749967,-74.002869
7,1006980047,MN,698,47,10001.0,518 WEST 27 STREET,5.0,H1,4937,35987,0,1,2011.0,6775200.0,40.75042,-74.003418
8,1013060042,MN,1306,42,10022.0,154 EAST 52 STREET,5.0,O4,15062,304640,0,16,1984.0,37647000.0,40.757213,-73.970763
9,1013040026,MN,1304,26,10017.0,145 EAST 49 STREET,5.0,HS,5682,37371,0,3,1916.0,4904550.0,40.75578,-73.972019


In [10]:
from pathlib import Path

# Save the trimmed table to the processed-data folder.
# NOTE: only column selection has been applied so far; more cleaning is
# still needed before this file is final.
processed_path = Path("../data/processed/pluto_clean.csv")

# to_csv does not create missing directories, so make sure the output folder
# exists (no-op when it is already there).
processed_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the pandas row index out of the saved file.
pluto_small.to_csv(processed_path, index=False)