In [21]:
import pandas as pd

# Define the path to the CSV file
csv_path = r"C:/Users/mitchele/University of Iowa/Dedas, Jason M - Analytics Experience Group 2/Data/zillow_properties.csv"

# Read the CSV file into a DataFrame
properties_df = pd.read_csv(csv_path)

# Convert the 'dateSold' column from milliseconds to datetime
properties_df['dateSold'] = pd.to_datetime(properties_df['dateSold'], unit='ms')

# Format the 'dateSold' column to mm/dd/yyyy
properties_df['dateSold'] = properties_df['dateSold'].dt.strftime('%m/%d/%Y')

# Split the 'address' column into 'street_address', 'city', 'state', and 'zip'
address_split = properties_df['address'].str.extract(r'(?P<street_address>.*),\s*(?P<city>.*),\s*(?P<state>\w{2})\s*(?P<zipcode>\d{5})')

# Concatenate the split address columns back to the original dataframe
properties_df = pd.concat([properties_df, address_split], axis=1)

# Drop the original 'address' column
properties_df = properties_df.drop(columns=['address'])

# Define the desired column order
desired_column_order = ['dateSold', 'zpid', 'propertyType', 'zestimate','price', 'street_address','city', 'state', 'zipcode','bedrooms',
                        'bathrooms','livingArea', 'lotAreaValue','lotAreaUnit','listingStatus', 'daysOnZillow','latitude', 'longitude', 'imgSrc','detailUrl']

# Reindex the DataFrame to reorder columns
properties_df = properties_df.reindex(columns=desired_column_order)

# Print the DataFrame to verify the import, conversion, reordering, and address splitting
print(properties_df.head())

# Save the reordered DataFrame to a new CSV file
csv_path_reordered = r"C:/Users/mitchele/University of Iowa/Dedas, Jason M - Analytics Experience Group 2/Data/zillow_properties_clean.csv"
properties_df.to_csv(csv_path_reordered, index=False)

print(f"Data saved to {csv_path_reordered}")


     dateSold       zpid   propertyType  zestimate   price  \
0  06/07/2024  200404042  SINGLE_FAMILY   193000.0  100000   
1  06/05/2024  347056087          CONDO        NaN  113000   
2  06/03/2024   57254348          CONDO   110100.0  110000   
3  05/29/2024   57735447      TOWNHOUSE    81200.0  111000   
4  05/24/2024   59000609  SINGLE_FAMILY   110500.0  110000   

              street_address              city state zipcode  bedrooms  \
0        1257 Old Clinton Rd         Westbrook    CT   06498       1.0   
1    246 Centerbrook Rd #246            Hamden    CT   06518       3.0   
2  50 Greenhouse Rd UNIT 34B        Bridgeport    CT   06606       1.0   
3           187 Sigourney St          Hartford    CT   06105       4.0   
4             63 Tolland Ave  Stafford Springs    CT   06076       3.0   

   bathrooms  livingArea  lotAreaValue lotAreaUnit  listingStatus  \
0        1.0       516.0           0.5       acres  RECENTLY_SOLD   
1        2.0      1850.0           NaN      

In [19]:
prop_df = pd.DataFrame(properties_df)
prop_df

Unnamed: 0,dateSold,zpid,propertyType,zestimate,price,street_address,city,state,zipcode,bedrooms,bathrooms,livingArea,lotAreaValue,lotAreaUnit,listingStatus,daysOnZillow,latitude,longitude,imgSrc,detailUrl
0,06/07/2024,200404042,SINGLE_FAMILY,193000.0,100000,1257 Old Clinton Rd,Westbrook,CT,06498,1.0,1.0,516.0,0.500,acres,RECENTLY_SOLD,4,41.287952,-72.453610,https://photos.zillowstatic.com/fp/8c765e0a2ca...,/homedetails/1257-Old-Clinton-Rd-Westbrook-CT-...
1,06/05/2024,347056087,CONDO,,113000,246 Centerbrook Rd #246,Hamden,CT,06518,3.0,2.0,1850.0,,,RECENTLY_SOLD,6,41.376650,-72.909150,https://photos.zillowstatic.com/fp/479f688eaa6...,/homedetails/246-Centerbrook-Rd-246-Hamden-CT-...
2,06/03/2024,57254348,CONDO,110100.0,110000,50 Greenhouse Rd UNIT 34B,Bridgeport,CT,06606,1.0,1.0,825.0,,,RECENTLY_SOLD,8,41.218826,-73.221450,https://photos.zillowstatic.com/fp/71c52b8056e...,/homedetails/50-Greenhouse-Rd-UNIT-34B-Bridgep...
3,05/29/2024,57735447,TOWNHOUSE,81200.0,111000,187 Sigourney St,Hartford,CT,06105,4.0,3.0,2306.0,6969.600,sqft,RECENTLY_SOLD,13,41.772133,-72.693436,https://photos.zillowstatic.com/fp/7ca93a2dd74...,/homedetails/187-Sigourney-St-Hartford-CT-0610...
4,05/24/2024,59000609,SINGLE_FAMILY,110500.0,110000,63 Tolland Ave,Stafford Springs,CT,06076,3.0,1.0,896.0,5227.200,sqft,RECENTLY_SOLD,18,41.949284,-72.319810,https://photos.zillowstatic.com/fp/701cfe7dd9f...,/homedetails/63-Tolland-Ave-Stafford-Springs-C...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24595,08/31/2021,2074798650,SINGLE_FAMILY,871300.0,689000,10 Cocheco Ave LOT 10A,Branford,CT,06405,3.0,3.0,1730.0,6847.632,sqft,RECENTLY_SOLD,1015,41.259937,-72.800120,https://photos.zillowstatic.com/fp/580fe0e55ee...,/homedetails/10-Cocheco-Ave-LOT-10A-Branford-C...
24596,08/30/2021,57296100,SINGLE_FAMILY,808300.0,689000,83 Pepperidge Cir,Fairfield,CT,06824,3.0,2.0,1636.0,10454.400,sqft,RECENTLY_SOLD,1016,41.169785,-73.256790,https://photos.zillowstatic.com/fp/4c288574f63...,/homedetails/83-Pepperidge-Cir-Fairfield-CT-06...
24597,08/30/2021,57399975,SINGLE_FAMILY,914800.0,685000,34 Bull Frog Ln,Trumbull,CT,06611,6.0,4.0,3822.0,1.000,acres,RECENTLY_SOLD,1016,41.274937,-73.256035,https://photos.zillowstatic.com/fp/b1e8501db61...,/homedetails/34-Bull-Frog-Ln-Trumbull-CT-06611...
24598,08/27/2021,174059427,SINGLE_FAMILY,865900.0,693500,424 Woodland St,South Glastonbury,CT,06073,4.0,3.0,3466.0,0.920,acres,RECENTLY_SOLD,1019,41.659830,-72.560910,https://photos.zillowstatic.com/fp/179c0a96d2c...,/homedetails/424-Woodland-St-South-Glastonbury...


In [26]:
zpid_list = prop_df['zpid'].tolist()
zpid_list

[200404042,
 347056087,
 57254348,
 57735447,
 59000609,
 345460302,
 2056886378,
 241163272,
 61994768,
 62546314,
 241157987,
 57737572,
 57247564,
 57910660,
 66120457,
 347397039,
 59966293,
 59296178,
 333559108,
 57703905,
 57897415,
 57935560,
 343513883,
 207181052,
 58000161,
 57355677,
 96133151,
 58162278,
 59295751,
 58968757,
 57281339,
 246083317,
 299911926,
 2053760309,
 57254361,
 241164570,
 58087487,
 57809982,
 58129304,
 57735379,
 57736205,
 2056894161,
 57708776,
 57739754,
 68976015,
 57334775,
 57906011,
 207170997,
 241320420,
 57736088,
 58976506,
 147591517,
 241163073,
 2063599010,
 57805769,
 207181094,
 174039665,
 241157994,
 197770798,
 200393848,
 241180453,
 2056879980,
 59294329,
 61996531,
 2053164326,
 2054067502,
 58002426,
 174012675,
 246083334,
 57247816,
 197804376,
 57700402,
 57733220,
 123514350,
 2126345500,
 174050908,
 58009135,
 59287485,
 60100800,
 174186244,
 174384777,
 57910310,
 57731871,
 174042223,
 59056040,
 250757308,
 572543

In [28]:
len(zpid_list)

24600