In [1]:
#import modules
import pandas as pd
import numpy as np
from datetime import datetime
#See max columns in a dataframe
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


In [2]:
# Read the CSV file with specified data types and skipped first 4 rows
vacant_lots_df = pd.read_csv("data/tolemi-export1692668115380.csv",skiprows=4)


In [3]:
#To see the rows before dropping duplicates
vacant_lots_df.shape

(25000, 26)

In [4]:
# Remove duplicates based on all columns
vacant_lots_df = vacant_lots_df.drop_duplicates(subset=['Parcel ID'])


In [5]:
#Rows after duplicates have been dropped
vacant_lots_df.shape

(24901, 26)

In [6]:
#Creates field Property Age Years in df

#Assign a variable to the current year
current_year = 2023

# Calculate "Property Age Years" as the difference between 2023 and Year Built
vacant_lots_df['Property Age Years'] = current_year - vacant_lots_df['Year Built']

# Assign the calculated columns to variables for potential future use
property_age_years = vacant_lots_df['Property Age Years']


In [7]:
#Creates field Vacancy Days in df

# Convert today's date to a Timestamp object
today = pd.Timestamp(datetime.today().date())

# Convert the 'Vacant Since' column to a Datetime column 
vacant_lots_df['Vacant Since'] = pd.to_datetime(vacant_lots_df['Vacant Since'])

# Calculate "Vacancy Days" as the difference between today's date and "Vacant Since" date
vacant_lots_df['Vacancy Days'] = (today - vacant_lots_df['Vacant Since']).dt.days

# Assign the calculated columns to variables for potential future use
vacancy_days = vacant_lots_df['Vacancy Days']

In [100]:
#For ending set of 3 questions use a function to map Single Family,Multi, and Commercial
def zoning_triple_category(row):
    zoning=row['Zoning']
    
    if pd.notna(zoning):#check if thed value is not NaN
        if "Single Family" in zoning or "Mobile Home" in zoning:
            return "Single Family"
        elif any(keyword in zoning for keyword in ["Multifamily", "Duplex", "Cluster Housing", "Townhouse"]):
            return "Multifamily"
        elif "Commercial" in zoning:
            return "Commercial"
        return "Other" #Return "Other" for NaN or missing values
vacant_lots_df['Zoning Category 3'] = vacant_lots_df.apply(zoning_triple_category, axis=1)    

In [101]:
#Checks strings in zoning column and maps them
def classify_zoning_category(row):
    zoning = row['Zoning']
    
    if pd.notna(zoning):  # Check if the value is not NaN
        if "Central Area" in zoning:
            return "Central Area"
        elif "Retail" in zoning or "Neighborhood Service" in zoning:
            return "Retail"
        elif any(keyword in zoning for keyword in ["Single Family", "Multifamily", "Duplex", "Cluster Housing", "Townhouse"]):
            return "Residential"
        elif "Office" in zoning:
            return "Office"
        elif "Industrial" in zoning:
            return "Industrial"
        elif "Commercial" in zoning:
            return "Commercial"
    return "Other"  # Return "Other" for NaN or missing values

# Apply the function to create the "Zoning Category" column
vacant_lots_df['Zoning Category'] = vacant_lots_df.apply(classify_zoning_category, axis=1)

In [10]:
# Check if "CHURCH" is a partial match in the "Current Use" column and create the "Owner is Church" column
vacant_lots_df['Owner is Church']=vacant_lots_df['Current Use'].str.contains('CHURCH', case=False, na=False)

# Map the True/False values to 'Yes' and 'No' in the "Owner is Church" column
vacant_lots_df['Owner is Church'] = vacant_lots_df['Owner is Church'].map({True: 'Yes', False: 'No'})

owner_is_church=vacant_lots_df["Owner is Church"]

In [11]:
# Define a function to apply the classification logic
def classify_owner(row):
    if pd.notna(row['Public Owner']):
        return row['Public Owner']
    elif row['Owner is Church'] == 'Yes':
        return 'Church'
    elif row['Owner Is Business'] == 'Yes':
        return 'Business'
    elif row['Bank Owned Property'] == 'Yes':
        return 'Bank'
    elif row['Public Owner'] == 'City of Dallas':
        return 'City of Dallas'
    elif row['Public Owner'] == 'City of Dallas Housing Authority':
        return 'City of Dallas Housing Authority'
    elif row['Public Owner'] == 'County of Dallas':
        return 'County of Dallas'  
    else:
        return 'Private Owner'

# Apply the function to create the "Owner" column
vacant_lots_df['Owner'] = vacant_lots_df.apply(classify_owner, axis=1)
   

In [116]:
# Selecting columns of interest and filtering by 'Property Type' == 'Structure'
structure_df = vacant_lots_df[vacant_lots_df['Property Type'] == 'Structure']

# Selecting columns of interest and filtering by 'Property Type' == 'Land' for land dataframe
land_df = vacant_lots_df[vacant_lots_df['Property Type'] == 'Land']

# Filter residential properties
residential_properties = vacant_lots_df[vacant_lots_df['Zoning Category'] == 'Residential']

# Filter commercial properties
commercial_properties = vacant_lots_df[vacant_lots_df['Zoning Category 3'] == 'Commercial']


In [102]:
# Export DataFrame to CSV
csv_filename = 'K.csv'
vacant_lots_df.to_csv(csv_filename, index=False)

In [14]:
vacant_lots_df.head()

Unnamed: 0,Vacant Since,Parcel ID,Address,Property Type,Current Use,Year Built,Total Living Area,Public Owner,City or County Owned,Owner Is Business,Suspected Heir Property,Bank Owned Property,Is Vacant Property,Taxes Delinquent Since,Delinquent Taxes,Is Tax Delinquent,Zip Code,Zoning,Council Districts,Code Liens - Count,Code Liens - Amount,Land Size,Depth,Frontage,Owner Property Count,Total Main Area,Property Age Years,Vacancy Days,Zoning Category 3,Zoning Category,Owner is Church,Owner
0,2023-06-01,"=""00000100051000000""","400 MAIN ST, DALLAS, TX",Land,C12 - COMMERCIAL - VACANT PLOTTED LOTS/TRACTS ...,0.0,,,No,Unknown,No,No,Yes,,,No,75202.0,Central Area - CA-1(A)-Central Area,District 2,,,0.91849,,200.0,,,2023.0,89.0,Other,Central Area,No,Private Owner
1,2022-04-01,"=""00000100561000000""","1109 WOOD ST, DALLAS, TX",Land,C12 - COMMERCIAL - VACANT PLOTTED LOTS/TRACTS ...,0.0,,,No,Yes,No,No,Yes,,,No,75202.0,Central Area - CA-1(A)-Central Area,District 14,,,0.124653,,50.0,20.0,,2023.0,515.0,Other,Central Area,No,Business
2,2023-06-01,"=""00000101971000000""","1908 ELM ST, DALLAS, TX",Structure,C12 - COMMERCIAL - VACANT PLOTTED LOTS/TRACTS ...,0.0,,,No,No,No,No,Yes,,,No,75201.0,Other - PD-619,District 14,,,0.122321,,50.0,1.0,,2023.0,89.0,Other,Other,No,Private Owner
3,2022-04-01,"=""00000101995000000""","1913 COMMERCE ST, DALLAS, TX",Land,C12 - COMMERCIAL - VACANT PLOTTED LOTS/TRACTS ...,0.0,,City of Dallas,Yes,No,No,No,Yes,,,No,75201.0,Other - PD-619,District 14,,,0.117204,,50.0,2926.0,,2023.0,515.0,Other,Other,No,City of Dallas
4,2022-04-01,"=""00000103702000000""","2523 MAIN ST, DALLAS, TX",Land,C12 - COMMERCIAL - VACANT PLOTTED LOTS/TRACTS ...,0.0,,,No,Yes,No,No,Yes,,,No,75226.0,Other - PD-269,District 2,,,0.126854,,0.0,90.0,,2023.0,515.0,Other,Other,No,Business


In [15]:
vacant_lots_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 24901 entries, 0 to 24996
Data columns (total 32 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Vacant Since             24771 non-null  datetime64[ns]
 1   Parcel ID                24901 non-null  object        
 2   Address                  24901 non-null  object        
 3   Property Type            24901 non-null  object        
 4   Current Use              24901 non-null  object        
 5   Year Built               23820 non-null  float64       
 6   Total Living Area        13327 non-null  float64       
 7   Public Owner             1641 non-null   object        
 8   City or County Owned     24901 non-null  object        
 9   Owner Is Business        24901 non-null  object        
 10  Suspected Heir Property  24901 non-null  object        
 11  Bank Owned Property      24901 non-null  object        
 12  Is Vacant Property       24901 n

In [16]:
vacant_lots_df.isnull().sum()

Vacant Since                 130
Parcel ID                      0
Address                        0
Property Type                  0
Current Use                    0
Year Built                  1081
Total Living Area          11574
Public Owner               23260
City or County Owned           0
Owner Is Business              0
Suspected Heir Property        0
Bank Owned Property            0
Is Vacant Property             0
Taxes Delinquent Since     21419
Delinquent Taxes           21419
Is Tax Delinquent              0
Zip Code                       2
Zoning                      4875
Council Districts             13
Code Liens - Count         20782
Code Liens - Amount        20782
Land Size                      0
Depth                      12869
Frontage                     303
Owner Property Count        2473
Total Main Area            11574
Property Age Years          1081
Vacancy Days                 130
Zoning Category 3           4875
Zoning Category                0
Owner is C

In [17]:
vacant_lots_df.columns

Index(['Vacant Since', 'Parcel ID', 'Address', 'Property Type', 'Current Use',
       'Year Built', 'Total Living Area', 'Public Owner',
       'City or County Owned', 'Owner Is Business', 'Suspected Heir Property',
       'Bank Owned Property', 'Is Vacant Property', 'Taxes Delinquent Since',
       'Delinquent Taxes', 'Is Tax Delinquent', 'Zip Code', 'Zoning',
       'Council Districts', 'Code Liens - Count', 'Code Liens - Amount',
       'Land Size', 'Depth', 'Frontage', 'Owner Property Count',
       'Total Main Area', 'Property Age Years', 'Vacancy Days',
       'Zoning Category 3', 'Zoning Category', 'Owner is Church', 'Owner'],
      dtype='object')

In [103]:
#1.How many vacant lots are in each council district?

#Groups Council distrcits and if it is vacant by council district
vacant_lots_counts_by_district  = vacant_lots_df.groupby(['Council Districts', 'Is Vacant Property'])['Council Districts'].size().unstack(fill_value=0)


In [104]:
results = {}

#dictonary that starts the result values
results["How many vacant lots are in each council district?"] = {
    'question_number': 1,
    'value': vacant_lots_counts_by_district
}

results


{'How many vacant lots are in each council district?': {'question_number': 1,
  'value': Is Vacant Property   Yes
  Council Districts       
  District 1          1233
  District 10          498
  District 11          555
  District 12          216
  District 13          983
  District 14         1182
  District 2          2340
  District 3          1437
  District 4          3552
  District 5          1541
  District 6          3857
  District 7          4047
  District 8          2829
  District 9           618}}

In [20]:
#2 Group by "Council District" and compute statistics
vacant_lots_stats = vacant_lots_df.groupby('Council Districts')['Vacancy Days'].agg(['min','median', 'mean', 'max'])

# Rename the columns for clarity
vacant_lots_stats.columns = ['Min Days', 'Median Days', 'Mean Days', 'Max Days']


In [21]:
results={}

results["How many vacant lots are in each council district?"] = {
    'question_number': 2,
    'value': vacant_lots_stats
}

results

{'How many vacant lots are in each council district?': {'question_number': 2,
  'value':                    Min Days  Median Days   Mean Days  Max Days
  Council Districts                                             
  District 1             89.0        515.0  478.136919     515.0
  District 10            89.0        515.0  445.271084     515.0
  District 11            89.0        515.0  454.556364     515.0
  District 12            89.0        515.0  441.957944     515.0
  District 13            89.0        515.0  450.408350     515.0
  District 14            62.0        515.0  457.983578     515.0
  District 2             89.0        515.0  472.863359     515.0
  District 3             89.0        515.0  495.934737     515.0
  District 4             89.0        515.0  482.039459     515.0
  District 5             89.0        515.0  496.064893     515.0
  District 6             89.0        515.0  486.229948     515.0
  District 7             89.0        515.0  494.058329     515.0
  D

In [22]:
# 3 How many are zoned single family residential, commercial and multifamily? <-- rows are council districts; columns are counts within each of the 3 zoning types

# Group by 'Council Districts' and 'Zoning Category', then count occurrences
zoning_category_counts = vacant_lots_df.groupby(['Council Districts', 'Zoning Category']).size().unstack(fill_value=0)


In [23]:
results={}

results[" How many are zoned single family residential, commercial and multifamily? <-- rows are council districts; columns are counts within each of the 3 zoning types"] = {
    'question_number': 3,
    'value': zoning_category_counts
}

results

{' How many are zoned single family residential, commercial and multifamily? <-- rows are council districts; columns are counts within each of the 3 zoning types': {'question_number': 3,
  'value': Zoning Category    Central Area  Industrial  Office  Other  Residential  \
  Council Districts                                                         
  District 1                    0          27       2    754          373   
  District 10                   0          55      15    249          155   
  District 11                   0           0      18    309          208   
  District 12                   0           0       9     64          133   
  District 13                   0           0      17    307          626   
  District 14                 163           1       0    841          145   
  District 2                   79         178      11   1622          383   
  District 3                    0         151      16    409          746   
  District 4                    0 

In [24]:
# 3 How many are zoned single family residential, commercial and multifamily? <-- rows are council districts; columns are counts within each of the 3 zoning types

# Group by 'Council Districts' and 'Zoning Category', then count occurrences
zoning_category_counts_3 = vacant_lots_df.groupby(['Council Districts', 'Zoning Category 3']).size().unstack(fill_value=0)


In [105]:
results={}

results[" How many are zoned single family residential, commercial and multifamily? <-- rows are council districts; columns are counts within each of the 3 zoning types"] = {
    'question_number': 3.5,
    'value': zoning_category_counts_3
}

results

{' How many are zoned single family residential, commercial and multifamily? <-- rows are council districts; columns are counts within each of the 3 zoning types': {'question_number': 3.5,
  'value': Zoning Category 3  Commercial  Multifamily  Other  Single Family
  Council Districts                                               
  District 1                  5           64    804            312
  District 10                 5           88    237             67
  District 11                 0           35    347            173
  District 12                 0           48     83             85
  District 13                 0           68    326            558
  District 14                 1          122    625             23
  District 2                152          267   1200            116
  District 3                 18           70    654            676
  District 4                 75          238   1076           2159
  District 5                 17           50    579            89

In [26]:
# 4. How many vacant structures are in each council district?

# Groups Council districts by structure and vacant property

# Grouping by 'Council Districts' and counting occurrences
vacant_structure_count = structure_df.groupby(['Council Districts', 'Is Vacant Property'])['Property Type'].size().unstack(fill_value=0)

In [27]:
results={}

results["How many vacant structures are in each council district?"] = {
    'question_number': 4,
    'value': vacant_structure_count
}

results

{'How many vacant structures are in each council district?': {'question_number': 4,
  'value': Is Vacant Property   Yes
  Council Districts       
  District 1           392
  District 10          198
  District 11          288
  District 12           76
  District 13          639
  District 14          481
  District 2           832
  District 3           311
  District 4          1120
  District 5           388
  District 6           965
  District 7           791
  District 8           583
  District 9           433}}

In [28]:
#5 How long have the vacant structures in each council district been vacant? (see question 2)
vacant_structure_count_days = structure_df.groupby(['Council Districts','Vacancy Days'])['Vacancy Days'].size()


In [29]:
results={}

results["How long have the vacant structures in each council district been vacant? (see question 2)"] = {
    'question_number': 5,
    'value': vacant_structure_count_days
}

results

{'How long have the vacant structures in each council district been vacant? (see question 2)': {'question_number': 5,
  'value': Council Districts  Vacancy Days
  District 1         89.0             51
                     181.0             6
                     234.0             3
                     270.0             3
                     327.0             7
                     362.0             8
                     424.0            13
                     515.0           300
  District 10        89.0             46
                     120.0             1
                     181.0            13
                     234.0             3
                     270.0            12
                     327.0             7
                     362.0             3
                     424.0             4
                     515.0           109
  District 11        89.0             31
                     120.0             2
                     181.0            13
                   

In [30]:

#6 How many vacant structures in each council district are zoned single family residential, commercial and multifamily? (see question 3)

zoning_category_counts_structure = structure_df.groupby(['Council Districts', 'Zoning Category']).size().unstack(fill_value=0)


In [31]:
results={}

results["How many vacant structures in each council district are zoned single family residential, commercial and multifamily? (see question 3)"] = {
    'question_number': 6,
    'value': zoning_category_counts_structure
}

results

{'How many vacant structures in each council district are zoned single family residential, commercial and multifamily? (see question 3)': {'question_number': 6,
  'value': Zoning Category    Central Area  Industrial  Office  Other  Residential  \
  Council Districts                                                         
  District 1                    0           3       0    200          163   
  District 10                   0          22      12    106           41   
  District 11                   0           0       8    137          130   
  District 12                   0           0       4     15           50   
  District 13                   0           0      13    140          466   
  District 14                  38           1       0    367           65   
  District 2                    9          33       2    628          138   
  District 3                    0          12       1     49          213   
  District 4                    0          27       1     72

In [32]:
#7.How many vacant lots are in each zip code?

# Set the display option to show all rows
pd.set_option('display.max_rows', None)

# Groups the zip code and if property is vacant and produces the count 
vacant_lots_counts_by_zip = vacant_lots_df.groupby(['Zip Code','Is Vacant Property'])['Zip Code'].size().unstack(fill_value=0)

# Convert index (zip codes) to integers to remove the .0 decimal part
vacant_lots_counts_by_zip.index = vacant_lots_counts_by_zip.index.astype(int)

In [33]:
results={}

results["How many vacant lots are in each zip code?"] = {
    'question_number': 7,
    'value': vacant_lots_counts_by_zip
}

results

{'How many vacant lots are in each zip code?': {'question_number': 7,
  'value': Is Vacant Property   Yes
  Zip Code                
  75006                  5
  75019                 26
  75043                  7
  75050                  1
  75051                 13
  75060                  2
  75062                  2
  75063                  4
  75080                  5
  75081                  2
  75104                  1
  75115                  1
  75134                  1
  75141                  5
  75149                  4
  75150                 12
  75159                  7
  75181                  4
  75182                  3
  75201                319
  75202                127
  75203               1658
  75204                461
  75205                 79
  75206                299
  75207                408
  75208                790
  75209                308
  75210                857
  75211                808
  75212               2136
  75214                290
  7

In [34]:
#8 How long have they been vacant?
vacant_zips_count_days = vacant_lots_df.groupby(['Zip Code','Vacancy Days'])['Vacancy Days'].size()


In [35]:
results={}

results["How long have they been vacant?"] = {
    'question_number': 8,
    'value': vacant_zips_count_days
}

results

{'How long have they been vacant?': {'question_number': 8,
  'value': Zip Code  Vacancy Days
  75006.0   515.0              5
  75019.0   89.0               2
            515.0             24
  75043.0   515.0              6
  75050.0   515.0              1
  75051.0   515.0             13
  75060.0   515.0              2
  75062.0   515.0              2
  75063.0   515.0              4
  75080.0   515.0              5
  75081.0   515.0              2
  75104.0   515.0              1
  75115.0   327.0              1
  75134.0   515.0              1
  75141.0   515.0              5
  75149.0   515.0              4
  75150.0   362.0              2
            515.0             10
  75159.0   515.0              7
  75181.0   515.0              4
  75182.0   515.0              3
  75201.0   89.0              35
            181.0              2
            234.0             14
            270.0              1
            362.0              4
            424.0              1
            515.

In [36]:
#9 How many vacant lots in each zip code are zoned single family residential, commercial and multifamily?


zoning_category_counts_zip = vacant_lots_df.groupby(['Zip Code', 'Zoning Category']).size().unstack(fill_value=0)


In [37]:
results={}

results["How many vacant lots in each zip code are zoned single family residential, commercial and multifamily?"] = {
    'question_number': 9,
    'value': zoning_category_counts_zip
}

results

{'How many vacant lots in each zip code are zoned single family residential, commercial and multifamily?': {'question_number': 9,
  'value': Zoning Category  Central Area  Industrial  Office  Other  Residential  Retail
  Zip Code                                                                     
  75006.0                     0           0       0      2            0       3
  75019.0                     0           0       0     26            0       0
  75043.0                     0           0       0      7            0       0
  75050.0                     0           0       0      1            0       0
  75051.0                     0          10       0      0            3       0
  75060.0                     0           0       0      2            0       0
  75062.0                     0           0       0      2            0       0
  75063.0                     0           0       0      4            0       0
  75080.0                     0           0       0      5   

In [38]:
#10:How many vacant structures are in each zip code?

# Groups the zip code and if property is vacant and produces the count 
vacant_structure_counts_by_zip = structure_df.groupby(['Zip Code','Is Vacant Property','Property Type'])['Property Type'].size().unstack(fill_value=0)


In [39]:
results={}

results["How many vacant structures are in each zip code?"] = {
    'question_number': 10,
    'value': vacant_structure_counts_by_zip
}

results

{'How many vacant structures are in each zip code?': {'question_number': 10,
  'value': Property Type                Structure
  Zip Code Is Vacant Property           
  75006.0  Yes                         1
  75051.0  Yes                         3
  75063.0  Yes                         1
  75081.0  Yes                         1
  75115.0  Yes                         1
  75150.0  Yes                         1
  75201.0  Yes                        96
  75202.0  Yes                        27
  75203.0  Yes                       265
  75204.0  Yes                       137
  75205.0  Yes                        52
  75206.0  Yes                       153
  75207.0  Yes                       109
  75208.0  Yes                       214
  75209.0  Yes                       208
  75210.0  Yes                       164
  75211.0  Yes                       236
  75212.0  Yes                       444
  75214.0  Yes                       175
  75215.0  Yes                       343
  75216.0  Y

In [40]:
#11: How long have the vacant structures in each zip code been vacant? (see question 8)
vacant_zips_structure_count_days = structure_df.groupby(['Zip Code','Vacancy Days'])['Vacancy Days'].size()


In [41]:
results={}

results["How long have the vacant structures in each zip code been vacant? (see question 8)"] = {
    'question_number': 11,
    'value': vacant_zips_structure_count_days
}

results

{'How long have the vacant structures in each zip code been vacant? (see question 8)': {'question_number': 11,
  'value': Zip Code  Vacancy Days
  75006.0   515.0             1
  75051.0   515.0             3
  75063.0   515.0             1
  75081.0   515.0             1
  75115.0   327.0             1
  75150.0   362.0             1
  75201.0   89.0             28
            181.0             1
            234.0            14
            270.0             1
            362.0             3
            424.0             1
            515.0            47
  75202.0   89.0              6
            120.0             1
            181.0             2
            362.0             1
            515.0            17
  75203.0   89.0             25
            181.0             7
            234.0             7
            270.0             5
            327.0             9
            362.0             8
            424.0             9
            515.0           194
  75204.0   89.0       

In [42]:
#12 How many vacant structures in each zip code are zoned single family residential, commercial and multifamily? (see question 9)

zoning_category_counts_structure_zip = structure_df.groupby(['Zip Code', 'Zoning Category']).size().unstack(fill_value=0)


In [43]:
results={}

results["How many vacant structures in each zip code are zoned single family residential, commercial and multifamily? (see question 9)"] = {
    'question_number': 12,
    'value': zoning_category_counts_structure_zip
}

results

{'How many vacant structures in each zip code are zoned single family residential, commercial and multifamily? (see question 9)': {'question_number': 12,
  'value': Zoning Category  Central Area  Industrial  Office  Other  Residential  Retail
  Zip Code                                                                     
  75006.0                     0           0       0      0            0       1
  75051.0                     0           2       0      0            1       0
  75063.0                     0           0       0      1            0       0
  75081.0                     0           0       0      1            0       0
  75115.0                     0           0       0      1            0       0
  75150.0                     0           0       0      1            0       0
  75201.0                    23           0       0     73            0       0
  75202.0                    23           0       0      4            0       0
  75203.0                     0      

In [44]:
#13.What is the total amount of back taxes owed on vacant land in each City Council District?

# Grouping and calculating the total amount of back taxes owed on vacant land by City Council District
#Remove vacant prop yes filter
back_taxes_total_by_district = land_df.groupby('Council Districts')['Delinquent Taxes'].sum()

In [45]:
results={}

results["What is the total amount of back taxes owed on vacant land in each City Council District?"] = {
    'question_number': 13,
    'value': back_taxes_total_by_district
}

results

{'What is the total amount of back taxes owed on vacant land in each City Council District?': {'question_number': 13,
  'value': Council Districts
  District 1      509.12
  District 10      95.33
  District 11      36.66
  District 12       1.33
  District 13    3528.23
  District 14     833.85
  District 2     1918.61
  District 3      814.54
  District 4     1876.74
  District 5      377.34
  District 6     1943.60
  District 7     1252.82
  District 8     1620.58
  District 9       32.16
  Name: Delinquent Taxes, dtype: float64}}

In [46]:
#14.What is the total amount of back taxes owed on vacant land in each zip code?

# Grouping and calculating the total amount of back taxes owed on vacant land by Zip Code
back_taxes_total_by_zip = land_df[land_df['Is Vacant Property'] == 'Yes'].groupby('Zip Code')['Delinquent Taxes'].sum()

# Convert index (zip codes) to integers to remove the .0 decimal part
back_taxes_total_by_zip.index = back_taxes_total_by_zip.index.astype(int)


In [47]:
results={}

results["What is the total amount of back taxes owed on vacant land in each zip code?"] = {
    'question_number': 14,
    'value': back_taxes_total_by_zip
}

results

{'What is the total amount of back taxes owed on vacant land in each zip code?': {'question_number': 14,
  'value': Zip Code
  75006       0.00
  75019       0.11
  75043       0.00
  75050       0.00
  75051       4.58
  75060       0.00
  75062       0.00
  75063       0.00
  75080       0.01
  75081       0.16
  75104       0.00
  75134       0.00
  75141       0.00
  75149       0.00
  75150       0.04
  75159       0.01
  75181       0.36
  75182       0.00
  75201     638.93
  75202      13.98
  75203     698.38
  75204     396.80
  75205       0.00
  75206     114.95
  75207      52.21
  75208     530.57
  75209      45.79
  75210     250.38
  75211     398.49
  75212    1228.64
  75214      13.53
  75215     831.03
  75216    1117.71
  75217     643.85
  75218       3.51
  75219     181.55
  75220    3032.09
  75223     420.19
  75224     148.24
  75225      20.77
  75226     740.80
  75227     151.08
  75228      40.81
  75229      47.74
  75230       1.52
  75231     792.07
 

In [48]:
#15 What is the total amount of back taxes owed on vacant structures in each City Council District?

# Grouping and calculating the total amount of back taxes owed on vacant land by City Council District
back_taxes_total_by_district_by_structure = structure_df[structure_df['Is Vacant Property'] == 'Yes'].groupby('Council Districts')['Delinquent Taxes'].sum()


In [49]:
results={}

results["What is the total amount of back taxes owed on vacant structures in each City Council District?"] = {
    'question_number': 15,
    'value': back_taxes_total_by_district_by_structure
}

results

{'What is the total amount of back taxes owed on vacant structures in each City Council District?': {'question_number': 15,
  'value': Council Districts
  District 1      194.58
  District 10     204.67
  District 11     589.50
  District 12     100.59
  District 13    3111.29
  District 14     879.84
  District 2      927.22
  District 3      334.25
  District 4     1264.02
  District 5      523.21
  District 6     1136.37
  District 7      591.83
  District 8      516.88
  District 9      658.61
  Name: Delinquent Taxes, dtype: float64}}

In [50]:
#16 What is the total amount of back taxes owed on vacant structures in each zip code?

# Grouping and calculating the total amount of back taxes owed on vacant land by Zip Code
back_taxes_total_by_zip_by_structure = structure_df[structure_df['Is Vacant Property'] == 'Yes'].groupby('Zip Code')['Delinquent Taxes'].sum()


In [51]:
results={}

results["What is the total amount of back taxes owed on vacant structures in each zip code?"] = {
    'question_number': 16,
    'value': back_taxes_total_by_zip_by_structure
}

results

{'What is the total amount of back taxes owed on vacant structures in each zip code?': {'question_number': 16,
  'value': Zip Code
  75006.0       0.00
  75051.0       0.00
  75063.0       0.00
  75081.0       0.00
  75115.0      48.01
  75150.0       0.01
  75201.0      55.97
  75202.0       0.00
  75203.0     257.82
  75204.0     195.98
  75205.0      89.96
  75206.0     348.09
  75207.0       0.02
  75208.0     223.17
  75209.0     336.47
  75210.0     131.62
  75211.0     154.94
  75212.0     361.65
  75214.0     267.57
  75215.0     233.84
  75216.0    1016.29
  75217.0     385.64
  75218.0     520.51
  75219.0     153.49
  75220.0    1549.09
  75223.0     419.15
  75224.0      14.90
  75225.0     382.06
  75226.0       5.69
  75227.0     393.95
  75228.0     307.38
  75229.0     877.89
  75230.0     631.32
  75231.0     466.48
  75232.0     138.74
  75233.0      84.50
  75234.0       0.00
  75235.0      74.50
  75236.0      11.48
  75237.0      56.98
  75238.0     115.34
  75240.

In [52]:
#17 What is the average age of residential properties in each council district?

avg_age__prop_districts=vacant_lots_df.groupby('Council Districts')['Property Age Years'].mean()


In [53]:
results={}

results["What is the average age of residential properties in each council district?"] = {
    'question_number': 17,
    'value': avg_age__prop_districts
}

results

{'What is the average age of residential properties in each council district?': {'question_number': 17,
  'value': Council Districts
  District 1     1572.464716
  District 10    1370.064050
  District 11    1472.124088
  District 12    1375.589623
  District 13     998.992739
  District 14    1509.926720
  District 2     1522.142599
  District 3     1670.648162
  District 4     1520.761722
  District 5     1647.409211
  District 6     1670.038651
  District 7     1690.851691
  District 8     1731.556948
  District 9      918.152174
  Name: Property Age Years, dtype: float64}}

In [54]:
#18 What is the average age of residential properties in each zip code?

avg_age__prop_zip=vacant_lots_df.groupby('Zip Code')['Property Age Years'].mean()

#Fills na values with 0
avg_age__prop_zip=avg_age__prop_districts.fillna(0)

#converts avg age to int data type to remve decimals
avg_age__prop_zip=avg_age__prop_districts.astype(int)


In [55]:
results={}

results["What is the average age of residential properties in each zip code?"] = {
    'question_number': 18,
    'value': avg_age__prop_zip
}

results

{'What is the average age of residential properties in each zip code?': {'question_number': 18,
  'value': Council Districts
  District 1     1572
  District 10    1370
  District 11    1472
  District 12    1375
  District 13     998
  District 14    1509
  District 2     1522
  District 3     1670
  District 4     1520
  District 5     1647
  District 6     1670
  District 7     1690
  District 8     1731
  District 9      918
  Name: Property Age Years, dtype: int32}}

In [56]:
#19 What is the average size of residential properties in each council district?


# Group by council district and calculate the average property size
average_size_by_district = residential_properties.groupby('Council Districts')['Land Size'].mean()

In [57]:
results={}

results["What is the average size of residential properties in each council district?"] = {
    'question_number': 19,
    'value': average_size_by_district
}

results

{'What is the average size of residential properties in each council district?': {'question_number': 19,
  'value': Council Districts
  District 1     0.514335
  District 10    1.794358
  District 11    1.691032
  District 12    1.204741
  District 13    0.646895
  District 14    0.332276
  District 2     0.440493
  District 3     1.671820
  District 4     0.326718
  District 5     0.741237
  District 6     0.238190
  District 7     1.642671
  District 8     2.008309
  District 9     0.666493
  Name: Land Size, dtype: float64}}

In [93]:
#20 What is the average size of residential properties in each zip code?

# Group by council district and calculate the average property size
average_size_by_zip = residential_properties.groupby('Zip Code')['Land Size'].mean()

In [94]:
results={}

results["What is the average size of residential properties in each zip code?"] = {
    'question_number': 20,
    'value': average_size_by_zip
}

results

{'What is the average size of residential properties in each zip code?': {'question_number': 20,
  'value': Zip Code
  75051.0     2.393843
  75081.0     0.107777
  75104.0     0.272161
  75134.0     0.164271
  75159.0     0.230310
  75203.0     0.233540
  75204.0     0.247548
  75206.0     0.263806
  75208.0     0.479640
  75209.0     0.454525
  75210.0     1.750586
  75211.0     1.189478
  75212.0     0.221169
  75214.0     0.396212
  75215.0    49.157738
  75216.0     0.408171
  75217.0     1.335408
  75218.0     0.841723
  75220.0     0.549756
  75223.0     0.315742
  75224.0     0.680054
  75225.0     0.607995
  75226.0     0.256969
  75227.0     0.652345
  75228.0     1.498331
  75229.0     0.801061
  75230.0     0.705908
  75231.0     0.485553
  75232.0     1.035799
  75233.0     0.981486
  75234.0     0.208237
  75235.0     0.493364
  75236.0     2.953820
  75237.0     1.836207
  75238.0     0.220755
  75240.0     1.206837
  75241.0     1.822136
  75243.0     2.057468
  75244.0

In [117]:
#21 What is the average age of commercial properties in each council district?
#Not SOLVED!
# Group by council district and calculate the average property size by commerical prop
average_age_by_district_commerical = commercial_properties.groupby('Council Districts')['Property Age Years'].mean()


In [118]:
results={}

results["What is the average age of commercial properties in each council district?"] = {
    'question_number': 21,
    'value': average_age_by_district_commerical
}

results

{'What is the average age of commercial properties in each council district?': {'question_number': 21,
  'value': Council Districts
  District 1     1241.600000
  District 10    1527.250000
  District 14       9.000000
  District 2     1877.229730
  District 3     1558.230769
  District 4     1890.351351
  District 5     1556.470588
  District 6     1972.897436
  District 7     1839.734375
  District 8     2009.711409
  District 9     1483.818182
  Name: Property Age Years, dtype: float64}}

In [112]:
#23 What is the total amount of city liens owed on vacant land in each City Council District?

# Grouping and calculating the total amount of city liens owed on vacant land by City Council District
city_liens_total_by_district = land_df.groupby('Council Districts')['Code Liens - Amount'].sum()


In [113]:
results={}

results["What is the total amount of city liens owed on vacant land in each City Council District?"] = {
    'question_number': 23,
    'value': city_liens_total_by_district
}

results

{'What is the total amount of city liens owed on vacant land in each City Council District?': {'question_number': 23,
  'value': Council Districts
  District 1       80042.03
  District 10       8004.55
  District 11       8169.02
  District 12          0.00
  District 13       1453.51
  District 14      15524.56
  District 2      210849.33
  District 3      145026.89
  District 4     1791752.69
  District 5      367186.22
  District 6      455578.70
  District 7     4408563.06
  District 8      641226.15
  District 9        6418.36
  Name: Code Liens - Amount, dtype: float64}}

In [110]:
#24 What is the total amount of city liens owed on vacant land in each zip code?

# Grouping and calculating the total amount of city liens owed on vacant land by City Council District
city_liens_total_by_zip = land_df.groupby('Zip Code')['Code Liens - Amount'].sum()


In [111]:
results={}

results["What is the total amount of city liens owed on vacant land in each zip code?"] = {
    'question_number': 24,
    'value': city_liens_total_by_zip
}

results

{'What is the total amount of city liens owed on vacant land in each zip code?': {'question_number': 24,
  'value': Zip Code
  75006.0          0.00
  75019.0          0.00
  75043.0          0.00
  75050.0          0.00
  75051.0        529.14
  75060.0          0.00
  75062.0          0.00
  75063.0          0.00
  75080.0          0.00
  75081.0          0.00
  75104.0          0.00
  75134.0          0.00
  75141.0          0.00
  75149.0          0.00
  75150.0          0.00
  75159.0          0.00
  75181.0          0.00
  75182.0          0.00
  75201.0          0.00
  75202.0          0.00
  75203.0     744316.49
  75204.0      33255.10
  75205.0          0.00
  75206.0       2695.52
  75207.0       4291.07
  75208.0      37664.92
  75209.0       5607.21
  75210.0    1397374.47
  75211.0      74447.57
  75212.0     403744.44
  75214.0       6836.98
  75215.0    2650483.76
  75216.0    1163509.84
  75217.0     395255.39
  75218.0          0.00
  75219.0       6348.52
  75220.0  

In [114]:
#25 What is the total amount of city liens owed on vacant structures in each City Council District?

# Grouping and calculating the total amount of city liens owed on vacant structures by City Council District
city_liens_total_by_district_by_structure = structure_df.groupby('Council Districts')['Code Liens - Amount'].sum()


In [115]:
results={}

results[" What is the total amount of city liens owed on vacant structures in each City Council District?"] = {
    'question_number': 25,
    'value': city_liens_total_by_district_by_structure
}

results

{' What is the total amount of city liens owed on vacant structures in each City Council District?': {'question_number': 25,
  'value': Council Districts
  District 1      30170.73
  District 10      2939.90
  District 11     31571.55
  District 12     49869.82
  District 13      3353.51
  District 14     13228.02
  District 2      53486.84
  District 3      67498.54
  District 4     579110.85
  District 5      90014.33
  District 6      70999.05
  District 7     311093.02
  District 8     147739.99
  District 9      36739.96
  Name: Code Liens - Amount, dtype: float64}}

In [68]:
#26 How many properties are in the name of heirship in the City of Dallas?

#Use suspected heir prop col and combine with public owner filter
# Filter the DataFrame for properties in the City of Dallas with 'Suspected Heir Property' == 'Yes'
#Would get zero combined with City of Dallas filter
dallas_heirship_properties = (vacant_lots_df['Suspected Heir Property'] == 'Yes').sum()



In [69]:
results={}

results["How many properties are in the name of heirship in the City of Dallas?"] = {
    'question_number': 26,
    'value': dallas_heirship_properties
}

results

{'How many properties are in the name of heirship in the City of Dallas?': {'question_number': 26,
  'value': 156}}

In [70]:
#27 How many properties in the name of heirship in the City of Dallas are vacant in each zip code?

#apply Suspected heir to Yes and group by zip code
dallas_heirship_zip = vacant_lots_df[vacant_lots_df['Suspected Heir Property'] == 'Yes'].groupby('Zip Code').size()


In [71]:
results={}

results["How many properties in the name of heirship in the City of Dallas are vacant in each zip code?"] = {
    'question_number': 27,
    'value': dallas_heirship_zip
}

results

{'How many properties in the name of heirship in the City of Dallas are vacant in each zip code?': {'question_number': 27,
  'value': Zip Code
  75201.0     2
  75203.0    13
  75206.0     2
  75208.0     2
  75209.0     1
  75210.0     8
  75211.0     2
  75212.0    10
  75214.0     2
  75215.0    22
  75216.0    27
  75217.0     7
  75218.0     3
  75223.0     4
  75225.0     1
  75226.0     3
  75227.0     5
  75228.0     7
  75230.0     1
  75231.0     2
  75232.0     1
  75233.0     1
  75235.0     1
  75237.0     2
  75238.0     1
  75241.0    14
  75243.0     1
  75246.0     1
  75248.0     1
  75249.0     2
  75253.0     7
  dtype: int64}}

In [72]:
#28 How many properties in the name of heirship in the City of Dallas are vacant in each council district?

#apply Suspected heir to Yes and group by council distrcit
dallas_heirship_districts = vacant_lots_df[vacant_lots_df['Suspected Heir Property'] == 'Yes'].groupby('Council Districts').size()


In [73]:
results={}

results["How many properties in the name of heirship in the City of Dallas are vacant in each council district?"] = {
    'question_number': 28,
    'value': dallas_heirship_districts
}

results

{'How many properties in the name of heirship in the City of Dallas are vacant in each council district?': {'question_number': 28,
  'value': Council Districts
  District 1      2
  District 10     4
  District 11     2
  District 13     1
  District 14     3
  District 2     11
  District 3      8
  District 4     35
  District 5     10
  District 6     11
  District 7     40
  District 8     21
  District 9      8
  dtype: int64}}

In [74]:
#29 How many private owners have five or more vacant properties in the City of Dallas?
owner_vacant_property_counts = vacant_lots_df[
    (vacant_lots_df['Owner'] == 'Private Owner') &
    (vacant_lots_df['Owner Property Count'] >= 5)
]

owner_vacant_property_counts_view = owner_vacant_property_counts.shape[0]


In [75]:
results={}

results["How many private owners have five or more vacant properties in the City of Dallas?"] = {
    'question_number': 29,
    'value': owner_vacant_property_counts_view
}

results

{'How many private owners have five or more vacant properties in the City of Dallas?': {'question_number': 29,
  'value': 4527}}

In [76]:
#30 How many private owners with five or more vacant properties in the City of Dallas are in each zip code?

private_5_zip=owner_vacant_property_counts.groupby('Zip Code').size()

In [77]:
results={}

results["How many private owners with five or more vacant properties in the City of Dallas are in each zip code?"] = {
    'question_number': 30,
    'value': private_5_zip
}

results

{'How many private owners with five or more vacant properties in the City of Dallas are in each zip code?': {'question_number': 30,
  'value': Zip Code
  75019.0      7
  75043.0      1
  75050.0      1
  75051.0      1
  75063.0      1
  75081.0      1
  75181.0      2
  75182.0      1
  75201.0     33
  75202.0     14
  75203.0    348
  75204.0     52
  75205.0      9
  75206.0     41
  75207.0     57
  75208.0    124
  75209.0     44
  75210.0    142
  75211.0    192
  75212.0    548
  75214.0     15
  75215.0    417
  75216.0    438
  75217.0    267
  75218.0     37
  75219.0     33
  75220.0     77
  75223.0    142
  75224.0     38
  75225.0     48
  75226.0    104
  75227.0    128
  75228.0     64
  75229.0     71
  75230.0     44
  75231.0     21
  75232.0     73
  75233.0     38
  75234.0      4
  75235.0    103
  75236.0    118
  75237.0     64
  75238.0     12
  75240.0     11
  75241.0    209
  75243.0     70
  75244.0      5
  75246.0     40
  75247.0     48
  75248.0     2

In [78]:
#31 How many private owners with five or more vacant properties in the City of Dallas are in each council district?

private_5_district=owner_vacant_property_counts.groupby('Council Districts').size()

In [79]:
results={}

results[" How many private owners with five or more vacant properties in the City of Dallas are in each council district?"] = {
    'question_number': 31,
    'value': private_5_district
}

results

{' How many private owners with five or more vacant properties in the City of Dallas are in each council district?': {'question_number': 31,
  'value': Council Districts
  District 1     254
  District 10     89
  District 11     61
  District 12     26
  District 13    124
  District 14    148
  District 2     397
  District 3     365
  District 4     652
  District 5     266
  District 6     889
  District 7     766
  District 8     427
  District 9      61
  dtype: int64}}

In [80]:
#32 How many churches own vacant properties in the City of Dallas?

church_owners = vacant_lots_df[vacant_lots_df['Owner is Church'] == 'Yes']

num_church_properties = church_owners.shape[0]



In [81]:
results={}

results["How many churches own vacant properties in the City of Dallas?"] = {
    'question_number': 32,
    'value': num_church_properties
}

results

{'How many churches own vacant properties in the City of Dallas?': {'question_number': 32,
  'value': 27}}

In [82]:
#33 How many churches owned vacant properties in the City of Dallas are in each zip code?

church_prop_zip = vacant_lots_df[vacant_lots_df['Owner is Church'] == 'Yes'].groupby('Zip Code').size()


In [83]:
results={}

results["How many churches own vacant properties in the City of Dallas?"] = {
    'question_number': 33,
    'value': church_prop_zip
}

results

{'How many churches own vacant properties in the City of Dallas?': {'question_number': 33,
  'value': Zip Code
  75203.0    1
  75211.0    2
  75212.0    2
  75215.0    1
  75216.0    2
  75217.0    4
  75223.0    1
  75224.0    1
  75226.0    2
  75227.0    1
  75228.0    1
  75233.0    2
  75238.0    1
  75241.0    6
  dtype: int64}}

In [84]:
#34 How many are in each council district

church_prop_district,l = vacant_lots_df[vacant_lots_df['Owner is Church'] == 'Yes'].groupby('Council Districts').size()


In [85]:
results={}

results["How many churches own vacant properties in the City of Dallas?"] = {
    'question_number': 34,
    'value': church_prop_district
}

results

{'How many churches own vacant properties in the City of Dallas?': {'question_number': 34,
  'value': Council Districts
  District 1    1
  District 2    1
  District 3    3
  District 4    4
  District 5    3
  District 6    2
  District 7    5
  District 8    7
  District 9    1
  dtype: int64}}