In [1]:
import pandas as pd
import numpy as np

In [2]:
df_apartments = pd.read_csv('../data/processed/apartments_clean.csv')
df_crime = pd.read_csv('../data/processed/crime_clean.csv')

print("num apts:", len(df_apartments))
print("numcrime records:", len(df_crime))

num apts: 532
numcrime records: 425466


In [3]:
rent_by_zip = df_apartments.groupby('zip_code').agg({'price': ['mean', 'median', 'min', 'max', 'count'],'rent_per_bedroom': 'mean'}).reset_index()
rent_by_zip.columns = ['zip_code', 'avg_rent', 'median_rent', 'min_rent', 'max_rent', 'listing_count', 'avg_rent_per_bed']
rent_by_zip['avg_rent'] = rent_by_zip['avg_rent'].round(2)
rent_by_zip['median_rent'] = rent_by_zip['median_rent'].round(2)
rent_by_zip['avg_rent_per_bed'] = rent_by_zip[
'avg_rent_per_bed'].round(2)

print(rent_by_zip)

   zip_code  avg_rent  median_rent  min_rent  max_rent  listing_count  \
0     90005   2650.00       2650.0    1350.0    3950.0              8   
1     90006   2175.05       1867.0    1190.0    5349.0            236   
2     90007   2956.36       2418.0    1295.0    5990.0             44   
3     90015   3323.08       2925.0    1900.0    8326.0             96   
4     90018   3775.82       4299.0    1350.0    4299.0             68   
5     90019   2197.50       2185.0    2175.0    2245.0             16   
6     90026   1795.00       1795.0    1795.0    1795.0              4   
7     90037   1675.00       1675.0    1450.0    1900.0              8   
8     90057   1607.85       1495.0    1350.0    2732.0             52   

   avg_rent_per_bed  
0           1350.00  
1           1767.44  
2               NaN  
3           2270.79  
4           1593.56  
5           1098.75  
6               NaN  
7            950.00  
8           1536.45  


In [4]:
crime_by_zip = df_crime.groupby('zip_code').size().reset_index()
crime_by_zip.columns = ['zip_code', 'crime_count']

print(crime_by_zip)

    zip_code  crime_count
0      90004        20117
1      90005        20117
2      90006        21125
3      90007        30622
4      90010        20117
5      90012        30622
6      90013        30622
7      90014        30622
8      90015        30622
9      90017        30622
10     90018        23576
11     90019        21125
12     90020        21125
13     90021        30622
14     90026        20117
15     90037        23576
16     90057        20117


In [5]:
merged_data = rent_by_zip.merge(crime_by_zip, on = 'zip_code', how = 'inner')

print(merged_data)
print(merged_data.shape)

   zip_code  avg_rent  median_rent  min_rent  max_rent  listing_count  \
0     90005   2650.00       2650.0    1350.0    3950.0              8   
1     90006   2175.05       1867.0    1190.0    5349.0            236   
2     90007   2956.36       2418.0    1295.0    5990.0             44   
3     90015   3323.08       2925.0    1900.0    8326.0             96   
4     90018   3775.82       4299.0    1350.0    4299.0             68   
5     90019   2197.50       2185.0    2175.0    2245.0             16   
6     90026   1795.00       1795.0    1795.0    1795.0              4   
7     90037   1675.00       1675.0    1450.0    1900.0              8   
8     90057   1607.85       1495.0    1350.0    2732.0             52   

   avg_rent_per_bed  crime_count  
0           1350.00        20117  
1           1767.44        21125  
2               NaN        30622  
3           2270.79        30622  
4           1593.56        23576  
5           1098.75        21125  
6               NaN     

In [6]:
merged_data['crime_rate'] = merged_data['crime_count'] / merged_data['listing_count']
merged_data['crime_rate'] = merged_data['crime_rate'].round(2)

print(merged_data[['zip_code', 'crime_count', 'listing_count', 'crime_rate']])

   zip_code  crime_count  listing_count  crime_rate
0     90005        20117              8     2514.62
1     90006        21125            236       89.51
2     90007        30622             44      695.95
3     90015        30622             96      318.98
4     90018        23576             68      346.71
5     90019        21125             16     1320.31
6     90026        20117              4     5029.25
7     90037        23576              8     2947.00
8     90057        20117             52      386.87


In [7]:
correlation_matrix = np.corrcoef(merged_data['avg_rent'], merged_data['crime_rate'])
correlation = correlation_matrix[0, 1]

print("Corr rent nd crime rate:", round(correlation, 3))

if correlation < -0.5:
    print("low crme gives high rent")
elif correlation < -0.3:
    print("low crimemay give high rent")
elif correlation < 0.3:
    print("cirme donsnt decide rent ")
else:
    print("Higher crome gives higher rent")

Corr rent nd crime rate: -0.473
low crimemay give high rent


In [8]:
min_rent = merged_data['avg_rent'].min()
max_rent = merged_data['avg_rent'].max()
merged_data['rent_normalized'] = (merged_data['avg_rent'] - min_rent) / (max_rent - min_rent)
merged_data['rent_score'] = 1 - merged_data['rent_normalized']
min_crime = merged_data['crime_rate'].min()
max_crime = merged_data['crime_rate'].max()
merged_data['crime_normalized'] = (merged_data['crime_rate'] - min_crime) / (max_crime - min_crime)

merged_data['safety_score'] = 1 - merged_data['crime_normalized']
merged_data['overall_score'] = (merged_data['rent_score'] + merged_data['safety_score']) / 2
merged_data['rent_score'] = merged_data['rent_score'].round(3)
merged_data['safety_score'] = merged_data['safety_score'].round(3)
merged_data['overall_score'] = merged_data['overall_score'].round(3)
top_neighborhoods = merged_data.sort_values('overall_score', ascending=False).head(5)

print("\nbets neightbrhoods for usc studdents t live in:")
for index, row in top_neighborhoods.iterrows():
    print(index + 1)
    print("zip:", row['zip_code'])
    print("Avg rent: $", row['avg_rent'])
    print(" crime rate:", row['crime_rate'])
    print("Score:", row['overall_score'])



bets neightbrhoods for usc studdents t live in:
9
zip: 90057.0
Avg rent: $ 1607.85
 crime rate: 386.87
Score: 0.97
2
zip: 90006.0
Avg rent: $ 2175.05
 crime rate: 89.51
Score: 0.869
6
zip: 90019.0
Avg rent: $ 2197.5
 crime rate: 1320.31
Score: 0.739
8
zip: 90037.0
Avg rent: $ 1675.0
 crime rate: 2947.0
Score: 0.695
3
zip: 90007.0
Avg rent: $ 2956.36
 crime rate: 695.95
Score: 0.628


In [9]:
results_display = merged_data[['zip_code','avg_rent', 'median_rent','crime_count', 'crime_rate','listing_count', 'rent_score', 'safety_score', 'overall_score']]
results_display = results_display.sort_values('overall_score', ascending = False)

print(results_display.to_string(index = False))

 zip_code  avg_rent  median_rent  crime_count  crime_rate  listing_count  rent_score  safety_score  overall_score
    90057   1607.85       1495.0        20117      386.87             52       1.000         0.940          0.970
    90006   2175.05       1867.0        21125       89.51            236       0.738         1.000          0.869
    90019   2197.50       2185.0        21125     1320.31             16       0.728         0.751          0.739
    90037   1675.00       1675.0        23576     2947.00              8       0.969         0.422          0.695
    90007   2956.36       2418.0        30622      695.95             44       0.378         0.877          0.628
    90015   3323.08       2925.0        30622      318.98             96       0.209         0.954          0.581
    90005   2650.00       2650.0        20117     2514.62              8       0.519         0.509          0.514
    90018   3775.82       4299.0        23576      346.71             68       0.000    

In [10]:
output_file = '../data/processed/analysis_results.csv'
merged_data.to_csv(output_file, index = False)


print(output_file)


../data/processed/analysis_results.csv


In [11]:
print("cheaperst rent: $", merged_data['avg_rent'].min())
print("highest rent: $", merged_data['avg_rent'].max())
print("avg rent: $", round(merged_data['avg_rent'].mean(), 2))
print("lowest crime:", merged_data['crime_rate'].min())
print("highest crime :", merged_data['crime_rate'].max())
print("average crme:", round(merged_data['crime_rate'].mean(), 2))
print("num of zips:", len(merged_data))
print("num apts:", merged_data['listing_count'].sum())
print("num crime:", merged_data['crime_count'].sum())


cheaperst rent: $ 1607.85
highest rent: $ 3775.82
avg rent: $ 2461.74
lowest crime: 89.51
highest crime : 5029.25
average crme: 1516.58
num of zips: 9
num apts: 532
num crime: 210997
