In [78]:
import pandas as pd

# Load the datasets
population_df = pd.read_csv('Population.csv')
fire_df = pd.read_csv('Fire.csv')
citation_df = pd.read_csv('CITATIONS.csv')
arrest_df = pd.read_csv('arrests.csv')

# Normalize neighborhood names
def normalize_neighborhood(name):
    if pd.isna(name):
        return ''
    name = str(name).strip().lower()
    if 'mt. oliver' in name or 'mount oliver' in name:
        return 'mount oliver'
    return name

# Apply normalization
population_df['Neighborhood'] = population_df['Neighborhood'].apply(normalize_neighborhood)
fire_df['neighborhood'] = fire_df['neighborhood'].apply(normalize_neighborhood)
citation_df['NEIGHBORHOOD'] = citation_df['NEIGHBORHOOD'].apply(normalize_neighborhood)
arrest_df['INCIDENTNEIGHBORHOOD'] = arrest_df['INCIDENTNEIGHBORHOOD'].apply(normalize_neighborhood)

# Process Population Data
population_df = population_df[['Neighborhood', '2020_Total_Population']]
population_df.rename(columns={'2020_Total_Population': 'Population'}, inplace=True)

# Process Fire Data
fire_df['Neighborhood'] = fire_df['neighborhood'].astype(str)
fire_count = fire_df.groupby('Neighborhood').size().reset_index(name='Fire_Count')

# Process Citation Data
citation_df.rename(columns={'NEIGHBORHOOD': 'Neighborhood'}, inplace=True)
citation_count = citation_df.groupby('Neighborhood').size().reset_index(name='Citation_Count')

# Process Arrest Data
arrest_df['INCIDENTNEIGHBORHOOD'] = arrest_df['INCIDENTNEIGHBORHOOD'].astype(str)
arrest_count = arrest_df.groupby('INCIDENTNEIGHBORHOOD').size().reset_index(name='Arrests')
arrest_count.rename(columns={'INCIDENTNEIGHBORHOOD': 'Neighborhood'}, inplace=True)

# Merge All Datasets
combined_data = population_df.copy()
combined_data = combined_data.merge(fire_count, on='Neighborhood', how='outer')
combined_data = combined_data.merge(citation_count, on='Neighborhood', how='outer')
combined_data = combined_data.merge(arrest_count, on='Neighborhood', how='outer')

# Fill Missing Values with 0
combined_data.fillna(0, inplace=True)

# Sum duplicates and group by Neighborhood
combined_data = combined_data.groupby('Neighborhood').agg({
    'Population': 'sum',
    'Fire_Count': 'sum',
    'Citation_Count': 'sum',
    'Arrests': 'sum'
}).reset_index()

# Filter out neighborhoods with any zero counts in incidents and remove "outside county" and "outside state"
filtered_data = combined_data[(combined_data['Fire_Count'] > 0) & (combined_data['Citation_Count'] > 0) & (combined_data['Arrests'] > 0)]
filtered_data = filtered_data[~filtered_data['Neighborhood'].str.contains('outside')]

# Calculate the total score
filtered_data['Total_Score'] = (filtered_data['Arrests'] * 50) + (filtered_data['Citation_Count'] * 25) + (filtered_data['Fire_Count'] * 25)

# Get Top 10 Neighborhoods with the highest scores
top_neighborhoods = filtered_data.nsmallest(10, 'Total_Score').reset_index(drop=True)

# Display Results
print("Top 10 Neighborhoods (Based on Combined Incidents):")
print("=" * 60)
for rank, row in enumerate(top_neighborhoods.itertuples(), start=1):
    print(f"Rank {rank}: {row.Neighborhood}")
    print(f"  Total Population: {row.Population}")
    print(f"  Total Arrests: {row.Arrests}")
    print(f"  Total Citations: {row.Citation_Count}")
    print(f"  Total Fire Incidents: {row.Fire_Count}")
    print(f"  Total Score: {row.Total_Score:.2f}")
    print("-" * 60)


Top 10 Neighborhoods (Based on Combined Incidents):
Rank 1: ridgemont
  Total Population: 390.0
  Total Arrests: 37.0
  Total Citations: 4.0
  Total Fire Incidents: 12.0
  Total Score: 2250.00
------------------------------------------------------------
Rank 2: regent square
  Total Population: 971.0
  Total Arrests: 37.0
  Total Citations: 9.0
  Total Fire Incidents: 9.0
  Total Score: 2300.00
------------------------------------------------------------
Rank 3: new homestead
  Total Population: 917.0
  Total Arrests: 39.0
  Total Citations: 8.0
  Total Fire Incidents: 22.0
  Total Score: 2700.00
------------------------------------------------------------
Rank 4: swisshelm park
  Total Population: 1339.0
  Total Arrests: 43.0
  Total Citations: 6.0
  Total Fire Incidents: 17.0
  Total Score: 2725.00
------------------------------------------------------------
Rank 5: chartiers city
  Total Population: 492.0
  Total Arrests: 46.0
  Total Citations: 3.0
  Total Fire Incidents: 18.0
  To

In [92]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the datasets
arrest_data = pd.read_csv('arrests.csv')
fire_data = pd.read_csv('Fire.csv')
citation_data = pd.read_csv('CITATIONS.csv')
pop_data = pd.read_csv('Population.csv')

# Process arrest data
arrest_data.dropna(inplace=True)
arrest_data['INCIDENTNEIGHBORHOOD'] = arrest_data['INCIDENTNEIGHBORHOOD'].astype(str)
neighborhood_arrests = arrest_data['INCIDENTNEIGHBORHOOD'].value_counts().reset_index()
neighborhood_arrests.columns = ['Neighborhood', 'Arrests']
pop_data = pop_data[['Neighborhood', '2020_Total_Population']]
pop_data.rename(columns={'2020_Total_Population': 'Population'}, inplace=True)
merged_arrest_data = pd.merge(neighborhood_arrests, pop_data, on='Neighborhood')
merged_arrest_data['Arrests_Per_Capita'] = merged_arrest_data['Arrests'] / merged_arrest_data['Population']

# Process fire data
fire_data['Neighborhood'] = fire_data['neighborhood'].astype(str)
fire_count = fire_data.groupby('Neighborhood').size().reset_index(name='Fire_Count')
combined_fire_data = pd.merge(fire_count, pop_data, on='Neighborhood', how='inner')
combined_fire_data = combined_fire_data.groupby('Neighborhood').agg({
    'Fire_Count': 'sum',
    'Population': 'sum'
}).reset_index()
combined_fire_data['Fire_Incidents_Per_Thousand'] = (combined_fire_data['Fire_Count'] / combined_fire_data['Population']) * 1000

# Process citation data
citation_data.rename(columns={'NEIGHBORHOOD': 'Neighborhood'}, inplace=True)
citation_count = citation_data.groupby('Neighborhood').size().reset_index(name='Citation_Count')
combined_citation_data = pd.merge(citation_count, pop_data, on='Neighborhood', how='inner')
combined_citation_data['Citations_Per_Thousand'] = (combined_citation_data['Citation_Count'] / combined_citation_data['Population']) * 1000

# Merge all data
final_data = pd.merge(merged_arrest_data, combined_fire_data[['Neighborhood', 'Fire_Incidents_Per_Thousand', 'Fire_Count']], on='Neighborhood')
final_data = pd.merge(final_data, combined_citation_data[['Neighborhood', 'Citations_Per_Thousand', 'Citation_Count']], on='Neighborhood')

# Remove neighborhoods with 0s in any category
final_data = final_data[(final_data['Arrests'] > 0) & 
                        (final_data['Fire_Count'] > 0) & 
                        (final_data['Citation_Count'] > 0)]

# Normalize the data using Min-Max Scaling
scaler = MinMaxScaler()
final_data[['Arrests_Per_Capita', 'Fire_Incidents_Per_Thousand', 'Citations_Per_Thousand']] = scaler.fit_transform(final_data[['Arrests_Per_Capita', 'Fire_Incidents_Per_Thousand', 'Citations_Per_Thousand']])

# Assign weights with arrests at 50% and others at 25%
weights = {'Arrests_Per_Capita': 50, 'Fire_Incidents_Per_Thousand': 25, 'Citations_Per_Thousand': 25}

# Calculate the combined score
final_data['Combined_Score'] = (final_data['Arrests_Per_Capita'] * weights['Arrests_Per_Capita'] +
                                final_data['Fire_Incidents_Per_Thousand'] * weights['Fire_Incidents_Per_Thousand'] +
                                final_data['Citations_Per_Thousand'] * weights['Citations_Per_Thousand'])

# Normalize the combined score to a range of 0 to 100
final_data['Combined_Score'] = 100 * final_data['Combined_Score'] / final_data['Combined_Score'].max()

# Rank the neighborhoods
final_data['Rank'] = final_data['Combined_Score'].rank(ascending=False)

# Get the top 10 neighborhoods
top_10_neighborhoods = final_data.nsmallest(10, 'Rank')

# Output the top 10 neighborhoods with details
for rank, row in top_10_neighborhoods.iterrows():
    print(f"Rank {int(row['Rank'])}: {row['Neighborhood']}")
    print(f"  - Population: {row['Population']}")
    print(f"  - Total Arrests: {row['Arrests']}")
    print(f"  - Total Fire Incidents: {row['Fire_Count']}")
    print(f"  - Total Citations: {row['Citation_Count']}")
    print(f"  - Combined Score: {row['Combined_Score']:.6f}")
    print()  # Print a new line for better readability


Rank 1: Chateau
  - Population: 19
  - Total Arrests: 359
  - Total Fire Incidents: 43
  - Total Citations: 66
  - Combined Score: 100.000000

Rank 2: South Shore
  - Population: 29
  - Total Arrests: 227
  - Total Fire Incidents: 26
  - Total Citations: 52
  - Combined Score: 43.418015

Rank 3: North Shore
  - Population: 301
  - Total Arrests: 730
  - Total Fire Incidents: 80
  - Total Citations: 577
  - Combined Score: 23.004935

Rank 4: West End
  - Population: 205
  - Total Arrests: 226
  - Total Fire Incidents: 23
  - Total Citations: 31
  - Combined Score: 5.073400

Rank 5: East Allegheny
  - Population: 1903
  - Total Arrests: 2120
  - Total Fire Incidents: 141
  - Total Citations: 360
  - Combined Score: 4.955167

Rank 6: South Side Flats
  - Population: 7467
  - Total Arrests: 3262
  - Total Fire Incidents: 291
  - Total Citations: 3453
  - Combined Score: 4.740974

Rank 7: Homewood West
  - Population: 642
  - Total Arrests: 684
  - Total Fire Incidents: 63
  - Total Citatio