# Final Analysis Notebook

This notebook combines the yearly educational attainment files and the per-city crime files into two datasets, reshapes and cleans the crime data, and plots the yearly totals for each dataset.

In [None]:
# Import required libraries
import pandas as pd
import os
import re
import matplotlib.pyplot as plt

## 1. Combine Educational Attainment Data

In [None]:
# Yearly educational attainment exports. The four-digit year embedded in each
# file name is what the loop below uses to tag the rows.
edu_file_paths = [
    "Educational_Attainment2018.csv",
    "Educational_Attainment2019.csv",
    "Educational_Attainment2020.csv",
    "Educational_Attainment2021.csv",
    "Educational_Attainment2022.csv",
]

# Read every file that is present and tag its rows with the year parsed from
# the file name. The frames are collected in a list and concatenated once:
# calling pd.concat inside the loop re-copies all previously read rows on
# every iteration.
edu_frames = []
for file in edu_file_paths:
    if not os.path.exists(file):
        # Best-effort: a missing year is reported and skipped, not fatal.
        print(f"Skipping missing file: {file}")
        continue
    df = pd.read_csv(file)
    # "Educational_Attainment2018.csv" -> "2018"
    year = file.split("Attainment")[1].split(".")[0]
    df['year'] = int(year)
    edu_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# input file was found.
edu_combined_df = (
    pd.concat(edu_frames, ignore_index=True) if edu_frames else pd.DataFrame()
)

# Save combined educational data
edu_combined_df.to_csv("Educational_Attainment_Combined.csv", index=False)
print("Combined Educational Data Saved!")

## 2. Combine Crime Data

In [None]:
# One export per city police department; the rows are stacked in this order.
crime_file_paths = [
    "HoustonLargestPD.csv",
    "ChicagoLargestPD.csv",
    "NewYorkLargestPD.csv",
    "PhoenixLargestPD.csv",
    "LosAngelesLargestPD.csv",
]

# Read every file that is present. The frames are collected in a list and
# concatenated once: calling pd.concat inside the loop re-copies all
# previously read rows on every iteration.
crime_frames = []
for file in crime_file_paths:
    if not os.path.exists(file):
        # Best-effort: a missing city is reported and skipped, not fatal.
        print(f"Skipping missing file: {file}")
        continue
    df = pd.read_csv(file)
    crime_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# input file was found.
crime_combined_df = (
    pd.concat(crime_frames, ignore_index=True) if crime_frames else pd.DataFrame()
)

# Save combined crime data
crime_combined_df.to_csv("Raw_Crime_Data.csv", index=False)
print("Combined Crime Data Saved!")

## 3. Clean Combined Crime Data

In [None]:
# Load the combined crime table from disk.
crime_df = pd.read_csv("Raw_Crime_Data.csv")

# Year columns are those whose label contains four consecutive digits.
# Note the single backslash: inside a raw string, r"\\d{4}" is the regex for
# a literal backslash followed by four "d" characters, which selects no
# column and leaves nothing to melt.
year_columns = crime_df.filter(regex=r"\d{4}").columns

# Reshape wide -> long: each year column becomes rows holding the column
# label in "year" and the cell in "value", with "series" kept as the only
# identifier column.
crime_df = crime_df.melt(id_vars=["series"], value_vars=year_columns, var_name="year", value_name="value")

# Drop rows that have no year label or no value, then save the result.
crime_df = crime_df.dropna(subset=["year", "value"])
crime_df.to_csv("Cleaned_Crime_Data.csv", index=False)
print("Cleaned Crime Data Saved!")

## 4. Plot Crime Trends

In [None]:
# Sum "value" over all rows sharing a year label, then turn the grouped
# result back into a two-column frame ("year", "value").
crime_totals_by_year = crime_df.groupby("year")["value"].sum()
crime_summary = crime_totals_by_year.reset_index()

# Plot the yearly totals as a red line with circular markers.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    crime_summary["year"],
    crime_summary["value"],
    marker='o',
    color='red',
)
ax.set_title("Crime Trends Over Time")
ax.set_xlabel("Year")
ax.set_ylabel("Total Crime Values")
ax.grid(True)
plt.show()

## 5. Plot Educational Data Trends

In [None]:
# Sum "value" over all rows sharing a year, then turn the grouped result back
# into a two-column frame ("year", "value").
# NOTE(review): assumes the educational CSVs provide a "value" column - confirm.
edu_totals_by_year = edu_combined_df.groupby("year")["value"].sum()
edu_summary = edu_totals_by_year.reset_index()

# Plot the yearly totals as a blue line with circular markers.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    edu_summary["year"],
    edu_summary["value"],
    marker='o',
    color='blue',
)
ax.set_title("Educational Data Trends Over Time")
ax.set_xlabel("Year")
ax.set_ylabel("Total Educational Values")
ax.grid(True)
plt.show()