In [71]:
# Import required libraries
import pandas as pd
from pymongo import MongoClient
import requests
import time
from sqlalchemy import create_engine
import plotly.express as px
import nest_asyncio
from dash import Dash, dcc, html
import json

# Loading the data into mongo DB

In [72]:
# MongoDB Connection
client = MongoClient("mongodb+srv://deepakbajare0602:iSP0ylPUGnAtrn5G@datacluster1.hlnws.mongodb.net/")  # MongoDB URI
db = client["AirQualityDB"]  # Database name
collection = db["AirQualityData"]  # Collection name

# API URL for the dataset
base_url = "https://data.cityofnewyork.us/resource/c3uy-2p5r.json"

# Define the query parameters (filter for records from 2023 onward)
params = {
    "$limit": 1000  # Fetch 1000 records per API call
}

# Loop to fetch and insert data in batches
offset = 0
batch_size = 1000
while True:
    params["$offset"] = offset
    
    # Send GET request to the API
    response = requests.get(base_url, params=params)
    
    if response.status_code == 200:
        data = response.json()
        
        # If there is no more data, break the loop
        if not data:
            print("No more data to fetch.")
            break
        
        # Insert data into MongoDB in batch of 1000
        try:
            if isinstance(data, list):
                collection.insert_many(data)
                # print(f"Inserted {len(data)} records into MongoDB, offset: {offset}")
            else:
                print(f"Unexpected data format at offset {offset}: {type(data)} ")
        except Exception as e:
            print(f"Error inserting data: {e}")
        
        finally:
            print("Records inserted into MondgoDB sucessfully!!")    
        
        # Increment offset for the next batch
        offset += batch_size
        
        # Add a small delay between requests to avoid hitting rate limits
        time.sleep(1)
    else:
        print(f"Failed to retrieve data: {response.status_code}")
        break

print("Data loading completed!!!!.")

Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
Records inserted into MondgoDB sucessfully!!
No more data to fetch.
Data loading completed!!!!.


# Fetching the Data from mongo DB

In [73]:
# Fetch data from MongoDB
data = collection.find()

# Convert the cursor to a list of dictionaries (JSON format)
data_list = list(data)

# Convert to DataFrame
df = pd.DataFrame(data_list)

# Debug: Print the available columns
print("Available columns:", df.columns)

# Define required columns (normalize to lowercase)
required_columns = ['unique_id', 'indicator_id', 'name', 'measure', 'measure_info',
                    'geo_type_name', 'geo_join_id', 'geo_place_name', 'time_period',
                    'start_date', 'data_value']

# Normalize column names in DataFrame and required_columns
df.columns = df.columns.str.lower()
required_columns = [col.lower() for col in required_columns]

# Filter only available columns
available_columns = [col for col in required_columns if col in df.columns]
print("Selected columns:", available_columns)

# Select relevant columns
airquality_df = df[available_columns]

# Save to CSV
airquality_df.to_csv("air_quality_dataset.csv", index=False)
print("Data saved to air_quality_dataset.csv")

Available columns: Index(['_id', 'unique_id', 'indicator_id', 'name', 'measure', 'measure_info',
       'geo_type_name', 'geo_join_id', 'geo_place_name', 'time_period',
       'start_date', 'data_value'],
      dtype='object')
Selected columns: ['unique_id', 'indicator_id', 'name', 'measure', 'measure_info', 'geo_type_name', 'geo_join_id', 'geo_place_name', 'time_period', 'start_date', 'data_value']
Data saved to air_quality_dataset.csv


In [75]:
# airqulity un-structured data from mongoDb
airquality_df.head(10)

Unnamed: 0,unique_id,indicator_id,name,measure,measure_info,geo_type_name,geo_join_id,geo_place_name,time_period,start_date,data_value
0,825967,375,Nitrogen dioxide (NO2),Mean,ppb,UHF34,104,Pelham - Throgs Neck,Summer 2022,2022-06-01T00:00:00.000,12.0
1,823492,365,Fine particles (PM 2.5),Mean,mcg/m3,CD,307,Sunset Park (CD7),Summer 2022,2022-06-01T00:00:00.000,6.7
2,827012,386,Ozone (O3),Mean,ppb,CD,313,Coney Island (CD13),Summer 2022,2022-06-01T00:00:00.000,37.7
3,827081,386,Ozone (O3),Mean,ppb,UHF34,103,Fordham - Bronx Pk,Summer 2022,2022-06-01T00:00:00.000,31.7
4,827103,386,Ozone (O3),Mean,ppb,UHF42,503,Willowbrook,Summer 2022,2022-06-01T00:00:00.000,34.8
5,823211,365,Fine particles (PM 2.5),Mean,mcg/m3,CD,105,Midtown (CD5),Summer 2022,2022-06-01T00:00:00.000,8.7
6,823241,365,Fine particles (PM 2.5),Mean,mcg/m3,UHF42,401,Long Island City - Astoria,Summer 2022,2022-06-01T00:00:00.000,7.2
7,825903,375,Nitrogen dioxide (NO2),Mean,ppb,UHF34,303,East Harlem,Summer 2022,2022-06-01T00:00:00.000,13.0
8,823337,365,Fine particles (PM 2.5),Mean,mcg/m3,Borough,2,Brooklyn,Summer 2022,2022-06-01T00:00:00.000,6.3
9,827065,386,Ozone (O3),Mean,ppb,UHF34,304,Upper West Side,Summer 2022,2022-06-01T00:00:00.000,29.9


In [76]:
# Shape of the Air-Quality data
print(f"shape of the air_quality{airquality_df.shape}")

shape of the air_quality(108150, 11)


In [77]:
# Info of the Air-Quality data
airquality_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108150 entries, 0 to 108149
Data columns (total 11 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   unique_id       108150 non-null  object
 1   indicator_id    108150 non-null  object
 2   name            108150 non-null  object
 3   measure         108150 non-null  object
 4   measure_info    108150 non-null  object
 5   geo_type_name   108150 non-null  object
 6   geo_join_id     108096 non-null  object
 7   geo_place_name  108096 non-null  object
 8   time_period     108150 non-null  object
 9   start_date      108150 non-null  object
 10  data_value      108150 non-null  object
dtypes: object(11)
memory usage: 9.1+ MB


In [78]:
def preprocess_airQuaility_dataset(airquality_df):
    
    # Handle Missing/Null Values
    data_before_drop_null =  airquality_df.isnull().sum()
    
    # Drop rows with all missing/NULL values
    airquality_df = airquality_df.dropna()

    # Cheking Missing/Null values again
    data_after_drop_null = airquality_df.isnull().sum()
    print("Removed null values")

    # Cheking duplicate values
    has_duplicates = airquality_df.duplicated().any()
    if has_duplicates:
        airquality_df = airquality_df.drop_duplicates()
        print("Duplicates removed.")
 
    return airquality_df

processed_airquality_df = preprocess_airQuaility_dataset(airquality_df)

Removed null values
Duplicates removed.


In [79]:
processed_airquality_df.shape

(17629, 11)

# Loaded the NYPD Data set

In [80]:
def load_NYPD_data(url):
          
           #loading the data 
          nypd_df=pd.read_csv(url)

          #Dispplaying the dataSet
          nypd_data=nypd_df.head(10)
          print(f"NYPD Data:\n {nypd_data}\n")
          
          #Displaying the shape of dataset
          nypd_shape=nypd_df.shape
          print(f"NYPD Data: shape:\n{nypd_shape}\n")

          nypd_info=nypd_df.info()
          print(f"NYPD Info:\n{nypd_info}")

url = "/Users/deepakbajare/Documents/Projects/Dap/NYPD_Arrest_Data__Year_to_Date_.csv"

load_NYPD_data(url)

NYPD Data:
    ARREST_KEY ARREST_DATE  PD_CD                   PD_DESC  KY_CD  \
0   281240883  01/28/2024  105.0         STRANGULATION 1ST  106.0   
1   282884120  02/27/2024  263.0               ARSON 2,3,4  114.0   
2   283137868  03/03/2024  109.0  ASSAULT 2,1,UNCLASSIFIED  106.0   
3   287001362  05/16/2024  109.0  ASSAULT 2,1,UNCLASSIFIED  106.0   
4   287829614  06/02/2024  105.0         STRANGULATION 1ST  106.0   
5   280513565  01/14/2024  153.0                    RAPE 3  104.0   
6   291269261  08/07/2024  157.0                    RAPE 1  104.0   
7   280286274  01/10/2024  105.0         STRANGULATION 1ST  106.0   
8   281035905  01/24/2024  777.0                    (null)    NaN   
9   279805425  01/02/2024  109.0  ASSAULT 2,1,UNCLASSIFIED  106.0   

        OFNS_DESC    LAW_CODE LAW_CAT_CD ARREST_BORO  ARREST_PRECINCT  \
0  FELONY ASSAULT  PL 1211200          F           Q              105   
1           ARSON  PL 1501001          F           Q              107   
2  FELONY

# Data preprocessing for NYPD

In [81]:

def preprocess_NYPD_Arrest_Data__Year_to_Date(nypd_df):
    # Checking and summarizing missing/null values before processing
    print("Missing values before processing:")
    print(nypd_df.isnull().sum())

    # Drop rows with all missing/NULL values
    nypd_df = nypd_df.dropna()

    # Checking and summarizing missing/null values after dropping
    print("Missing values after processing:")
    print(nypd_df.isnull().sum())

    # Checking for duplicate values
    has_duplicates = nypd_df.duplicated().any()
    print(f"Duplicates found: {has_duplicates}")
    if has_duplicates:
        nypd_df = nypd_df.drop_duplicates()
        print("Duplicates removed.")
    else:
        print("No duplicate rows found.")

    print("Preprocessing complete.")
    return nypd_df

nypd_df = pd.read_csv("NYPD_Arrest_Data__Year_to_Date_.csv")
processed_nypd_df = preprocess_NYPD_Arrest_Data__Year_to_Date(nypd_df)


Missing values before processing:
ARREST_KEY                     0
ARREST_DATE                    0
PD_CD                          6
PD_DESC                        0
KY_CD                         26
OFNS_DESC                      0
LAW_CODE                       0
LAW_CAT_CD                  1109
ARREST_BORO                    0
ARREST_PRECINCT                0
JURISDICTION_CODE              0
AGE_GROUP                      0
PERP_SEX                       0
PERP_RACE                      0
X_COORD_CD                     0
Y_COORD_CD                     0
Latitude                       0
Longitude                      0
New Georeferenced Column       0
dtype: int64
Missing values after processing:
ARREST_KEY                  0
ARREST_DATE                 0
PD_CD                       0
PD_DESC                     0
KY_CD                       0
OFNS_DESC                   0
LAW_CODE                    0
LAW_CAT_CD                  0
ARREST_BORO                 0
ARREST_PRECINCT        

In [82]:
processed_nypd_df.shape

(194312, 19)

In [83]:
processed_nypd_df.columns

Index(['ARREST_KEY', 'ARREST_DATE', 'PD_CD', 'PD_DESC', 'KY_CD', 'OFNS_DESC',
       'LAW_CODE', 'LAW_CAT_CD', 'ARREST_BORO', 'ARREST_PRECINCT',
       'JURISDICTION_CODE', 'AGE_GROUP', 'PERP_SEX', 'PERP_RACE', 'X_COORD_CD',
       'Y_COORD_CD', 'Latitude', 'Longitude', 'New Georeferenced Column'],
      dtype='object')

# Crime_Data_from_2020_to_Present

In [84]:
def load_crime_data(url):
          
           #loading the data 
          crime_df=pd.read_csv(url)

          #Dispplaying the dataSet
          crime_data=crime_df.head()
          print(f"crime_dataset:\n {crime_data}\n")
          
          #Displaying the shape of dataset
          crime_shape=crime_df.shape
          print(f"crime_data shape:\n{crime_shape}\n")

          #Displaying the info of dataset
          print(f"crime_data info:{crime_df.info()}")

url = "/Users/deepakbajare/Documents/Projects/Dap/Crime_Data_from_2020_to_Present.csv"

load_crime_data(url)

crime_dataset:
        DR_NO               Date Rptd                DATE OCC  TIME OCC  AREA  \
0  190326475  03/01/2020 12:00:00 AM  03/01/2020 12:00:00 AM      2130     7   
1  200106753  02/09/2020 12:00:00 AM  02/08/2020 12:00:00 AM      1800     1   
2  200320258  11/11/2020 12:00:00 AM  11/04/2020 12:00:00 AM      1700     3   
3  200907217  05/10/2023 12:00:00 AM  03/10/2020 12:00:00 AM      2037     9   
4  200412582  09/09/2020 12:00:00 AM  09/09/2020 12:00:00 AM       630     4   

    AREA NAME  Rpt Dist No  Part 1-2  Crm Cd  \
0    Wilshire          784         1     510   
1     Central          182         1     330   
2   Southwest          356         1     480   
3    Van Nuys          964         1     343   
4  Hollenbeck          413         1     510   

                                Crm Cd Desc  ... Status   Status Desc  \
0                          VEHICLE - STOLEN  ...     AA  Adult Arrest   
1                     BURGLARY FROM VEHICLE  ...     IC   Invest Con

In [85]:
def preprocess_crime_data(crime_df):


     # Checking for duplicate values
    has_duplicates = crime_df.duplicated().any()
    print(f"Duplicates found: {has_duplicates}")
    if has_duplicates:
        crime_df = crime_df.drop_duplicates()
        print("Duplicates removed.")
    else:
        print("No duplicate rows found.")

    print("Preprocessing complete.")
 
    return crime_df


crime_df = pd.read_csv("Crime_Data_from_2020_to_Present.csv")
processed_crime_data = preprocess_crime_data(crime_df)

Duplicates found: True
Duplicates removed.
Preprocessing complete.


In [86]:
processed_crime_data.columns

Index(['DR_NO', 'Date Rptd', 'DATE OCC', 'TIME OCC', 'AREA', 'AREA NAME',
       'Rpt Dist No', 'Part 1-2', 'Crm Cd', 'Crm Cd Desc', 'Mocodes',
       'Vict Age', 'Vict Sex', 'Vict Descent', 'Premis Cd', 'Premis Desc',
       'Weapon Used Cd', 'Weapon Desc', 'Status', 'Status Desc', 'Crm Cd 1',
       'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4', 'LOCATION', 'Cross Street', 'LAT',
       'LON'],
      dtype='object')

In [87]:
import sqlalchemy
import psycopg2

print(f"Pandas version: {pd.__version__}")
print(f"SQLAlchemy version: {sqlalchemy.__version__}")
print(f"Psycopg2 version: {psycopg2.__version__}")

Pandas version: 2.2.3
SQLAlchemy version: 2.0.36
Psycopg2 version: 2.9.10 (dt dec pq3 ext lo64)


# Loading the preprocessing data into postgres sql

In [88]:
from sqlalchemy import create_engine
import pandas as pd  # Required for DataFrame operations

# PostgreSQL connection URI
postgre_connection_string = "postgresql+psycopg2://postgres:Admin@localhost:5432/myDatabase"

def store_preprocessed_data(preprocessed_air_quality):
    """
    Stores preprocessed air quality data into a PostgreSQL database.

    Args:
        preprocessed_air_quality (pd.DataFrame): The preprocessed air quality data as a pandas DataFrame.
    """
    try:
        # Establish database connection
        engine = create_engine(postgre_connection_string)

        # Define table name
        table_name = "AirQuality_Dataset"

        # Store DataFrame to PostgreSQL
        preprocessed_air_quality.to_sql(table_name, engine, if_exists='replace', index=False)

        print("Air Quality Data stored in PostgreSQL successfully!")
    except Exception as e:
        print(f"An error occurred while storing data: {e}")

# Example usage:
# Assuming `processed_airquality_df` is a pandas DataFrame
# processed_airquality_df = pd.DataFrame(...)  # Replace with actual DataFrame
store_preprocessed_data(processed_airquality_df)


Air Quality Data stored in PostgreSQL successfully!


In [89]:
from sqlalchemy import create_engine
import pandas as pd  # Import pandas if you are working with DataFrames

# PostgreSQL connection URI
postgre_connection_string = "postgresql+psycopg2://postgres:Admin@localhost:5432/myDatabase"

def store_preprocessed_data(preprocess_crime_data):
    """
    Stores preprocessed crime data into a PostgreSQL database.

    Args:
        preprocess_crime_data (pd.DataFrame): The preprocessed crime data as a pandas DataFrame.
    """
    try:
        # Establish database connection using a context manager
        with create_engine(postgre_connection_string).connect() as conn:
            # Define table name
            table_name = "Crime_Dataset"

            # Store DataFrame to PostgreSQL
            preprocess_crime_data.to_sql(table_name, conn, if_exists='replace', index=False)

            print("Crime Data stored in PostgreSQL successfully!")
    except Exception as e:
        print(f"An error occurred while storing data: {e}")

# Example of calling the function
# Assuming `processed_crime_data` is a pandas DataFrame
# processed_crime_data = pd.DataFrame(...)  # Replace with actual DataFrame
store_preprocessed_data(processed_crime_data)


Crime Data stored in PostgreSQL successfully!


In [90]:
from sqlalchemy import create_engine
import pandas as pd  # Required for DataFrame operations

# PostgreSQL connection URI
postgre_connection_string = "postgresql+psycopg2://postgres:Admin@localhost:5432/myDatabase"

def store_preprocessed_data(preprocessed_nypd_data):
    """
    Stores preprocessed NYPD crime data into a PostgreSQL database.

    Args:
        preprocessed_nypd_data (pd.DataFrame): The preprocessed NYPD crime data as a pandas DataFrame.
    """
    try:
        # Establish database connection
        engine = create_engine(postgre_connection_string)

        # Define table name
        table_name = "NYPD_Dataset"

        # Store DataFrame to PostgreSQL
        preprocessed_nypd_data.to_sql(table_name, engine, if_exists='replace', index=False)

        print("NYPD Data stored in PostgreSQL successfully!")
    except Exception as e:
        print(f"An error occurred while storing NYPD data: {e}")

# Example usage
# Assuming `processed_nypd_df` is a pandas DataFrame
# processed_nypd_df = pd.DataFrame(...)  # Replace with actual DataFrame
store_preprocessed_data(processed_nypd_df)


NYPD Data stored in PostgreSQL successfully!


In [91]:
# Step 5: Preprocess Data
processed_airquality_df['data_value'] = processed_airquality_df['data_value'].fillna(0)
processed_airquality_df['start_date'] = pd.to_datetime(processed_airquality_df['start_date'], errors='coerce')
processed_airquality_df['data_value'] = pd.to_numeric(processed_airquality_df['data_value'], errors='coerce')

processed_crime_data['Vict Age'].fillna(processed_crime_data['Vict Age'].mean(), inplace=True)
processed_crime_data['Vict Sex'].fillna('U', inplace=True)
processed_crime_data['TIME OCC'] = processed_crime_data['TIME OCC'].astype(int)

processed_nypd_df['ARREST_DATE'] = pd.to_datetime(processed_nypd_df['ARREST_DATE'], errors='coerce')
processed_nypd_df['Latitude'] = pd.to_numeric(processed_nypd_df['Latitude'], errors='coerce')
processed_nypd_df['Longitude'] = pd.to_numeric(processed_nypd_df['Longitude'], errors='coerce')

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  processed_crime_data['Vict Age'].fillna(processed_crime_data['Vict Age'].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  processed_crime_data['Vict Age'].fillna(processed_crime_data['Vict Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inpla

In [97]:
# Step 3: Visualization and Dashboard
nest_asyncio.apply()
app = Dash(__name__)

# Visualizations for Air Quality Data
processed_airquality_df['data_value'] = pd.to_numeric(airquality_df['data_value'], errors='coerce')
processed_airquality_df = processed_airquality_df.dropna(subset=['data_value'])
bins = [0, 10, 20, 30, 40, 50, 100, 150, 200, 300, 400]
labels = ['0-10', '10-20', '20-30', '30-40', '40-50', '50-100', '100-150', '150-200', '200-300', '300-400']
processed_airquality_df['data_value_bin'] = pd.cut(processed_airquality_df['data_value'], bins=bins, labels=labels)
fig1 = px.histogram(processed_airquality_df, x='data_value_bin', title="Distribution of Air Quality Data Values (Binned)", category_orders={"Data Value Bin": labels})
fig1.update_layout(xaxis_title="Data Value Range", yaxis_title="Count", bargap=0.2)

# Aggregate air quality data by date
aggregated_airquality = processed_airquality_df.groupby('start_date')['data_value'].mean().reset_index()

# Create line plot
fig2 = px.line(
    aggregated_airquality,
    x='start_date',
    y='data_value',
    title="Air Quality Over Time",
    labels={'start_date': 'Date', 'data_value': 'Average Data Value'}
)

# Visualizations for Crime Data
crime_by_area = processed_crime_data.groupby(['AREA NAME']).size().reset_index(name='Count')
fig3 = px.bar(crime_by_area, x='AREA NAME', y='Count', title="Crime Incidents by Area")

processed_crime_data['Hour'] = processed_crime_data['TIME OCC'] // 100  # Assuming TIME OCC is in HHMM format
crime_by_hour = processed_crime_data.groupby('Hour').size().reset_index(name='Count')
fig4 = px.line(
    crime_by_hour, x='Hour', y='Count', 
    title="Crime Incidents by Hour of the Day",
    labels={'Hour': 'Hour of the Day', 'Count': 'Number of Incidents'}
)
fig4.update_traces(line=dict(width=2))
fig4.update_layout(xaxis=dict(tickmode='linear', tick0=0, dtick=1))

# Crime Data Visualization
gender_counts = processed_crime_data['Vict Sex'].value_counts().reset_index()
gender_counts.columns = ['Gender', 'Count']
fig5 = px.bar(gender_counts, x='Gender', y='Count', title="Victim Gender Distribution", labels={'Gender': 'Gender', 'Count': 'Count'})

# Visualizations for NYPD Arrest Data
fig6 = px.histogram(processed_nypd_df, x='ARREST_BORO', title="Arrests by Borough")

fig7 = px.line(
    processed_nypd_df.groupby('ARREST_DATE').size().reset_index(name='Count'),
    x='ARREST_DATE', y='Count', title="Arrest Trends Over Time"
)

# Heatmap for Arrest Data
heatmap_data = processed_nypd_df.dropna(subset=['Latitude', 'Longitude'])
fig8 = px.density_mapbox(
    heatmap_data, lat='Latitude', lon='Longitude', radius=10,
    mapbox_style='carto-positron', zoom=10,
    title="Heatmap of Arrest Locations"
)
fig8.update_layout(
    mapbox_center={"lat": heatmap_data['Latitude'].mean(), "lon": heatmap_data['Longitude'].mean()},
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
    title_x=0.5
)

# Dashboard Layout
app.layout = html.Div([
    html.H1("Data Analysis Dashboard", style={'text-align': 'center', 'color': 'white'}),
    dcc.Tabs([
        dcc.Tab(label='Air Quality Data', children=[
            html.Div([dcc.Graph(figure=fig1), dcc.Graph(figure=fig2)])
        ]),
        dcc.Tab(label='Crime Data', children=[
            html.Div([dcc.Graph(figure=fig3), dcc.Graph(figure=fig4), dcc.Graph(figure=fig5)])
        ]),
        dcc.Tab(label='NYPD Arrest Data', children=[
            html.Div([dcc.Graph(figure=fig6), dcc.Graph(figure=fig7), dcc.Graph(figure=fig8)])
        ])
    ])
])

# Run the Dashboard
app.run_server(mode='inline', debug=False)