In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re

In [2]:
# Define API details
BASE_URL = "https://data.boston.gov/api/3/action/datastore_search"
RESOURCE_IDS = [
    "9d7c2214-4709-478a-a2e8-fb2020a5bb94"
]

def fetch_data(resource_id):
    """Fetch data from the API given a resource ID."""
    offset = 0
    limit = 100000  # Adjust based on API limits
    all_records = []

    while True:
        params = {"resource_id": resource_id, "limit": limit, "offset": offset}
        response = requests.get(BASE_URL, params=params)

        if response.status_code == 200:
            data = response.json()
            records = data.get("result", {}).get("records", [])
            if not records:
                break
            all_records.extend(records)
            offset += limit
        else:
            print(f"Failed to fetch data for resource {resource_id}: {response.status_code}")
            break

    return pd.DataFrame(all_records) if all_records else pd.DataFrame()

def main():
    """Fetch and combine data from all resource IDs."""
    all_data = []

    for resource_id in RESOURCE_IDS:
        print(f"Fetching data for resource ID: {resource_id}")
        df = fetch_data(resource_id)
        if not df.empty:
            all_data.append(df)

    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)
        combined_df.to_csv("boston_311_2025T1.csv", index=False)
        print("Data fetching complete. Saved as 'boston_311_combined.csv'.")
    else:
        print("No data was fetched.")

if __name__ == "__main__": #__name__ corrected here
    main()

Fetching data for resource ID: 9d7c2214-4709-478a-a2e8-fb2020a5bb94
Data fetching complete. Saved as 'boston_311_combined.csv'.


In [3]:
df = pd.read_csv("boston_311_2025T1.csv")
df.head()

Unnamed: 0,_id,case_enquiry_id,open_dt,sla_target_dt,closed_dt,on_time,case_status,closure_reason,case_title,subject,...,neighborhood,neighborhood_services_district,ward,precinct,location_street_name,location_zipcode,latitude,longitude,geom_4326,source
0,1,101005897291,2025-01-31 11:02:14,,2025-01-31 11:39:03,ONTIME,Closed,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup,Mayor's 24 Hour Hotline,...,Dorchester,7,13,1303,151 Mount Vernon St,2125.0,42.320813,-71.051774,0101000020E6100000B1223E4350C351C06A1DE2641029...,Citizens Connect App
1,2,101005897292,2025-01-31 11:02:32,2025-02-03 11:02:35,2025-01-31 14:01:52,ONTIME,Closed,Case Closed. Closed date : Fri Jan 31 19:01:52...,Pick up Dead Animal,Public Works Department,...,South End,6,Ward 4,403,85 Pembroke St,2118.0,42.34207,-71.076121,0101000020E6100000454C9D28DFC451C073E552EFC82B...,Citizens Connect App
2,3,101005897280,2025-01-31 10:46:00,2025-02-01 10:46:28,2025-02-03 03:10:00,OVERDUE,Closed,Case Closed. Closed date : 2025-02-03 08:10:00...,Unsafe/Dangerous Conditions,Inspectional Services,...,Fenway / Kenmore / Audubon Circle / Longwood,14,Ward 5,502,19 Queensberry St,2215.0,42.34332,-71.095884,0101000020E610000054C8D1F422C651C07B5D4CE5F12B...,Employee Generated
3,4,101005922144,2025-02-21 16:28:44,2025-02-23 16:28:46,2025-02-22 05:53:26,ONTIME,Closed,Case Closed. Closed date : Sat Feb 22 10:53:26...,Unshoveled Sidewalk,Public Works Department,...,Hyde Park,10,Ward 18,1806,54 Tampa St,2126.0,42.27233,-71.109231,0101000020E6100000BAC2FCA2FDC651C01E6B3FB2DB22...,Employee Generated
4,5,101005922146,2025-02-21 16:34:00,2025-03-10 04:30:00,,OVERDUE,Open,,Sign Repair,Transportation - Traffic Division,...,Dorchester,7,16,1611,INTERSECTION Milton St &amp; Adams St,,42.280036,-71.058942,0101000020E6100000A68279B4C5C351C0B4F4CF34D823...,Employee Generated


In [4]:
df.shape

(32000, 31)

In [5]:
import re
import nltk
from nltk.corpus import stopwords
nltk.download()

NLTK Downloader
---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------
Downloader> d

Download which package (l=list; x=cancel)?
  Identifier> l
Packages:
  [ ] abc................. Australian Broadcasting Commission 2006
  [ ] alpino.............. Alpino Dutch Treebank
  [ ] averaged_perceptron_tagger Averaged Perceptron Tagger
  [ ] averaged_perceptron_tagger_eng Averaged Perceptron Tagger (JSON)
  [ ] averaged_perceptron_tagger_ru Averaged Perceptron Tagger (Russian)
  [ ] averaged_perceptron_tagger_rus Averaged Perceptron Tagger (Russian)
  [ ] basque_grammars..... Grammars for Basque
  [ ] bcp47............... BCP-47 Language Tags
  [ ] biocreative_ppi..... BioCreAtIvE (Critical Assessment of Information
                           Extraction Systems in Biology)
  [ ] bllip_wsj_no_aux.... BLLIP Parser: WSJ Model
  [ 

True

In [6]:
import string
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [7]:
df_f = df[['case_enquiry_id', 'closure_reason','case_title']]


In [8]:
df_f.head()

Unnamed: 0,case_enquiry_id,closure_reason,case_title
0,101005897291,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup
1,101005897292,Case Closed. Closed date : Fri Jan 31 19:01:52...,Pick up Dead Animal
2,101005897280,Case Closed. Closed date : 2025-02-03 08:10:00...,Unsafe/Dangerous Conditions
3,101005922144,Case Closed. Closed date : Sat Feb 22 10:53:26...,Unshoveled Sidewalk
4,101005922146,,Sign Repair


In [9]:
def remove_pun(text):
    nonpun = "".join([char for char in text if char not in string.punctuation and not char.isdigit()])
    return nonpun.strip()

df_f['cr_clean'] = df_f['closure_reason'].apply(lambda x: remove_pun(x))
df_f.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f['cr_clean'] = df_f['closure_reason'].apply(lambda x: remove_pun(x))


Unnamed: 0,case_enquiry_id,closure_reason,case_title,cr_clean
0,101005897291,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup,Case Closed Closed date Fri Jan EST Resolv...
1,101005897292,Case Closed. Closed date : Fri Jan 31 19:01:52...,Pick up Dead Animal,Case Closed Closed date Fri Jan EST Resolved
2,101005897280,Case Closed. Closed date : 2025-02-03 08:10:00...,Unsafe/Dangerous Conditions,Case Closed Closed date Duplicate of Existi...
3,101005922144,Case Closed. Closed date : Sat Feb 22 10:53:26...,Unshoveled Sidewalk,Case Closed Closed date Sat Feb EST Noted ...
4,101005922146,,Sign Repair,


In [10]:
def tokenize(text):
    token = re.split('\W+', text)
    return token

df_f['cr_tokenized'] = df_f['cr_clean'].apply(lambda x: tokenize(x.lower()))
df_f.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f['cr_tokenized'] = df_f['cr_clean'].apply(lambda x: tokenize(x.lower()))


Unnamed: 0,case_enquiry_id,closure_reason,case_title,cr_clean,cr_tokenized
0,101005897291,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup,Case Closed Closed date Fri Jan EST Resolv...,"[case, closed, closed, date, fri, jan, est, re..."
1,101005897292,Case Closed. Closed date : Fri Jan 31 19:01:52...,Pick up Dead Animal,Case Closed Closed date Fri Jan EST Resolved,"[case, closed, closed, date, fri, jan, est, re..."
2,101005897280,Case Closed. Closed date : 2025-02-03 08:10:00...,Unsafe/Dangerous Conditions,Case Closed Closed date Duplicate of Existi...,"[case, closed, closed, date, duplicate, of, ex..."
3,101005922144,Case Closed. Closed date : Sat Feb 22 10:53:26...,Unshoveled Sidewalk,Case Closed Closed date Sat Feb EST Noted ...,"[case, closed, closed, date, sat, feb, est, no..."
4,101005922146,,Sign Repair,,[]


In [11]:
# Removing stop words
from nltk.corpus import stopwords
nltk.download('stopwords')
stopwords = stopwords.words('english')

def remove_sw(text_list):
    filtered = [word for word in text_list if word not in stopwords]
    return filtered

df_f['cr_filtered'] = df_f['cr_tokenized'].apply(lambda x: remove_sw(x))

df_f.head()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f['cr_filtered'] = df_f['cr_tokenized'].apply(lambda x: remove_sw(x))


Unnamed: 0,case_enquiry_id,closure_reason,case_title,cr_clean,cr_tokenized,cr_filtered
0,101005897291,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup,Case Closed Closed date Fri Jan EST Resolv...,"[case, closed, closed, date, fri, jan, est, re...","[case, closed, closed, date, fri, jan, est, re..."
1,101005897292,Case Closed. Closed date : Fri Jan 31 19:01:52...,Pick up Dead Animal,Case Closed Closed date Fri Jan EST Resolved,"[case, closed, closed, date, fri, jan, est, re...","[case, closed, closed, date, fri, jan, est, re..."
2,101005897280,Case Closed. Closed date : 2025-02-03 08:10:00...,Unsafe/Dangerous Conditions,Case Closed Closed date Duplicate of Existi...,"[case, closed, closed, date, duplicate, of, ex...","[case, closed, closed, date, duplicate, existi..."
3,101005922144,Case Closed. Closed date : Sat Feb 22 10:53:26...,Unshoveled Sidewalk,Case Closed Closed date Sat Feb EST Noted ...,"[case, closed, closed, date, sat, feb, est, no...","[case, closed, closed, date, sat, feb, est, no..."
4,101005922146,,Sign Repair,,[],[]


In [12]:
nltk.download('wordnet')
lm = nltk.WordNetLemmatizer()

def lemmatize_words(text):
    word_lm = [lm.lemmatize(word) for word in text ]
    return word_lm

df_f['cr_lemmatized'] = df_f['cr_filtered'].apply(lambda x: lemmatize_words(x))

df_f.head()

[nltk_data] Downloading package wordnet to /root/nltk_data...
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f['cr_lemmatized'] = df_f['cr_filtered'].apply(lambda x: lemmatize_words(x))


Unnamed: 0,case_enquiry_id,closure_reason,case_title,cr_clean,cr_tokenized,cr_filtered,cr_lemmatized
0,101005897291,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup,Case Closed Closed date Fri Jan EST Resolv...,"[case, closed, closed, date, fri, jan, est, re...","[case, closed, closed, date, fri, jan, est, re...","[case, closed, closed, date, fri, jan, est, re..."
1,101005897292,Case Closed. Closed date : Fri Jan 31 19:01:52...,Pick up Dead Animal,Case Closed Closed date Fri Jan EST Resolved,"[case, closed, closed, date, fri, jan, est, re...","[case, closed, closed, date, fri, jan, est, re...","[case, closed, closed, date, fri, jan, est, re..."
2,101005897280,Case Closed. Closed date : 2025-02-03 08:10:00...,Unsafe/Dangerous Conditions,Case Closed Closed date Duplicate of Existi...,"[case, closed, closed, date, duplicate, of, ex...","[case, closed, closed, date, duplicate, existi...","[case, closed, closed, date, duplicate, existi..."
3,101005922144,Case Closed. Closed date : Sat Feb 22 10:53:26...,Unshoveled Sidewalk,Case Closed Closed date Sat Feb EST Noted ...,"[case, closed, closed, date, sat, feb, est, no...","[case, closed, closed, date, sat, feb, est, no...","[case, closed, closed, date, sat, feb, est, no..."
4,101005922146,,Sign Repair,,[],[],[]


In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Vectorization using TF-IDF
vectorizer = TfidfVectorizer()

# Join the lemmatized words back into a single string for each document
df_f['cr_lemmatized_text'] = df_f['cr_lemmatized'].apply(lambda x: ' '.join(x))

df_f['combined_text'] = df_f['cr_lemmatized_text'] + ' ' + df_f['case_title'].astype(str)

# Use the new 'cr_lemmatized_text' column for TF-IDF vectorization
tfidf_matrix = vectorizer.fit_transform(df_f['combined_text'])

# Get vector for the keyword sentence (medical-related terms)
keywords = "needle pickup drugs syringe opioid injection"
keyword_vector = vectorizer.transform([keywords])

# Compute cosine similarity between each row and the keyword vector
similarities = cosine_similarity(tfidf_matrix, keyword_vector)

# Filter based on similarity threshold (0.2 ensures relevance)
df_f['similarity'] = similarities


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f['cr_lemmatized_text'] = df_f['cr_lemmatized'].apply(lambda x: ' '.join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f['combined_text'] = df_f['cr_lemmatized_text'] + ' ' + df_f['case_title'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_f['similarity'] = similarities


In [14]:
df_f['similarity'].describe()

Unnamed: 0,similarity
count,32000.0
mean,0.013057
std,0.070945
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,0.763643


In [15]:
df_filtered = df_f[df_f['similarity'] > 0]

# Drop the similarity column after filtering
df_filtered = df_filtered.drop(columns=['similarity'])

In [16]:
df_final = df[df['case_enquiry_id'].isin(df_filtered['case_enquiry_id'])]
df_final.head()

Unnamed: 0,_id,case_enquiry_id,open_dt,sla_target_dt,closed_dt,on_time,case_status,closure_reason,case_title,subject,...,neighborhood,neighborhood_services_district,ward,precinct,location_street_name,location_zipcode,latitude,longitude,geom_4326,source
0,1,101005897291,2025-01-31 11:02:14,,2025-01-31 11:39:03,ONTIME,Closed,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup,Mayor's 24 Hour Hotline,...,Dorchester,7,13,1303,151 Mount Vernon St,2125.0,42.320813,-71.051774,0101000020E6100000B1223E4350C351C06A1DE2641029...,Citizens Connect App
48,49,101005981813,2025-03-24 16:52:49,,2025-03-25 03:08:42,ONTIME,Closed,Case Closed. Closed date : Tue Mar 25 07:08:42...,Needle Pickup,Mayor's 24 Hour Hotline,...,Roxbury,6,09,902,68 W Springfield St,2118.0,42.337132,-71.076843,0101000020E61000006988DDFEEAC451C04C8DD127272B...,Employee Generated
71,72,101005977491,2025-03-20 04:29:26,,2025-03-20 04:50:16,ONTIME,Closed,Case Closed. Closed date : Thu Mar 20 08:50:16...,Needle Pickup,Mayor's 24 Hour Hotline,...,Dorchester,13,Ward 7,710,11 Wendover St,2125.0,42.31828,-71.066371,0101000020E61000008D41486A3FC451C0A3F93E62BD28...,Citizens Connect App
74,75,101005977495,2025-03-20 04:31:10,,2025-03-20 04:38:59,ONTIME,Closed,Case Closed. Closed date : Thu Mar 20 08:38:59...,Needle Pickup,Mayor's 24 Hour Hotline,...,South End,6,Ward 9,902,515 Shawmut Ave,2118.0,42.33789,-71.077591,0101000020E610000073B04B3EF7C451C065D7F5F63F2B...,Employee Generated
99,100,101005897276,2025-01-31 10:45:39,,2025-01-31 11:28:38,ONTIME,Closed,Case Closed. Closed date : Fri Jan 31 16:28:38...,Needle Pickup,Mayor's 24 Hour Hotline,...,South End,6,08,802,850 Harrison Ave,2118.0,42.334464,-71.074118,0101000020E6100000DCE0665ABEC451C0D292ADB8CF2A...,Citizens Connect App


In [17]:
df_final.shape

(1198, 31)

In [22]:
del df_final

In [None]:
# df_final.to_csv("boston311_final.csv", index=False)

In [18]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1198 entries, 0 to 31882
Data columns (total 31 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   _id                             1198 non-null   int64  
 1   case_enquiry_id                 1198 non-null   int64  
 2   open_dt                         1198 non-null   object 
 3   sla_target_dt                   230 non-null    object 
 4   closed_dt                       1197 non-null   object 
 5   on_time                         1198 non-null   object 
 6   case_status                     1198 non-null   object 
 7   closure_reason                  1198 non-null   object 
 8   case_title                      1198 non-null   object 
 9   subject                         1198 non-null   object 
 10  reason                          1198 non-null   object 
 11  type                            1198 non-null   object 
 12  queue                           1198 n

Data Cleaning

In [19]:
df_final.drop(['geom_4326','submitted_photo','closed_photo','location'],axis=1,inplace=True)
df_final['location_zipcode'] = df_final['location_zipcode'].astype(str)
df_final['ward_number'] = df_final["ward"].str.extract(r'(\d+)', expand=False).fillna(0).astype(int)
df_final["queue_name"] = df_final["queue"].str.split('_').str[1]

# Date conversion and date extraction
df_final["open_date"] = pd.to_datetime(df_final["open_dt"], format="%Y-%m-%d %H:%M:%S").dt.date
df_final["sla_target_dt"] = pd.to_datetime(df_final["sla_target_dt"], format="%Y-%m-%d %H:%M:%S").dt.date
df_final["closed_date"] = pd.to_datetime(df_final["closed_dt"], format="%Y-%m-%d %H:%M:%S").dt.date

df_final["closed_date"] = pd.to_datetime(df_final["closed_date"])
df_final["open_date"] = pd.to_datetime(df_final["open_date"])

cols_to_int = ["fire_district", "city_council_district", "neighborhood_services_district"]
for col in cols_to_int:
    df_final[col] = pd.to_numeric(df_final[col].astype(str).str.strip(), errors='coerce').astype('Int64')


df_final["location_zipcode"] = df_final["location_zipcode"].astype(str).str.replace('.0', '', regex=False).str.replace('nan', '', regex=False)
# Convert to Int64, fill NaNs with 0, convert back to string, and pad with leading zeros
df_final["location_zipcode"] = pd.to_numeric(df_final["location_zipcode"], errors='coerce').fillna(0).astype('Int64').astype(str).str.zfill(5)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final.drop(['geom_4326','submitted_photo','closed_photo','location'],axis=1,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['location_zipcode'] = df_final['location_zipcode'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final['ward_number'] = df_final["ward"].str.extract(r'(\d+)', expand=False).fillna(0).astype(i

In [20]:
df_final.drop(['location_street_name','_id','queue','ward'],axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final.drop(['location_street_name','_id','queue','ward'],axis=1,inplace=True)


In [21]:
df_final.head()

Unnamed: 0,case_enquiry_id,open_dt,sla_target_dt,closed_dt,on_time,case_status,closure_reason,case_title,subject,reason,...,neighborhood_services_district,precinct,location_zipcode,latitude,longitude,source,ward_number,queue_name,open_date,closed_date
0,101005897291,2025-01-31 11:02:14,NaT,2025-01-31 11:39:03,ONTIME,Closed,Case Closed. Closed date : Fri Jan 31 16:39:03...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,7,1303,2125,42.320813,-71.051774,Citizens Connect App,13,Needle,2025-01-31,2025-01-31
48,101005981813,2025-03-24 16:52:49,NaT,2025-03-25 03:08:42,ONTIME,Closed,Case Closed. Closed date : Tue Mar 25 07:08:42...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6,902,2118,42.337132,-71.076843,Employee Generated,9,Needle,2025-03-24,2025-03-25
71,101005977491,2025-03-20 04:29:26,NaT,2025-03-20 04:50:16,ONTIME,Closed,Case Closed. Closed date : Thu Mar 20 08:50:16...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,13,710,2125,42.31828,-71.066371,Citizens Connect App,7,Needle,2025-03-20,2025-03-20
74,101005977495,2025-03-20 04:31:10,NaT,2025-03-20 04:38:59,ONTIME,Closed,Case Closed. Closed date : Thu Mar 20 08:38:59...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6,902,2118,42.33789,-71.077591,Employee Generated,9,Needle,2025-03-20,2025-03-20
99,101005897276,2025-01-31 10:45:39,NaT,2025-01-31 11:28:38,ONTIME,Closed,Case Closed. Closed date : Fri Jan 31 16:28:38...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6,802,2118,42.334464,-71.074118,Citizens Connect App,8,Needle,2025-01-31,2025-01-31


In [23]:
df_hist = pd.read_csv("boston311_final.csv")
df_hist.head()

Unnamed: 0,case_enquiry_id,open_dt,sla_target_dt,closed_dt,on_time,case_status,closure_reason,case_title,subject,reason,...,neighborhood_services_district,precinct,location_zipcode,latitude,longitude,source,ward_number,queue_name,open_date,closed_date
0,101005402776,2024-04-08 11:09:17,,2024-04-08 13:30:46,ONTIME,Closed,Case Closed. Closed date : Mon Apr 08 17:30:46...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,5.0,601,2127,42.34209,-71.05636,Citizens Connect App,6,Needle,2024-04-08,2024-04-08
1,101005402777,2024-04-08 11:10:05,,2024-04-08 11:38:16,ONTIME,Closed,Case Closed. Closed date : Mon Apr 08 15:38:16...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6.0,404,0,42.341127,-71.081153,Citizens Connect App,4,Needle,2024-04-08,2024-04-08
2,101005357059,2024-03-19 06:57:55,,2024-03-19 07:10:23,ONTIME,Closed,Case Closed. Closed date : Tue Mar 19 11:10:23...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6.0,404,0,42.342563,-71.081864,Citizens Connect App,4,Needle,2024-03-19,2024-03-19
3,101005356895,2024-03-19 05:40:10,,2024-03-19 05:49:12,ONTIME,Closed,Case Closed. Closed date : Tue Mar 19 09:49:12...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,3.0,306,2108,42.358549,-71.05942,Citizens Connect App,3,Needle,2024-03-19,2024-03-19
4,101005579920,2024-07-22 14:11:00,,2024-07-22 14:37:14,ONTIME,Closed,Case Closed. Closed date : Mon Jul 22 18:37:14...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6.0,901,0,42.33877,-71.074724,Citizens Connect App,9,Needle,2024-07-22,2024-07-22


In [24]:
df_hist.shape

(76683, 27)

In [25]:
# Concatenate the dataframes.
df_cleaned = pd.concat([df_hist, df_final], ignore_index=True)
# Save the combined dataframe.
df_cleaned.to_csv("boston311_final.csv", index=False)
df_cleaned.shape

(77881, 27)

In [26]:
df_cleaned.head()

Unnamed: 0,case_enquiry_id,open_dt,sla_target_dt,closed_dt,on_time,case_status,closure_reason,case_title,subject,reason,...,neighborhood_services_district,precinct,location_zipcode,latitude,longitude,source,ward_number,queue_name,open_date,closed_date
0,101005402776,2024-04-08 11:09:17,,2024-04-08 13:30:46,ONTIME,Closed,Case Closed. Closed date : Mon Apr 08 17:30:46...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,5.0,601,2127,42.34209,-71.05636,Citizens Connect App,6,Needle,2024-04-08,2024-04-08
1,101005402777,2024-04-08 11:10:05,,2024-04-08 11:38:16,ONTIME,Closed,Case Closed. Closed date : Mon Apr 08 15:38:16...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6.0,404,0,42.341127,-71.081153,Citizens Connect App,4,Needle,2024-04-08,2024-04-08
2,101005357059,2024-03-19 06:57:55,,2024-03-19 07:10:23,ONTIME,Closed,Case Closed. Closed date : Tue Mar 19 11:10:23...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6.0,404,0,42.342563,-71.081864,Citizens Connect App,4,Needle,2024-03-19,2024-03-19
3,101005356895,2024-03-19 05:40:10,,2024-03-19 05:49:12,ONTIME,Closed,Case Closed. Closed date : Tue Mar 19 09:49:12...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,3.0,306,2108,42.358549,-71.05942,Citizens Connect App,3,Needle,2024-03-19,2024-03-19
4,101005579920,2024-07-22 14:11:00,,2024-07-22 14:37:14,ONTIME,Closed,Case Closed. Closed date : Mon Jul 22 18:37:14...,Needle Pickup,Mayor's 24 Hour Hotline,Needle Program,...,6.0,901,0,42.33877,-71.074724,Citizens Connect App,9,Needle,2024-07-22,2024-07-22


In [27]:
df_final.to_csv("boston311_new_upload.csv", index=False)
df_final.shape

(1198, 27)