In [1]:
import requests
import csv
import time
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [2]:
# Dataset endpoint, rows requested per page, and the CSV the download is written to.
BASE_URL = 'https://datahub.transportation.gov/resource/6axg-epim.json'
LIMIT = 100_000
OUTPUT_FILE = 'vehicle_recalls.csv'

def fetch_recalls(timeout=30):
    """Download every recall record from ``BASE_URL``, one page at a time.

    Pages of up to ``LIMIT`` rows are requested with the ``$limit`` and
    ``$offset`` query parameters until the server returns an empty page.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        list: The decoded JSON records in the order they were received. If a
        request fails, the error is printed and the records collected so far
        are returned, so the result may be incomplete.
    """
    all_data = []
    offset = 0

    while True:
        params = {
            "$limit": LIMIT,
            "$offset": offset,
            # Offset paging is only reliable when rows come back in a fixed
            # order; ":id" is the API's internal row identifier.
            "$order": ":id",
        }
        try:
            response = requests.get(BASE_URL, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching recall data: {e}")
            break

        if not data:
            break  # An empty page means everything has been read

        all_data.extend(data)
        # Advance by what was actually received so no rows are skipped if the
        # server returns fewer rows per page than LIMIT.
        offset += len(data)
        time.sleep(1)  # Avoid rate limiting

    return all_data

def save_to_csv(data, filename):
    """Write a list of record dicts to ``filename`` as CSV.

    The header is the union of all keys found in ``data``, in first-seen
    order, so the column layout is the same on every run. Records that lack a
    key get an empty string in that column. When ``data`` is empty a message
    is printed and no file is written.

    Args:
        data: Sequence of dicts, one per CSV row.
        filename: Path of the CSV file to create or overwrite.
    """
    if not data:
        print("No data to save.")
        return

    # dict.fromkeys de-duplicates while keeping insertion order; a set of
    # strings can iterate in a different order from one run to the next.
    fieldnames = list(dict.fromkeys(key for entry in data for key in entry))

    # Explicit UTF-8 so non-ASCII text does not depend on the platform default.
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(data)

In [3]:
if __name__ == "__main__":
    # Download the full dataset and persist it next to the notebook.
    recalls = fetch_recalls()
    save_to_csv(recalls, OUTPUT_FILE)
    # save_to_csv writes nothing for an empty result, so only report success
    # when there was something to write.
    if recalls:
        print(f"Recall data saved to {OUTPUT_FILE}")

Recall data saved to vehicle_recalls.csv


In [4]:
# Load the file written by the download cell instead of a machine-specific
# absolute path, so the notebook runs from a fresh kernel on any machine.
df = pd.read_csv(OUTPUT_FILE)

In [5]:
# Display the raw frame; the rendered output shows only the first and last rows.
df

Unnamed: 0,recall_link,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,completion_rate,recall_type
0,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Volkswagen Group of America, Inc. (Volkswagen)...",25V434000,Loss of Vehicle Control from Loose Brake and S...,2025-06-26T00:00:00.000,546,A loss of vehicle control increases the risk o...,"Dealers will replace the bolts as necessary, f...","Volkswagen Group of America, Inc.",No,No,,42HX,,Vehicle
1,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"General Motors, LLC (GM) is recalling certain ...",25V433000,Damaged Parking Brake Wiring Harness,2025-06-26T00:00:00.000,40233,Unintended activation of the parking brake whi...,"Dealers will inspect the harness and, if neces...","General Motors, LLC",No,No,,N252503010,,Vehicle
2,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,General Motors (GM) is recalling certain 2018 ...,25V432000,Roof Rail Air Bag Inflator Endcap May Detach,2025-06-25T00:00:00.000,1658,A detached end cap or inflator rupture can all...,Dealers will replace the left and right side R...,"General Motors, LLC",No,No,,N252513060,,Vehicle
3,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,Newmar Corporation (Newmar) is recalling certa...,25V431000,Baggage Door Skins May Come Loose and Become a...,2025-06-25T00:00:00.000,541,"A detached door skin can become a road hazard,...",Dealers will inspect and add fasteners and adh...,Newmar Corporation,No,No,,619 RSB,,Vehicle
4,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Chrysler (FCA US, LLC) is recalling certain 20...",25V430000,Inoperative Defrosting & Defogging System/FMVS...,2025-06-25T00:00:00.000,1689,An inoperative windshield defrosting and defog...,Dealers will inspect and replace the blower mo...,"Chrysler (FCA US, LLC)",No,No,VISIBILITY,49C,,Vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29129,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V003000,POWER TRAIN:TRANSMISSION:STANDARD:MANUAL,1966-09-29T00:00:00.000,18572,,,Honda (American Honda Motor Co.),No,No,POWER TRAIN,NR (Not Reported),,Vehicle
29130,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,POSSIBILITY THAT THE FRONT BRAKE HOSE END FITT...,66V178000,BRAKES:HYDRAULIC:LINES:HOSE:NON-METALLIC,1966-09-19T00:00:00.000,125,IF SUCH LEAKAGE OCCURS AND IF ENOUGH BRAKE FLU...,(REPLACE FRONT BRAKE HOSES AND SEALS.),"RENAULT, INCORPORATED",No,No,"SERVICE BRAKES, HYDRAULIC",NR (Not Reported),,Vehicle
29131,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V032001,STEERING:COLUMN,1966-01-19T00:00:00.000,138878,,,"General Motors, LLC",No,No,STEERING,NR (Not Reported),,Vehicle
29132,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V032003,STEERING:COLUMN,1966-01-19T00:00:00.000,70644,,,"General Motors, LLC",No,No,STEERING,NR (Not Reported),,Vehicle


In [6]:
# Summarise each column's non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29134 entries, 0 to 29133
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   recall_link            29134 non-null  object 
 1   defect_summary         26733 non-null  object 
 2   nhtsa_id               29134 non-null  object 
 3   subject                29134 non-null  object 
 4   report_received_date   29134 non-null  object 
 5   potentially_affected   29134 non-null  int64  
 6   consequence_summary    24246 non-null  object 
 7   corrective_action      26746 non-null  object 
 8   manufacturer           29134 non-null  object 
 9   fire_risk_when_parked  29134 non-null  object 
 10  do_not_drive           29134 non-null  object 
 11  component              21622 non-null  object 
 12  mfr_campaign_number    29055 non-null  object 
 13  completion_rate        10435 non-null  float64
 14  recall_type            29134 non-null  object 
dtypes:

In [7]:
# Missing values per column in the raw data.
df.isna().sum()

recall_link                  0
defect_summary            2401
nhtsa_id                     0
subject                      0
report_received_date         0
potentially_affected         0
consequence_summary       4888
corrective_action         2388
manufacturer                 0
fire_risk_when_parked        0
do_not_drive                 0
component                 7512
mfr_campaign_number         79
completion_rate          18699
recall_type                  0
dtype: int64

In [8]:
# Show the frame again; no cell has modified it since the previous display.
df

Unnamed: 0,recall_link,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,completion_rate,recall_type
0,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Volkswagen Group of America, Inc. (Volkswagen)...",25V434000,Loss of Vehicle Control from Loose Brake and S...,2025-06-26T00:00:00.000,546,A loss of vehicle control increases the risk o...,"Dealers will replace the bolts as necessary, f...","Volkswagen Group of America, Inc.",No,No,,42HX,,Vehicle
1,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"General Motors, LLC (GM) is recalling certain ...",25V433000,Damaged Parking Brake Wiring Harness,2025-06-26T00:00:00.000,40233,Unintended activation of the parking brake whi...,"Dealers will inspect the harness and, if neces...","General Motors, LLC",No,No,,N252503010,,Vehicle
2,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,General Motors (GM) is recalling certain 2018 ...,25V432000,Roof Rail Air Bag Inflator Endcap May Detach,2025-06-25T00:00:00.000,1658,A detached end cap or inflator rupture can all...,Dealers will replace the left and right side R...,"General Motors, LLC",No,No,,N252513060,,Vehicle
3,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,Newmar Corporation (Newmar) is recalling certa...,25V431000,Baggage Door Skins May Come Loose and Become a...,2025-06-25T00:00:00.000,541,"A detached door skin can become a road hazard,...",Dealers will inspect and add fasteners and adh...,Newmar Corporation,No,No,,619 RSB,,Vehicle
4,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Chrysler (FCA US, LLC) is recalling certain 20...",25V430000,Inoperative Defrosting & Defogging System/FMVS...,2025-06-25T00:00:00.000,1689,An inoperative windshield defrosting and defog...,Dealers will inspect and replace the blower mo...,"Chrysler (FCA US, LLC)",No,No,VISIBILITY,49C,,Vehicle
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29129,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V003000,POWER TRAIN:TRANSMISSION:STANDARD:MANUAL,1966-09-29T00:00:00.000,18572,,,Honda (American Honda Motor Co.),No,No,POWER TRAIN,NR (Not Reported),,Vehicle
29130,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,POSSIBILITY THAT THE FRONT BRAKE HOSE END FITT...,66V178000,BRAKES:HYDRAULIC:LINES:HOSE:NON-METALLIC,1966-09-19T00:00:00.000,125,IF SUCH LEAKAGE OCCURS AND IF ENOUGH BRAKE FLU...,(REPLACE FRONT BRAKE HOSES AND SEALS.),"RENAULT, INCORPORATED",No,No,"SERVICE BRAKES, HYDRAULIC",NR (Not Reported),,Vehicle
29131,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V032001,STEERING:COLUMN,1966-01-19T00:00:00.000,138878,,,"General Motors, LLC",No,No,STEERING,NR (Not Reported),,Vehicle
29132,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V032003,STEERING:COLUMN,1966-01-19T00:00:00.000,70644,,,"General Motors, LLC",No,No,STEERING,NR (Not Reported),,Vehicle


In [9]:
# Print the first corrective_action text in full (the table view truncates it).
print(df['corrective_action'].iloc[:1].values)

["Dealers will replace the bolts as necessary, free of charge. Owner notification letters are expected to be mailed August 22, 2025. Owners may contact Audi customer service at 1-800-253-2834. Volkswagen's number for this recall is 42HX."]


In [10]:
# corrective_action values for the rows that have no defect_summary.
df.loc[df['defect_summary'].isna(), 'corrective_action'].values

array([nan, nan, nan, ..., nan, nan, nan], dtype=object)

In [11]:
# Re-check the dtypes; report_received_date is still stored as object here.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29134 entries, 0 to 29133
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   recall_link            29134 non-null  object 
 1   defect_summary         26733 non-null  object 
 2   nhtsa_id               29134 non-null  object 
 3   subject                29134 non-null  object 
 4   report_received_date   29134 non-null  object 
 5   potentially_affected   29134 non-null  int64  
 6   consequence_summary    24246 non-null  object 
 7   corrective_action      26746 non-null  object 
 8   manufacturer           29134 non-null  object 
 9   fire_risk_when_parked  29134 non-null  object 
 10  do_not_drive           29134 non-null  object 
 11  component              21622 non-null  object 
 12  mfr_campaign_number    29055 non-null  object 
 13  completion_rate        10435 non-null  float64
 14  recall_type            29134 non-null  object 
dtypes:

In [12]:
# Parse the timestamp strings into pandas datetimes so later cells can use
# the .dt accessor on this column.
df['report_received_date'] = pd.to_datetime(df['report_received_date'])

In [13]:
# Confirm that report_received_date is now datetime64[ns].
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29134 entries, 0 to 29133
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   recall_link            29134 non-null  object        
 1   defect_summary         26733 non-null  object        
 2   nhtsa_id               29134 non-null  object        
 3   subject                29134 non-null  object        
 4   report_received_date   29134 non-null  datetime64[ns]
 5   potentially_affected   29134 non-null  int64         
 6   consequence_summary    24246 non-null  object        
 7   corrective_action      26746 non-null  object        
 8   manufacturer           29134 non-null  object        
 9   fire_risk_when_parked  29134 non-null  object        
 10  do_not_drive           29134 non-null  object        
 11  component              21622 non-null  object        
 12  mfr_campaign_number    29055 non-null  object        
 13  c

In [14]:
# Extract the calendar year of each report date into its own column.
df['year'] = df['report_received_date'].dt.year

In [15]:
# Month-resolution period of each report date (displayed as e.g. 2025-06).
df['year_month'] = df['report_received_date'].dt.to_period('M')

In [16]:
# Inspect the frame with the new year and year_month columns.
df

Unnamed: 0,recall_link,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,completion_rate,recall_type,year,year_month
0,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Volkswagen Group of America, Inc. (Volkswagen)...",25V434000,Loss of Vehicle Control from Loose Brake and S...,2025-06-26,546,A loss of vehicle control increases the risk o...,"Dealers will replace the bolts as necessary, f...","Volkswagen Group of America, Inc.",No,No,,42HX,,Vehicle,2025,2025-06
1,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"General Motors, LLC (GM) is recalling certain ...",25V433000,Damaged Parking Brake Wiring Harness,2025-06-26,40233,Unintended activation of the parking brake whi...,"Dealers will inspect the harness and, if neces...","General Motors, LLC",No,No,,N252503010,,Vehicle,2025,2025-06
2,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,General Motors (GM) is recalling certain 2018 ...,25V432000,Roof Rail Air Bag Inflator Endcap May Detach,2025-06-25,1658,A detached end cap or inflator rupture can all...,Dealers will replace the left and right side R...,"General Motors, LLC",No,No,,N252513060,,Vehicle,2025,2025-06
3,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,Newmar Corporation (Newmar) is recalling certa...,25V431000,Baggage Door Skins May Come Loose and Become a...,2025-06-25,541,"A detached door skin can become a road hazard,...",Dealers will inspect and add fasteners and adh...,Newmar Corporation,No,No,,619 RSB,,Vehicle,2025,2025-06
4,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Chrysler (FCA US, LLC) is recalling certain 20...",25V430000,Inoperative Defrosting & Defogging System/FMVS...,2025-06-25,1689,An inoperative windshield defrosting and defog...,Dealers will inspect and replace the blower mo...,"Chrysler (FCA US, LLC)",No,No,VISIBILITY,49C,,Vehicle,2025,2025-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29129,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V003000,POWER TRAIN:TRANSMISSION:STANDARD:MANUAL,1966-09-29,18572,,,Honda (American Honda Motor Co.),No,No,POWER TRAIN,NR (Not Reported),,Vehicle,1966,1966-09
29130,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,POSSIBILITY THAT THE FRONT BRAKE HOSE END FITT...,66V178000,BRAKES:HYDRAULIC:LINES:HOSE:NON-METALLIC,1966-09-19,125,IF SUCH LEAKAGE OCCURS AND IF ENOUGH BRAKE FLU...,(REPLACE FRONT BRAKE HOSES AND SEALS.),"RENAULT, INCORPORATED",No,No,"SERVICE BRAKES, HYDRAULIC",NR (Not Reported),,Vehicle,1966,1966-09
29131,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V032001,STEERING:COLUMN,1966-01-19,138878,,,"General Motors, LLC",No,No,STEERING,NR (Not Reported),,Vehicle,1966,1966-01
29132,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V032003,STEERING:COLUMN,1966-01-19,70644,,,"General Motors, LLC",No,No,STEERING,NR (Not Reported),,Vehicle,1966,1966-01


In [17]:
# Number of rows for which no year could be derived.
df['year'].isna().sum()

0

In [18]:
# Check the dtype and non-null count of the derived year column.
df['year'].info()

<class 'pandas.core.series.Series'>
RangeIndex: 29134 entries, 0 to 29133
Series name: year
Non-Null Count  Dtype
--------------  -----
29134 non-null  int32
dtypes: int32(1)
memory usage: 113.9 KB


In [19]:
# How many rows have no completion_rate.
df['completion_rate'].isna().sum()

18699

In [20]:
# completion_rate is missing for most rows (see the count above), so remove
# the column from the frame in place.
del df['completion_rate']

In [21]:
# Missing values per column after removing completion_rate.
df.isna().sum()

recall_link                 0
defect_summary           2401
nhtsa_id                    0
subject                     0
report_received_date        0
potentially_affected        0
consequence_summary      4888
corrective_action        2388
manufacturer                0
fire_risk_when_parked       0
do_not_drive                0
component                7512
mfr_campaign_number        79
recall_type                 0
year                        0
year_month                  0
dtype: int64

In [22]:
# First five rows (the default for head) of the trimmed frame.
df.head()

Unnamed: 0,recall_link,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,recall_type,year,year_month
0,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Volkswagen Group of America, Inc. (Volkswagen)...",25V434000,Loss of Vehicle Control from Loose Brake and S...,2025-06-26,546,A loss of vehicle control increases the risk o...,"Dealers will replace the bolts as necessary, f...","Volkswagen Group of America, Inc.",No,No,,42HX,Vehicle,2025,2025-06
1,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"General Motors, LLC (GM) is recalling certain ...",25V433000,Damaged Parking Brake Wiring Harness,2025-06-26,40233,Unintended activation of the parking brake whi...,"Dealers will inspect the harness and, if neces...","General Motors, LLC",No,No,,N252503010,Vehicle,2025,2025-06
2,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,General Motors (GM) is recalling certain 2018 ...,25V432000,Roof Rail Air Bag Inflator Endcap May Detach,2025-06-25,1658,A detached end cap or inflator rupture can all...,Dealers will replace the left and right side R...,"General Motors, LLC",No,No,,N252513060,Vehicle,2025,2025-06
3,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,Newmar Corporation (Newmar) is recalling certa...,25V431000,Baggage Door Skins May Come Loose and Become a...,2025-06-25,541,"A detached door skin can become a road hazard,...",Dealers will inspect and add fasteners and adh...,Newmar Corporation,No,No,,619 RSB,Vehicle,2025,2025-06
4,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Chrysler (FCA US, LLC) is recalling certain 20...",25V430000,Inoperative Defrosting & Defogging System/FMVS...,2025-06-25,1689,An inoperative windshield defrosting and defog...,Dealers will inspect and replace the blower mo...,"Chrysler (FCA US, LLC)",No,No,VISIBILITY,49C,Vehicle,2025,2025-06


In [23]:
# Up to five rows where potentially_affected is missing.
df.loc[df['potentially_affected'].isna()].head()

Unnamed: 0,recall_link,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,recall_type,year,year_month


In [24]:
# pa:   rows with no potentially_affected value
# cn:   rows whose campaign number is the "NR (Not Reported)" placeholder
# coct: rows meeting both of the above conditions
pa = df['potentially_affected'].isna().sum()
cn = df['mfr_campaign_number'].eq("NR (Not Reported)").sum()
coct = (
    df['potentially_affected'].isna()
    & df['mfr_campaign_number'].eq("NR (Not Reported)")
).sum()

In [25]:
print("Total number of missing potentially_affected:", pa)
print("Total number of missing campaign_number:", cn)
# coct is computed with `&`, i.e. rows that meet both conditions at once,
# so it is an intersection rather than a union.
print("Intersection of both:", coct)

Total number of missing potentially_affected: 0
Total number of missing campaign_number: 16649
Union of both: 0


In [26]:
# How many rows have no component value.
df['component'].isna().sum()

7512

In [27]:
# Rows where component is missing.
df.loc[df['component'].isna()]

Unnamed: 0,recall_link,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,recall_type,year,year_month
0,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Volkswagen Group of America, Inc. (Volkswagen)...",25V434000,Loss of Vehicle Control from Loose Brake and S...,2025-06-26,546,A loss of vehicle control increases the risk o...,"Dealers will replace the bolts as necessary, f...","Volkswagen Group of America, Inc.",No,No,,42HX,Vehicle,2025,2025-06
1,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"General Motors, LLC (GM) is recalling certain ...",25V433000,Damaged Parking Brake Wiring Harness,2025-06-26,40233,Unintended activation of the parking brake whi...,"Dealers will inspect the harness and, if neces...","General Motors, LLC",No,No,,N252503010,Vehicle,2025,2025-06
2,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,General Motors (GM) is recalling certain 2018 ...,25V432000,Roof Rail Air Bag Inflator Endcap May Detach,2025-06-25,1658,A detached end cap or inflator rupture can all...,Dealers will replace the left and right side R...,"General Motors, LLC",No,No,,N252513060,Vehicle,2025,2025-06
3,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,Newmar Corporation (Newmar) is recalling certa...,25V431000,Baggage Door Skins May Come Loose and Become a...,2025-06-25,541,"A detached door skin can become a road hazard,...",Dealers will inspect and add fasteners and adh...,Newmar Corporation,No,No,,619 RSB,Vehicle,2025,2025-06
8,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,"Forest River, Inc. (Forest River) is recalling...",25V425000,Patio Door Transit Latch Cannot be Unlocked fr...,2025-06-23,126,A door that remains locked can prevent or dela...,Dealers will remove the exterior transit latch...,"Forest River, Inc.",No,No,,69-1945,Vehicle,2025,2025-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29105,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V012000,FUEL:THROTTLE LINKAGES AND CONTROL,1966-10-18,47300,,,Ford Motor Company,No,No,,NR (Not Reported),Vehicle,1966,1966-10
29106,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V013000,WHEELS,1966-10-18,71,,,"Chrysler (FCA US, LLC)",No,No,,NR (Not Reported),Vehicle,1966,1966-10
29107,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V010000,FUEL:THROTTLE LINKAGES AND CONTROL,1966-10-13,428,,,"Chrysler (FCA US, LLC)",No,No,,NR (Not Reported),Vehicle,1966,1966-10
29111,{'url': 'https://www.nhtsa.gov/recalls?nhtsaId...,,66V007000,WHEELS,1966-10-12,9869,,,"Chrysler (FCA US, LLC)",No,No,,NR (Not Reported),Vehicle,1966,1966-10


In [28]:
# Rows with no potentially_affected value whose campaign number is something
# other than the "NR (Not Reported)" placeholder.
df.loc[
    df['potentially_affected'].isna()
    & df['mfr_campaign_number'].ne("NR (Not Reported)")
]

Unnamed: 0,recall_link,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,recall_type,year,year_month


In [29]:
# Missing values per column before dropping incomplete rows.
df.isna().sum()

recall_link                 0
defect_summary           2401
nhtsa_id                    0
subject                     0
report_received_date        0
potentially_affected        0
consequence_summary      4888
corrective_action        2388
manufacturer                0
fire_risk_when_parked       0
do_not_drive                0
component                7512
mfr_campaign_number        79
recall_type                 0
year                        0
year_month                  0
dtype: int64

In [30]:
# Remove, in place, every row that is missing potentially_affected or
# mfr_campaign_number.
df.dropna(subset=['potentially_affected','mfr_campaign_number'], inplace=True)

In [31]:
# Missing values per column after dropping incomplete rows.
df.isna().sum()

recall_link                 0
defect_summary           2401
nhtsa_id                    0
subject                     0
report_received_date        0
potentially_affected        0
consequence_summary      4888
corrective_action        2388
manufacturer                0
fire_risk_when_parked       0
do_not_drive                0
component                7491
mfr_campaign_number         0
recall_type                 0
year                        0
year_month                  0
dtype: int64

In [32]:
# Replace missing values in these text columns with the placeholder 'Unknown'.
text_cols = ['consequence_summary', 'corrective_action', 'defect_summary','component']
df[text_cols] = df[text_cols].fillna('Unknown')

In [33]:
# Verify that no column has missing values left.
df.isna().sum()

recall_link              0
defect_summary           0
nhtsa_id                 0
subject                  0
report_received_date     0
potentially_affected     0
consequence_summary      0
corrective_action        0
manufacturer             0
fire_risk_when_parked    0
do_not_drive             0
component                0
mfr_campaign_number      0
recall_type              0
year                     0
year_month               0
dtype: int64

In [34]:
# Count how many rows fall under each fire_risk_when_parked value.
df['fire_risk_when_parked'].value_counts()

fire_risk_when_parked
No     28981
Yes       74
Name: count, dtype: int64

In [35]:
# Row count minus distinct-value count: 0 means no recall_link is repeated.
df['recall_link'].size - df['recall_link'].nunique()

0

In [36]:
# Show one complete recall_link value (the table view truncates it).
df['recall_link'].iloc[:1].values

array(["{'url': 'https://www.nhtsa.gov/recalls?nhtsaId=25V434000', 'description': 'Go to Recall'}"],
      dtype=object)

In [37]:
# Remove the recall_link column in place; the sample above suggests its URL
# is built from the NHTSA id, which the frame keeps in nhtsa_id.
del df['recall_link']

In [38]:
# Final look at the cleaned frame before it is written to disk.
df

Unnamed: 0,defect_summary,nhtsa_id,subject,report_received_date,potentially_affected,consequence_summary,corrective_action,manufacturer,fire_risk_when_parked,do_not_drive,component,mfr_campaign_number,recall_type,year,year_month
0,"Volkswagen Group of America, Inc. (Volkswagen)...",25V434000,Loss of Vehicle Control from Loose Brake and S...,2025-06-26,546,A loss of vehicle control increases the risk o...,"Dealers will replace the bolts as necessary, f...","Volkswagen Group of America, Inc.",No,No,Unknown,42HX,Vehicle,2025,2025-06
1,"General Motors, LLC (GM) is recalling certain ...",25V433000,Damaged Parking Brake Wiring Harness,2025-06-26,40233,Unintended activation of the parking brake whi...,"Dealers will inspect the harness and, if neces...","General Motors, LLC",No,No,Unknown,N252503010,Vehicle,2025,2025-06
2,General Motors (GM) is recalling certain 2018 ...,25V432000,Roof Rail Air Bag Inflator Endcap May Detach,2025-06-25,1658,A detached end cap or inflator rupture can all...,Dealers will replace the left and right side R...,"General Motors, LLC",No,No,Unknown,N252513060,Vehicle,2025,2025-06
3,Newmar Corporation (Newmar) is recalling certa...,25V431000,Baggage Door Skins May Come Loose and Become a...,2025-06-25,541,"A detached door skin can become a road hazard,...",Dealers will inspect and add fasteners and adh...,Newmar Corporation,No,No,Unknown,619 RSB,Vehicle,2025,2025-06
4,"Chrysler (FCA US, LLC) is recalling certain 20...",25V430000,Inoperative Defrosting & Defogging System/FMVS...,2025-06-25,1689,An inoperative windshield defrosting and defog...,Dealers will inspect and replace the blower mo...,"Chrysler (FCA US, LLC)",No,No,VISIBILITY,49C,Vehicle,2025,2025-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29129,Unknown,66V003000,POWER TRAIN:TRANSMISSION:STANDARD:MANUAL,1966-09-29,18572,Unknown,Unknown,Honda (American Honda Motor Co.),No,No,POWER TRAIN,NR (Not Reported),Vehicle,1966,1966-09
29130,POSSIBILITY THAT THE FRONT BRAKE HOSE END FITT...,66V178000,BRAKES:HYDRAULIC:LINES:HOSE:NON-METALLIC,1966-09-19,125,IF SUCH LEAKAGE OCCURS AND IF ENOUGH BRAKE FLU...,(REPLACE FRONT BRAKE HOSES AND SEALS.),"RENAULT, INCORPORATED",No,No,"SERVICE BRAKES, HYDRAULIC",NR (Not Reported),Vehicle,1966,1966-09
29131,Unknown,66V032001,STEERING:COLUMN,1966-01-19,138878,Unknown,Unknown,"General Motors, LLC",No,No,STEERING,NR (Not Reported),Vehicle,1966,1966-01
29132,Unknown,66V032003,STEERING:COLUMN,1966-01-19,70644,Unknown,Unknown,"General Motors, LLC",No,No,STEERING,NR (Not Reported),Vehicle,1966,1966-01


In [39]:
# Persist the cleaned frame to the working directory. With its default
# arguments to_csv also writes the row index as an unnamed first column.
# NOTE(review): consider index=False if nothing downstream reads that column.
df.to_csv("vehicle_recalls_clean.csv")