In [3]:
# Import necessary plugins to pull and parse data
import json
import pandas as pd
from bs4 import BeautifulSoup
import requests
import random
from datetime import datetime, timedelta
from sqlalchemy import create_engine

## CIS Automotive API Endpoint

In [9]:
# Models
models = ['model y', 'model s', 'model 3', 'Ioniq 5', 'Bolt EV', 'Mustang Mach-E', 'ID.4']

response_bucket = []

for model in models:
    # api request
    api_url = 'https://api.api-ninjas.com/v1/cars?model={}'.format(model)

    # make call and save responses
    response = requests.get(api_url, headers={'X-Api-Key': 'key'})
    response_bucket.append(response)
    
# if response
if response.status_code == requests.codes.ok:
    response_bucket.append(response)
    print(response)
else:
    print("Error:", response.status_code, response.text)

<Response [200]>


In [10]:
# verify results are json objects and have data
for response in response_bucket:
    # responses appear to have nested model variations details within each response
    print(type(json.loads(response.text)))
    print(json.loads(response.text))

<class 'list'>
[{'city_mpg': 129, 'class': 'small sport utility vehicle', 'combination_mpg': 121, 'drive': 'awd', 'fuel_type': 'electricity', 'highway_mpg': 112, 'make': 'tesla', 'model': 'model y performance awd', 'transmission': 'a', 'year': 2020}, {'city_mpg': 116, 'class': 'small sport utility vehicle', 'combination_mpg': 111, 'drive': 'awd', 'fuel_type': 'electricity', 'highway_mpg': 106, 'make': 'tesla', 'model': 'model y performance awd (21in wheels)', 'transmission': 'a', 'year': 2020}, {'city_mpg': 127, 'class': 'small sport utility vehicle', 'combination_mpg': 121, 'drive': 'awd', 'fuel_type': 'electricity', 'highway_mpg': 114, 'make': 'tesla', 'model': 'model y long range awd', 'transmission': 'a', 'year': 2020}, {'city_mpg': 131, 'class': 'small sport utility vehicle', 'combination_mpg': 125, 'drive': 'awd', 'fuel_type': 'electricity', 'highway_mpg': 117, 'make': 'tesla', 'model': 'model y long range awd', 'transmission': 'a', 'year': 2021}, {'city_mpg': 115, 'class': 'smal

In [11]:
# Collection loop
api_response = {
    'make': [],
    'model': [],
    'class': [],
    'drive': [],
    'fuel_type': [],
    'city_mpg': [],
    'highway_mpg': [],
    'combination_mpg': [],
    'transmission': [],
    'year': []
}

# Assuming response_bucket contains all your API response objects
for response in response_bucket:
    # Load JSON data from the response text
    data = json.loads(response.text)  # Assuming each response.text is a JSON string of a list

    # Then iterate through each model variant in the data list
    for element in data:
        # Add each model detail to the corresponding list in api_response
        api_response['make'].append(element.get('make', 'N/A'))  # Using get to avoid KeyError if the key doesn't exist
        api_response['model'].append(element.get('model', 'N/A'))
        api_response['class'].append(element.get('class', 'N/A'))
        api_response['drive'].append(element.get('drive', 'N/A'))
        api_response['fuel_type'].append(element.get('fuel_type', 'N/A'))
        api_response['city_mpg'].append(element.get('city_mpg', 'N/A'))
        api_response['highway_mpg'].append(element.get('highway_mpg', 'N/A'))
        api_response['combination_mpg'].append(element.get('combination_mpg', 'N/A'))
        api_response['transmission'].append(element.get('transmission', 'N/A'))
        api_response['year'].append(element.get('year', 'N/A'))
        

In [12]:
# check if info was appropriately added
print(api_response)

{'make': ['tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'tesla', 'hyundai', 'hyundai', 'hyundai', 'hyundai', 'hyundai', 'chevrolet', 'chevrolet', 'chevrolet', 'chevrolet', 'chevrolet', 'ford', 'ford', 'ford', 'ford', 'ford', 'volkswagen', 'volkswagen', 'volkswagen', 'volkswagen', 'volkswagen', 'volkswagen', 'volkswagen', 'volkswagen', 'volkswagen', 'volkswagen'], 'model': ['model y performance awd', 'model y performance awd (21in wheels)', 'model y long range awd', 'model y long range awd', 'model y performance awd', 'model s', 'model s (60 kw-hr battery pack)', 'model s (85 kw-hr battery pack)', 'model s (40 kw-hr battery pack)', 'model s (85 kw-hr battery pack)', 'model 3 long range', 'model 3 long range', 'model 3 long range  awd', 'model 3 long range  awd performance', 'model 3 mid range', 'ioniq 5 awd (long range)', 'ioniq 5 rwd (long range)', 'ioniq 5 rwd (standard range)', 'ioniq 5 long range rwd', '

In [13]:
df_CIS = pd.DataFrame(api_response)
print(df_CIS)

          make                                  model  \
0        tesla                model y performance awd   
1        tesla  model y performance awd (21in wheels)   
2        tesla                 model y long range awd   
3        tesla                 model y long range awd   
4        tesla                model y performance awd   
5        tesla                                model s   
6        tesla        model s (60 kw-hr battery pack)   
7        tesla        model s (85 kw-hr battery pack)   
8        tesla        model s (40 kw-hr battery pack)   
9        tesla        model s (85 kw-hr battery pack)   
10       tesla                     model 3 long range   
11       tesla                     model 3 long range   
12       tesla                model 3 long range  awd   
13       tesla    model 3 long range  awd performance   
14       tesla                      model 3 mid range   
15     hyundai               ioniq 5 awd (long range)   
16     hyundai               io

In [4]:
db_username = 'db_username'
db_password = 'db_password'
db_host = 'db_host'
db_port = 'db_port'
db_name = 'db_name'

# Create the SQLAlchemy engine
engine = create_engine(f'mysql+pymysql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}')

In [20]:
df_CIS.to_sql('ModelCarSpecs', con=engine, if_exists='append', index=False)

40

## NHTSA Api Endpoint

In [5]:
# Api endpoint call sourcing all complaints for relevant tesla models
url_models = ['https://api.nhtsa.gov/complaints/complaintsByVehicle?make=tesla&model=model s&modelYear=2012', 
              'https://api.nhtsa.gov/complaints/complaintsByVehicle?make=tesla&model=model 3&modelYear=2017', 
              'https://api.nhtsa.gov/complaints/complaintsByVehicle?make=tesla&model=model y&modelYear=2020',
              'https://api.nhtsa.gov/complaints/complaintsByVehicle?make=hyundai&model=Ioniq 5&modelYear=2022',
              'https://api.nhtsa.gov/complaints/complaintsByVehicle?make=chevrolet&model=Bolt EV&modelYear=2023',
              'https://api.nhtsa.gov/complaints/complaintsByVehicle?make=ford&model=Mustang Mach-E&modelYear=2022',
              'https://api.nhtsa.gov/complaints/complaintsByVehicle?make=volkswagen&model=ID.4&modelYear=2022'
            ]

collection_bucket = []

# Iterate over each URL and make the request
for url in url_models:
    response = requests.get(url)
    
    # Check if the response was successful
    if response.status_code == 200:
        collection_bucket.append(response.json())
    else:
        print(f'Failed to fetch data from {url} with status code: {response.status_code}')


In [8]:
# Check integrity of data
print(collection_bucket)



In [9]:
# Checking the elements within every complaint
for element in collection_bucket:
    for complaint in element['results']:
        print(complaint)
        print('-'*100)

{'odiNumber': 11512319, 'manufacturer': 'Tesla, Inc.', 'crash': False, 'fire': False, 'numberOfInjuries': 0, 'numberOfDeaths': 0, 'dateOfIncident': '02/11/2021', 'dateComplaintFiled': '03/17/2023', 'vin': '5YJSA1CP1CF', 'components': 'ENGINE', 'summary': 'Motor is starting to make sounds and according to teslamotorsclub.com the motor will fail eventually. Many have to had to have the motor switched out multiple times. It is quite evident that there was a design flaw which Tesla corrected in later years.', 'products': [{'type': 'Vehicle', 'productYear': '2012', 'productMake': 'TESLA', 'productModel': 'MODEL S', 'manufacturer': 'Tesla, Inc.'}]}
----------------------------------------------------------------------------------------------------
{'odiNumber': 11510630, 'manufacturer': 'Tesla, Inc.', 'crash': False, 'fire': False, 'numberOfInjuries': 0, 'numberOfDeaths': 0, 'dateOfIncident': '01/19/2023', 'dateComplaintFiled': '03/07/2023', 'vin': '5YJSA1CN7CF', 'components': 'POWER TRAIN',

In [18]:
# Collection loop
# Assuming 'collection_bucket' contains the list of JSON responses
complaints_data = []

# Iterate through each complaint record in the collection_bucket
for response_data in collection_bucket:
    for complaint in response_data.get('results', []):  # Assuming each response has a 'results' key that contains the complaint data
        # Extract the necessary elements from each complaint
        complaint_record = {
            'ODINumber': complaint.get('odiNumber'),
            'Manufacturer': complaint.get('manufacturer'),
            'Crash': complaint.get('crash'),
            'Fire': complaint.get('fire'),
            'Injuries': complaint.get('numberOfInjuries'),
            'Deaths': complaint.get('numberOfDeaths'),
            'IncidentDate': complaint.get('dateOfIncident'),
            'ComplaintDate': complaint.get('dateComplaintFiled'),
            'VIN': complaint.get('vin'),
            'Component': complaint.get('components'),
            'Summary': complaint.get('summary'),
            'ProductYear': complaint['products'][0].get('productYear') if complaint.get('products') else '',
            'ProductMake': complaint['products'][0].get('productMake') if complaint.get('products') else '',
            'ProductModel': complaint['products'][0].get('productModel') if complaint.get('products') else '',
        }
        complaints_data.append(complaint_record)

In [19]:
# Convert the list of dictionaries to a pandas DataFrame
df_complaints = pd.DataFrame(complaints_data)

In [20]:
# Display DF head
print(df_complaints)

     ODINumber                       Manufacturer  Crash   Fire  Injuries  \
0     11512319                        Tesla, Inc.  False  False         0   
1     11510630                        Tesla, Inc.  False  False         0   
2     11495377                        Tesla, Inc.  False  False         0   
3     11488480                        Tesla, Inc.  False  False         0   
4     11468855                        Tesla, Inc.  False  False         0   
..         ...                                ...    ...    ...       ...   
633   11487006  Volkswagen Group of America, Inc.  False  False         0   
634   11484968  Volkswagen Group of America, Inc.  False  False         0   
635   11484722  Volkswagen Group of America, Inc.  False  False         0   
636   11479641  Volkswagen Group of America, Inc.  False  False         0   
637   11478976  Volkswagen Group of America, Inc.  False  False         0   

     Deaths IncidentDate ComplaintDate          VIN  \
0         0   02/11/

In [21]:
# Fix to load dates data into SQL
df_complaints['IncidentDate'] = pd.to_datetime(df_complaints['IncidentDate']).dt.date
df_complaints['ComplaintDate'] = pd.to_datetime(df_complaints['ComplaintDate']).dt.date

In [22]:
# Display DF head
print(df_complaints)

     ODINumber                       Manufacturer  Crash   Fire  Injuries  \
0     11512319                        Tesla, Inc.  False  False         0   
1     11510630                        Tesla, Inc.  False  False         0   
2     11495377                        Tesla, Inc.  False  False         0   
3     11488480                        Tesla, Inc.  False  False         0   
4     11468855                        Tesla, Inc.  False  False         0   
..         ...                                ...    ...    ...       ...   
633   11487006  Volkswagen Group of America, Inc.  False  False         0   
634   11484968  Volkswagen Group of America, Inc.  False  False         0   
635   11484722  Volkswagen Group of America, Inc.  False  False         0   
636   11479641  Volkswagen Group of America, Inc.  False  False         0   
637   11478976  Volkswagen Group of America, Inc.  False  False         0   

     Deaths IncidentDate ComplaintDate          VIN  \
0         0   2021-0

In [23]:
df_complaints.to_sql('MaintenanceInfo', con=engine, if_exists='append', index=False)

638

In [24]:
engine.dispose()