In [1]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import confusion_matrix
import tensorflow.keras.backend as K


In [2]:
# Step 1: Data Preprocessing

def load_data(image_dir, annotations_file, img_size=(224, 224)):
    """Load images with their class labels and bounding boxes from a CSV file.

    Args:
        image_dir: Directory that each ``filename`` entry is joined onto.
        annotations_file: Path of a CSV providing the columns ``filename``,
            ``label``, ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
        img_size: Target size handed to ``load_img`` for every image.

    Returns:
        Tuple ``(images, labels, boxes)`` of NumPy arrays: the pixel data
        divided by 255, the ``label`` column, and one
        ``[xmin, ymin, xmax, ymax]`` row per annotation.
    """
    annotations = pd.read_csv(annotations_file)

    pixel_arrays = [
        img_to_array(load_img(os.path.join(image_dir, filename), target_size=img_size))
        for filename in annotations['filename']
    ]

    # Normalise once, after every image has been collected. Doing it inside the
    # loop rebinds the list to an ndarray, so the next append() raises.
    images = np.array(pixel_arrays, dtype='float32') / 255.0

    labels = annotations['label'].to_numpy()
    # NOTE(review): boxes are returned exactly as stored in the CSV and are not
    # rescaled to img_size - confirm the annotations refer to the resized images.
    boxes = annotations[['xmin', 'ymin', 'xmax', 'ymax']].to_numpy()

    return images, labels, boxes


In [None]:
# Dataset locations (presumably placeholders - point them at the real data),
# followed by the actual load through load_data().
annotations_file = 'annotations.csv'
image_dir = 'path_to_images'
images, labels, boxes = load_data(image_dir=image_dir, annotations_file=annotations_file)

In [None]:
# Step 2: Split Data into Train, Validation, and Test

# The bounding boxes are split together with the images and labels so that each
# subset keeps its own row-aligned box targets. Passing one more array does not
# change which rows land in which subset.
X_train, X_temp, y_train, y_temp, boxes_train, boxes_temp = train_test_split(
    images, labels, boxes, test_size=0.2, random_state=42)

# The 20% hold-out is halved into validation and test sets.
X_val, X_test, y_val, y_test, boxes_val, boxes_test = train_test_split(
    X_temp, y_temp, boxes_temp, test_size=0.5, random_state=42)

# Step 3: Building the Model Using Pretrained VGG16 and Custom Classifier

# ImageNet-initialised VGG16 without its top classifier, kept frozen.
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
base_model.trainable = False

inputs = Input(shape=(224, 224, 3))

# Shared trunk: frozen backbone features, flattened, then three ReLU layers.
x = Flatten()(base_model(inputs, training=False))
for hidden_units in (128, 64, 32):
    x = Dense(hidden_units, activation='relu')(x)

In [None]:
# Regression head: four linear units, one per bounding-box coordinate.
bbox_output = Dense(4, name='bbox_output')(x)

# Classification head: one softmax unit per distinct value found in `labels`.
num_classes = len(np.unique(labels))
class_output = Dense(num_classes, activation='softmax', name='class_output')(x)

# Two-output model; the output order is [bbox, class].
model = Model(inputs=inputs, outputs=[bbox_output, class_output])


In [None]:
# Step 4: Compile the Model

# One loss and one metric per named output head.
model.compile(optimizer='adam',
              loss={'bbox_output': 'mse', 'class_output': 'sparse_categorical_crossentropy'},
              metrics={'bbox_output': 'mae', 'class_output': 'accuracy'})

# Step 5: Model Training with Early Stopping and Checkpointing

callbacks = [
    EarlyStopping(patience=10, restore_best_weights=True),
    ModelCheckpoint("best_model.h5", save_best_only=True)
]

# `boxes` holds one row per image of the FULL dataset, so it cannot be paired
# with X_train / X_val directly. Re-splitting it with the same test_size and
# random_state as the image split reproduces the same row partition, which
# keeps every box aligned with its image.
boxes_train, boxes_holdout = train_test_split(boxes, test_size=0.2, random_state=42)
boxes_val, boxes_test = train_test_split(boxes_holdout, test_size=0.5, random_state=42)

history = model.fit(X_train,
                    {'bbox_output': boxes_train, 'class_output': y_train},
                    validation_data=(X_val, {'bbox_output': boxes_val, 'class_output': y_val}),
                    epochs=50,
                    callbacks=callbacks)


In [None]:
# Step 6: Evaluate the Model on Test Data

model.load_weights("best_model.h5")

# The test images need the test-subset boxes, not the full `boxes` array.
# Re-splitting with the same test_size and random_state as the image split
# reproduces the same row partition.
boxes_holdout = train_test_split(boxes, test_size=0.2, random_state=42)[1]
boxes_test = train_test_split(boxes_holdout, test_size=0.5, random_state=42)[1]

# NOTE(review): the positional unpacking assumes evaluate() returns
# [total loss, bbox loss, class loss, bbox MAE, class accuracy] - confirm for
# the installed Keras version.
test_loss, test_bbox_loss, test_class_loss, test_bbox_mae, test_class_accuracy = model.evaluate(
    X_test, {'bbox_output': boxes_test, 'class_output': y_test})

print(f"Test Loss: {test_loss:.4f}, Test Class Accuracy: {test_class_accuracy:.4f}")

In [None]:
# Step 7: Compute Confusion Matrix

# The model returns [bbox, class]; only the class probabilities are needed here.
_, class_probabilities = model.predict(X_test)
y_pred_class = np.argmax(class_probabilities, axis=1)

cm = confusion_matrix(y_test, y_pred_class)
print("Confusion Matrix:", cm, sep="\n")

In [11]:
# Step 8: Visualize the Predictions with Bounding Boxes

def visualize_boxes(image, predicted_boxes, predicted_labels, scores, label_dict):
    """Draw labelled bounding boxes on a copy of ``image`` and display it.

    Args:
        image: Image array to annotate; it is copied, never modified.
        predicted_boxes: One ``[xmin, ymin, xmax, ymax]`` box or a sequence of
            such boxes. Float coordinates are rounded to whole pixels.
        predicted_labels: Label key for each box, looked up in ``label_dict``.
        scores: Numeric score for each box, printed with two decimals.
        label_dict: Mapping from label key to the text drawn next to the box.
    """
    # Draw on a copy so the caller's array (e.g. a test sample) stays untouched.
    canvas = np.array(image, copy=True)

    # reshape(-1, 4) accepts a single flat box as well as an (n, 4) batch.
    boxes_2d = np.asarray(predicted_boxes, dtype=float).reshape(-1, 4)

    for i, box in enumerate(boxes_2d):
        # OpenCV drawing functions need plain integer pixel coordinates.
        xmin, ymin, xmax, ymax = (int(round(float(value))) for value in box)
        cv2.rectangle(canvas, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        cv2.putText(canvas, f"{label_dict[predicted_labels[i]]}: {scores[i]:.2f}",
                    (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    plt.imshow(canvas)
    plt.show()

In [None]:
# Predict bounding boxes and labels for a sample image
sample_image = X_test[0]  # Example: Taking the first image from the test set
predicted_bbox, class_probabilities = model.predict(np.expand_dims(sample_image, axis=0))
predicted_class = np.argmax(class_probabilities, axis=1)
# The confidence shown next to the box is the winning softmax probability,
# not the class index.
predicted_scores = np.max(class_probabilities, axis=1)

# Visualize the result
label_dict = {0: 'car', 1: 'truck', 2: 'bus'}  # Modify based on your labels

# Pass a list of boxes with whole-pixel integer coordinates, and draw on a copy
# so the test sample itself is left unmodified.
boxes_to_draw = np.rint(predicted_bbox).astype(int).tolist()
visualize_boxes(sample_image.copy(), boxes_to_draw, predicted_class, predicted_scores, label_dict)

In [13]:
# Step 9: Compute IoU (Intersection over Union)

def calculate_iou(pred_bbox, true_bbox):
    """Intersection over union of two ``[x1, y1, x2, y2]`` boxes.

    Returns ``0`` when the union is zero; otherwise ``intersection / union``.
    """
    # Corners of the overlapping rectangle.
    inter_x1 = max(pred_bbox[0], true_bbox[0])
    inter_y1 = max(pred_bbox[1], true_bbox[1])
    inter_x2 = min(pred_bbox[2], true_bbox[2])
    inter_y2 = min(pred_bbox[3], true_bbox[3])

    # Clamp at zero so disjoint boxes contribute no overlap.
    overlap_width = max(0, inter_x2 - inter_x1)
    overlap_height = max(0, inter_y2 - inter_y1)
    intersection = overlap_width * overlap_height

    pred_width = pred_bbox[2] - pred_bbox[0]
    pred_height = pred_bbox[3] - pred_bbox[1]
    true_width = true_bbox[2] - true_bbox[0]
    true_height = true_bbox[3] - true_bbox[1]

    union = pred_width * pred_height + true_width * true_height - intersection
    if union == 0:
        return 0
    return intersection / union


In [14]:
# Calculate mAP (Mean Average Precision)

def compute_map(pred_boxes, true_boxes, pred_labels, true_labels, iou_threshold=0.5):
    """Fraction of predicted boxes that overlap their paired ground-truth box.

    Boxes are paired positionally; a pair counts as a hit when its IoU is at
    least ``iou_threshold``.

    Note: despite the name this is not a ranked, per-class mean average
    precision. ``pred_labels`` and ``true_labels`` are accepted for interface
    compatibility but are not used in the computation.

    Args:
        pred_boxes: Sequence of predicted ``[xmin, ymin, xmax, ymax]`` boxes.
        true_boxes: Sequence of ground-truth boxes, in the same order.
        pred_labels: Unused.
        true_labels: Unused.
        iou_threshold: Minimum IoU for a pair to count as a hit.

    Returns:
        The hit rate as a float, or ``0.0`` when there are no box pairs.
    """
    hits = [
        calculate_iou(p_bbox, t_bbox) >= iou_threshold
        for p_bbox, t_bbox in zip(pred_boxes, true_boxes)
    ]
    # np.mean([]) would emit a RuntimeWarning and return NaN.
    if not hits:
        return 0.0
    return float(np.mean(hits))

In [None]:
# Example for computing IoU and mAP
predicted_boxes = predicted_bbox

# The sample image is X_test[0], so its ground truth is the first row of the
# TEST-subset boxes, not boxes[0] of the full dataset. Re-splitting `boxes`
# with the same test_size and random_state as the image split reproduces the
# same row partition.
boxes_holdout = train_test_split(boxes, test_size=0.2, random_state=42)[1]
boxes_test = train_test_split(boxes_holdout, test_size=0.5, random_state=42)[1]

# Slice with [:1] to keep a 2-D (1, 4) array, so the boxes pair up one-to-one
# with the single predicted box.
true_boxes = boxes_test[:1]
mAP = compute_map(predicted_boxes, true_boxes, predicted_class, y_test[:1])
print(f"mAP: {mAP:.4f}")

Part 2 of capstone project: EDA of Tesla deaths

In [49]:

import pandas as pd
# Read the refined Tesla deaths table and print its first rows.
df = pd.read_csv(filepath_or_buffer="tesla death data refined.csv")
first_rows = df.head()
print(first_rows)



   Case id    Year        Date  Country   State   \
0    294.0  2022.0   1/17/2023      USA       CA   
1    293.0  2022.0    1/7/2023   Canada        -   
2    292.0  2022.0    1/7/2023      USA       WA   
3    291.0  2022.0  12/22/2022      USA       GA   
4    290.0  2022.0  12/19/2022   Canada        -   

                         Description    Deaths   Tesla driver   \
0    Tesla crashes into back of semi        1.0              1   
1                      Tesla crashes        1.0              1   
2   Tesla hits pole, catches on fire        1.0             -    
3            Tesla crashes and burns        1.0              1   
4      Tesla crashes into storefront        1.0             -    

   Tesla occupant   Other vehicle   ...  Verified Tesla Autopilot Deaths   \
0               -               -   ...                                -    
1               -               -   ...                                -    
2                1              -   ...                    

In [52]:
# Show the dtype pandas inferred for every column.
column_dtypes = df.dtypes
print(column_dtypes)



Case id                                                                          float64
Year                                                                             float64
Date                                                                              object
 Country                                                                          object
 State                                                                            object
 Description                                                                      object
 Deaths                                                                          float64
 Tesla driver                                                                     object
 Tesla occupant                                                                   object
 Other vehicle                                                                    object
 Cyclists or Peds                                                                 object
 TSLA+cycl / peds    

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the CSV file
data = pd.read_csv('tesla_accidents.csv')

# Header cells may carry stray padding (the dtypes listing earlier in this
# notebook shows names such as ' Country'), so strip them before any lookup.
data = data.rename(columns=lambda name: name.strip() if isinstance(name, str) else name)

# Select the relevant columns
columns_to_keep = [
    'Year',
    'Country',
    'State',
    'Deaths',
    'Tesla driver',
    'Tesla occupant',
    'Other vehicle',
    'Cyclists or Peds',
    'TSLA+cycl / peds',
    'Verified Tesla Autopilot Deaths',
    'Sum of Verified Tesla Autopilot Deaths and All Deaths Reported to NHTSA SGO'
]

# Free-text columns; every other kept column except 'Year' is treated as a
# per-event count in which '-' stands for zero.
text_columns = ['Country', 'State']
count_columns = [col for col in columns_to_keep if col not in text_columns and col != 'Year']


def _strip_text(value):
    """Trim surrounding whitespace from string cells; leave other values as-is."""
    return value.strip() if isinstance(value, str) else value


def _to_count(column):
    """Convert a count column to numbers, mapping '-' to 0 and other junk to NaN."""
    if not pd.api.types.is_numeric_dtype(column):
        column = column.replace('-', '0')
    return pd.to_numeric(column, errors='coerce')


# Drop duplicate rows on the FULL records, before projecting. De-duplicating
# after the projection would merge distinct accidents that merely share the
# same year, place and counts.
filtered_data = data.drop_duplicates()[columns_to_keep]

# Drop rows with null values
filtered_data = filtered_data.dropna()

# Trim padded cells so placeholders such as ' - ' compare equal to '-'.
filtered_data = filtered_data.apply(
    lambda column: column if pd.api.types.is_numeric_dtype(column) else column.map(_strip_text)
)

# Replace '-' in the count columns with 0 and make them numeric, so the sums
# and comparisons in the analysis operate on numbers rather than strings.
for col in count_columns:
    filtered_data[col] = _to_count(filtered_data[col])

# Replace '-' in the text columns with 'unknown'
filtered_data[text_columns] = filtered_data[text_columns].replace('-', 'unknown')

# Exploratory Data Analysis

# a. Number of events by date, per year, and per day for each state and country
# Row counts per group, computed the same way for each of the three keys.
events_per_year, events_per_state, events_per_country = (
    filtered_data.groupby(group_key).size()
    for group_key in ('Year', 'State', 'Country')
)

# Plotting the number of events per year
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=filtered_data, x='Year', ax=ax)
ax.set_title('Number of Events by Year')
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Events')
plt.show()

# b. Analyze different aspects of the death events

def _as_counts(column_name):
    """Return ``filtered_data[column_name]`` as numbers.

    Entries that are not numeric (for example 'unknown' or '-') count as 0, so
    the sums and ``> 0`` comparisons below never operate on strings.
    """
    return pd.to_numeric(filtered_data[column_name], errors='coerce').fillna(0)


driver_counts = _as_counts('Tesla driver')
occupant_counts = _as_counts('Tesla occupant')
cyclist_ped_counts = _as_counts('Cyclists or Peds')
other_vehicle_counts = _as_counts('Other vehicle')

# Number of victims (deaths) in each accident
average_deaths_per_event = filtered_data['Deaths'].mean()
print(f'Average number of deaths per event: {average_deaths_per_event}')

# How many times did Tesla drivers die?
tesla_driver_deaths = driver_counts.sum()
print(f'Total deaths of Tesla drivers: {tesla_driver_deaths}')

# Proportion of events in which one or more occupants died.
# This counts EVENTS with at least one occupant death; dividing the summed
# deaths by the number of events would give deaths per event instead.
occupant_deaths = occupant_counts.sum()
total_events = filtered_data.shape[0]
events_with_occupant_death = int((occupant_counts > 0).sum())
proportion_occupant_deaths = events_with_occupant_death / total_events if total_events else 0.0
print(f'Proportion of events with occupant deaths: {proportion_occupant_deaths:.2%}')

# Distribution of events in which the vehicle hit a cyclist or pedestrian
# (number of events, not number of victims)
cyclist_pedestrian_events = int((cyclist_ped_counts > 0).sum())
print(f'Total events involving cyclists or pedestrians: {cyclist_pedestrian_events}')

# Accidents involving the death of a Tesla occupant or driver along with a cyclist or pedestrian.
# `&` binds tighter than `|`, so the driver/occupant alternative needs its own
# parentheses; without them every driver death matches regardless of cyclists.
tesla_and_cyclist_events = filtered_data[
    ((driver_counts > 0) | (occupant_counts > 0)) & (cyclist_ped_counts > 0)
]
tesla_and_cyclist_count = tesla_and_cyclist_events.shape[0]
print(f'Total events involving Tesla occupants/drivers and cyclists/pedestrians: {tesla_and_cyclist_count}')

# Frequency of Tesla colliding with other vehicles
# (events with at least one death in another vehicle)
tesla_collisions = int((other_vehicle_counts > 0).sum())
print(f'Total Tesla collisions with other vehicles: {tesla_collisions}')

# c. Study the event distribution across models (assuming a 'Model' column exists)
# If you have a 'Model' column, uncomment the following lines
# model_distribution = filtered_data['Model'].value_counts()
# print(model_distribution)

# d. Check the distribution of verified Tesla autopilot deaths
# 30-bin histogram with a KDE overlay, drawn on an explicit 12x6 axes.
autopilot_fig, autopilot_ax = plt.subplots(figsize=(12, 6))
sns.histplot(filtered_data['Verified Tesla Autopilot Deaths'], bins=30, kde=True, ax=autopilot_ax)
autopilot_ax.set_title('Distribution of Verified Tesla Autopilot Deaths')
autopilot_ax.set_xlabel('Verified Tesla Autopilot Deaths')
autopilot_ax.set_ylabel('Frequency')
plt.show()