In [1]:
import pandas as pd 
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Float, DateTime
from sqlalchemy.dialects.sqlite import DATETIME

In [2]:
df = pd.read_csv('../Resources/collision_geo_data_clean.csv')

In [3]:
df.set_index('collision_id', inplace=True)

In [4]:
df['zip_code'] = df['zip_code'].astype(str)

In [5]:
df['crash_datetime'] = df['crash_date'] + ' ' + df['crash_time']

In [6]:
df.drop(columns=['crash_date', 'crash_time'], inplace=True)

In [7]:
df['crash_datetime'] = pd.to_datetime(df['crash_datetime'])

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1000 entries, 4455765 to 4407712
Data columns (total 16 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   street_name                    1000 non-null   object        
 1   borough                        1000 non-null   object        
 2   county                         1000 non-null   object        
 3   zip_code                       1000 non-null   object        
 4   latitude                       1000 non-null   float64       
 5   longitude                      1000 non-null   float64       
 6   number_of_persons_injured      1000 non-null   int64         
 7   number_of_persons_killed       1000 non-null   int64         
 8   number_of_pedestrians_injured  1000 non-null   int64         
 9   number_of_pedestrians_killed   1000 non-null   int64         
 10  number_of_cyclist_injured      1000 non-null   int64         
 11  number_of_cyc

In [9]:
engine = create_engine('sqlite:///../Resources/collision_db.sqlite')

In [10]:
metadata = MetaData()

In [11]:
motor_collisions = Table('motor_collisions', metadata,
                         Column('collision_id', Integer, primary_key=True),
                         Column('crash_datetime', DateTime),
                         Column('street_name', String), 
                         Column('borough', String),
                         Column('county', String),
                         Column('zip_code', String),
                         Column('latitude', Float),
                         Column('longitude', Float),
                         Column('number_of_persons_injured', Integer),
                         Column('number_of_persons_killed', Integer),
                         Column('number_of_pedestrians_injured', Integer),
                         Column('number_of_pedestrians_killed', Integer),
                         Column('number_of_cyclist_injured', Integer),
                         Column('number_of_cyclist_killed', Integer),
                         Column('number_of_motorist_injured', Integer),
                         Column('number_of_motorist_killed', Integer),
                         Column('contributing_factor_vehicle', String)
                         )
metadata.create_all(engine)

In [12]:
df.to_sql('motor_collisions', con=engine, if_exists='append', index=True)

IntegrityError: (sqlite3.IntegrityError) UNIQUE constraint failed: motor_collisions.collision_id
[SQL: INSERT INTO motor_collisions (collision_id, street_name, borough, county, zip_code, latitude, longitude, number_of_persons_injured, number_of_persons_killed, number_of_pedestrians_injured, number_of_pedestrians_killed, number_of_cyclist_injured, number_of_cyclist_killed, number_of_motorist_injured, number_of_motorist_killed, contributing_factor_vehicle, crash_datetime) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)]
[parameters: ((4455765, 'Whitestone Expressway & 20 Avenue', 'Queens', 'Queens County', '11356', 40.7127281, -74.0060152, 2, 0, 0, 0, 0, 0, 2, 0, 'Aggressive Driving/Road Rage', '2021-09-11 02:39:00.000000'), (4513547, 'Queensboro Bridge Upper', 'Manhattan', 'Queens County', '10044', 40.746402, -73.940191, 1, 0, 0, 0, 0, 0, 1, 0, 'Pavement Slippery', '2022-03-26 11:45:00.000000'), (4541903, 'Throgs Neck Bridge', 'Bronx', 'Bronx County', '10465', 40.8164164, -73.7986156, 0, 0, 0, 0, 0, 0, 0, 0, 'Following Too Closely', '2022-06-29 06:55:00.000000'), (4456314, '1211      Loring Avenue', 'Brooklyn', 'Kings County', '11208', 40.667202, -73.8665, 0, 0, 0, 0, 0, 0, 0, 0, 'Unspecified', '2021-09-11 09:35:00.000000'), (4486609, 'Saratoga Avenue & Decatur Street', 'Brooklyn', 'Kings County', '11233', 40.683304, -73.917274, 0, 0, 0, 0, 0, 0, 0, 0, None, '2021-12-14 08:13:00.000000'), (4407458, 'Major Deegan Expressway Ramp', 'Bronx', 'Bronx County', '10451', 40.81166, -73.931423, 0, 0, 0, 0, 0, 0, 0, 0, 'Unspecified', '2021-04-14 12:47:00.000000'), (4486555, 'Brooklyn Queens Expressway', 'Brooklyn', 'Kings County', '11104', 40.709183, -73.956825, 0, 0, 0, 0, 0, 0, 0, 0, 'Passing Too Closely', '2021-12-14 17:05:00.000000'), (4486660, '344       Baychester Avenue', 'Bronx', 'Bronx County', '10475', 40.86816, -73.83148, 2, 0, 0, 0, 0, 0, 2, 0, 'Unspecified', '2021-12-14 08:17:00.000000')  ... displaying 10 of 1000 total bound parameter sets ...  (4407655, 'Brooklyn Queens Expressway', 'Brooklyn', 'Kings County', '11104', 40.698544, -73.96236, 0, 0, 0, 0, 0, 0, 0, 0, 'Following Too Closely', '2021-04-14 06:55:00.000000'), (4407712, '72-24     Broadway', 'Queens', 'Queens County', '11372', 40.747536, -73.89344, 0, 0, 0, 0, 0, 0, 0, 0, 'Unspecified', '2021-04-14 19:00:00.000000'))]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [None]:
from sqlalchemy import inspect

inspector = inspect(engine)
print(inspector.get_table_names())  # Check if the table is listed
print(inspector.get_columns('motor_collisions'))  # Check the structure of the table


['motor_collisions']
[{'name': 'collision_id', 'type': INTEGER(), 'nullable': False, 'default': None, 'primary_key': 1}, {'name': 'crash_datetime', 'type': DATETIME(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'street_name', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'borough', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'county', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'zip_code', 'type': VARCHAR(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'latitude', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'longitude', 'type': FLOAT(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'number_of_persons_injured', 'type': INTEGER(), 'nullable': True, 'default': None, 'primary_key': 0}, {'name': 'number_of_persons_killed', 'type': INTEGER(), 'nullable': True, 'default': None, 'primary_key': 