In [1]:
import pandas as pd

### Data Types: all objects in python have a type. You can check the type by using the _type()_ function. Here are a few standard ones

In [2]:
type(1.5)

float

In [3]:
type(3)

int

In [4]:
type('abc')

str

In [5]:
type(True)

bool

### You can convert between types

In [6]:
float(1)

1.0

In [7]:
str(1)

'1'

In [8]:
int('9')

9

In [9]:
int(9.9)

9

### DataFrames also have a type

In [11]:
accidents = pd.read_csv('Traffic_Accidents__2019_.csv')

In [12]:
type(accidents)

pandas.core.frame.DataFrame

### And each column has a type

In [13]:
accidents.dtypes

Accident Number                 int64
Date and Time                  object
Number of Motor Vehicles        int64
Number of Injuries              int64
Number of Fatalities            int64
Property Damage                object
Hit and Run                    object
Reporting Officer             float64
Collision Type Code           float64
Collision Type Description     object
Weather Code                  float64
Weather Description            object
Illumination Code             float64
Illumination Description       object
Harmful Code                   object
Harmful Code Description       object
Street Address                 object
City                           object
State                          object
ZIP                           float64
RPA                           float64
Precinct                       object
Latitude                      float64
Longitude                     float64
Mapped Location                object
dtype: object

In [18]:
accidents.head()

Unnamed: 0,Accident Number,Date and Time,Number of Motor Vehicles,Number of Injuries,Number of Fatalities,Property Damage,Hit and Run,Reporting Officer,Collision Type Code,Collision Type Description,...,Harmful Code Description,Street Address,City,State,ZIP,RPA,Precinct,Latitude,Longitude,Mapped Location
0,20190038972,01/15/2019 07:40:00 PM,2,0,0,,N,256374.0,4.0,ANGLE,...,MOTOR VEHICLE IN TRANSPORT,BELL RD & CEDAR POINTE PKWY,ANTIOCH,TN,37013.0,8753.0,SOUTH,36.0449,-86.6671,POINT (-86.6671 36.0449)
1,20190045402,01/17/2019 11:09:00 PM,2,0,0,,Y,405424.0,11.0,Front to Rear,...,PARKED MOTOR VEHICLE,3248 PERCY PRIEST DR,NASHVILLE,TN,37214.0,8955.0,HERMIT,36.1531,-86.6291,POINT (-86.6291 36.1531)
2,20190051468,01/20/2019 12:57:00 PM,2,0,0,,N,834798.0,6.0,SIDESWIPE - OPPOSITE DIRECTION,...,PARKED MOTOR VEHICLE,700 THOMPSON LN,NASHVILLE,TN,37204.0,8305.0,MIDTOW,36.1122,-86.7625,POINT (-86.7625 36.1122)
3,20190088097,02/02/2019 12:38:00 AM,2,0,0,,Y,660929.0,4.0,ANGLE,...,MOTOR VEHICLE IN TRANSPORT;PARKED MOTOR VEHICLE,400 RADNO,NASHVILLE,TN,,,,36.0483,-86.4369,POINT (-86.4369 36.0483)
4,20190091289,02/03/2019 01:25:00 PM,2,0,0,,N,212369.0,4.0,ANGLE,...,MOTOR VEHICLE IN TRANSPORT,ELLINGTON AG CENTER PVTDR & EDMONDSON PK,NASHVILLE,TN,37220.0,8615.0,MIDTOW,36.0618,-86.7405,POINT (-86.7405 36.0618)


### One data type you will encounter is a `datetime`

### The `Date and Time` column in the `accidents` dataframe is treated as an `object` but we can convert it to a different type, such as a `datetime` 

In [16]:
# Let's convert the 'Date and Time' column to a datetime and assign it back to itself
accidents['Date and Time'] = pd.to_datetime(accidents['Date and Time'])

# pd.to_datetime will infer the different date and time components of the string.
# If the datetime is in a strange format or you want to be explicit you can use the 'format' argument
# You will have to use datetime symbols: 
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

# It will take a second to run...

In [17]:
# Now the column is a datetime64[ns]
accidents.dtypes

Accident Number                        int64
Date and Time                 datetime64[ns]
Number of Motor Vehicles               int64
Number of Injuries                     int64
Number of Fatalities                   int64
Property Damage                       object
Hit and Run                           object
Reporting Officer                    float64
Collision Type Code                  float64
Collision Type Description            object
Weather Code                         float64
Weather Description                   object
Illumination Code                    float64
Illumination Description              object
Harmful Code                          object
Harmful Code Description              object
Street Address                        object
City                                  object
State                                 object
ZIP                                  float64
RPA                                  float64
Precinct                              object
Latitude  

In [18]:
# The values in the Date and Time column look different now
accidents.head()

Unnamed: 0,Accident Number,Date and Time,Number of Motor Vehicles,Number of Injuries,Number of Fatalities,Property Damage,Hit and Run,Reporting Officer,Collision Type Code,Collision Type Description,...,Harmful Code Description,Street Address,City,State,ZIP,RPA,Precinct,Latitude,Longitude,Mapped Location
0,20190038972,2019-01-15 19:40:00,2,0,0,,N,256374.0,4.0,ANGLE,...,MOTOR VEHICLE IN TRANSPORT,BELL RD & CEDAR POINTE PKWY,ANTIOCH,TN,37013.0,8753.0,SOUTH,36.0449,-86.6671,POINT (-86.6671 36.0449)
1,20190045402,2019-01-17 23:09:00,2,0,0,,Y,405424.0,11.0,Front to Rear,...,PARKED MOTOR VEHICLE,3248 PERCY PRIEST DR,NASHVILLE,TN,37214.0,8955.0,HERMIT,36.1531,-86.6291,POINT (-86.6291 36.1531)
2,20190051468,2019-01-20 12:57:00,2,0,0,,N,834798.0,6.0,SIDESWIPE - OPPOSITE DIRECTION,...,PARKED MOTOR VEHICLE,700 THOMPSON LN,NASHVILLE,TN,37204.0,8305.0,MIDTOW,36.1122,-86.7625,POINT (-86.7625 36.1122)
3,20190088097,2019-02-02 00:38:00,2,0,0,,Y,660929.0,4.0,ANGLE,...,MOTOR VEHICLE IN TRANSPORT;PARKED MOTOR VEHICLE,400 RADNO,NASHVILLE,TN,,,,36.0483,-86.4369,POINT (-86.4369 36.0483)
4,20190091289,2019-02-03 13:25:00,2,0,0,,N,212369.0,4.0,ANGLE,...,MOTOR VEHICLE IN TRANSPORT,ELLINGTON AG CENTER PVTDR & EDMONDSON PK,NASHVILLE,TN,37220.0,8615.0,MIDTOW,36.0618,-86.7405,POINT (-86.7405 36.0618)


In [19]:
# And we can see each value is a timestamp
accidents.loc[0,'Date and Time']

Timestamp('2019-01-15 19:40:00')

### Once you have a `datetime` object, you can pull out individual parts

In [20]:
# Use .dt to specify a datetime attribute/function and then what you want to pull out

# Here we are pulling out the month from the 'Date and Time' column and saving it to a new column called 'month'
accidents['month'] = accidents['Date and Time'].dt.month
accidents.head()

Unnamed: 0,Accident Number,Date and Time,Number of Motor Vehicles,Number of Injuries,Number of Fatalities,Property Damage,Hit and Run,Reporting Officer,Collision Type Code,Collision Type Description,...,Street Address,City,State,ZIP,RPA,Precinct,Latitude,Longitude,Mapped Location,month
0,20190038972,2019-01-15 19:40:00,2,0,0,,N,256374.0,4.0,ANGLE,...,BELL RD & CEDAR POINTE PKWY,ANTIOCH,TN,37013.0,8753.0,SOUTH,36.0449,-86.6671,POINT (-86.6671 36.0449),1
1,20190045402,2019-01-17 23:09:00,2,0,0,,Y,405424.0,11.0,Front to Rear,...,3248 PERCY PRIEST DR,NASHVILLE,TN,37214.0,8955.0,HERMIT,36.1531,-86.6291,POINT (-86.6291 36.1531),1
2,20190051468,2019-01-20 12:57:00,2,0,0,,N,834798.0,6.0,SIDESWIPE - OPPOSITE DIRECTION,...,700 THOMPSON LN,NASHVILLE,TN,37204.0,8305.0,MIDTOW,36.1122,-86.7625,POINT (-86.7625 36.1122),1
3,20190088097,2019-02-02 00:38:00,2,0,0,,Y,660929.0,4.0,ANGLE,...,400 RADNO,NASHVILLE,TN,,,,36.0483,-86.4369,POINT (-86.4369 36.0483),2
4,20190091289,2019-02-03 13:25:00,2,0,0,,N,212369.0,4.0,ANGLE,...,ELLINGTON AG CENTER PVTDR & EDMONDSON PK,NASHVILLE,TN,37220.0,8615.0,MIDTOW,36.0618,-86.7405,POINT (-86.7405 36.0618),2


In [21]:
# What is the maximum number of cars involved in a single accident in July
july_accidents = accidents[accidents['month']==7]
max(july_accidents['Number of Motor Vehicles'])

8

In [22]:
# How many accidents happened in December?
sum(accidents['month']==12)

2622

### There are many different attributes associated with datetimes

In [23]:
accidents['Date and Time'].dt.time.head()

0    19:40:00
1    23:09:00
2    12:57:00
3    00:38:00
4    13:25:00
Name: Date and Time, dtype: object

In [24]:
accidents['Date and Time'].dt.date.head()

0    2019-01-15
1    2019-01-17
2    2019-01-20
3    2019-02-02
4    2019-02-03
Name: Date and Time, dtype: object

In [25]:
accidents['Date and Time'].dt.weekday.head()

0    1
1    3
2    6
3    5
4    6
Name: Date and Time, dtype: int64

In [26]:
accidents['Date and Time'].dt.is_leap_year.head()

0    False
1    False
2    False
3    False
4    False
Name: Date and Time, dtype: bool

### You can use comparison symbols on `datetime` objects as well

In [27]:
# How many accidents happened before March 3
sum(accidents['Date and Time'] < '03/03/2019')

# Note: You have to input the comparison value as a string,
# but the format can vary and pandas will attempt to infer the format.
# Try putting in different formats and rerunning this cell.

5558

### You can also perform calculations on `datetime` objects

In [28]:
# How long between the 1st and 100th accident?
accidents = accidents.sort_values('Date and Time')
accidents.loc[100, 'Date and Time'] - accidents.loc[0, 'Date and Time']

# It appears as a Timedelta, or a change in time

Timedelta('81 days 16:05:00')

### Converting `datetimes` to other formats can be tricky

##### You can use the .astype() function to convert a whole column to different type. Be sure to save the output back to the column if you want the change to persist!

In [29]:
accidents['Date and Time'].astype(int).head()

# This is the total number of nanoseconds for a given datetime

9126     1546300800000000000
526      1546300800000000000
4939     1546300800000000000
13280    1546301100000000000
18619    1546302600000000000
Name: Date and Time, dtype: int64