In [274]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'is-the-traffic-collision-fatal:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F70315%2F7736702%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240314%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240314T202233Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D00abc84a5912676476021cef3eff20ac8e0f12121c7cf54a99930292f516379c8ab7fa629ff77564926438994e7d72cb7c184cbc637ca1944b0ea523c744f0d77e0ee41ba475283860b4906aa21bdd81419b3b39401713a916841ec0657a940e204b6d2d5bce61ce365218993e4b914dba92fa8a94ee7a1cb17be8e4f865256815789722f333605cd90d8e10573ef70205c20bc9fbcf27829fee51a903296bc4518876eceb8a7b34c19e7de7a7b8eed4d1b71275755d0419a928f7f447f670a365936d11343b7bc8944179e365598a9f5ebc53368ccd005d6c7c0a200f6537f048ec7db9c904fd86445043121f56eecb37d7c9309f3ab395a0e782df0b7f6896'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading is-the-traffic-collision-fatal, 814014 bytes compressed
Downloaded and uncompressed: is-the-traffic-collision-fatal
Data source import complete.


In [275]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/is-the-traffic-collision-fatal/Test.csv
/kaggle/input/is-the-traffic-collision-fatal/Train.csv


# Minimum Requirements Met
(5 marks)
-------------------------------------------------------------------------------------------------
## All requirements met.
### Applied multiple (>=4) Machine learning Techniques.
(1 mark)

### Data preprocessing using Sklearn pipeline present in the solution.
(1 mark)

### Presented all the metrics (MSE, RMSE, MAE, R-Square, Adjusted R-Square (for Regression) Precision, Recall, Accuracy, AUC, F1 Score (For Classification). Include Test Set Results from Kaggle.
(1 mark)

### Grid Search / Randomized Search / Bayesian Optimization
(1 mark)

### Feature Importance shown for all algorithms.
(1 mark)


## Coding practices
(5 Marks)
### The code is well-commented, and well explained for all the steps throughout the notebook especially in areas that you found difficult to program.(1 Marks)

### Proper use of Variable names, following the best practices.
(1 Mark)

### Use of Python Methods (definitions and classes) to avoid repetition of code.
(3 Marks)



## Presentation skills and Time taken (5 marks)

### The Group showed great presentation skills and story telling skills.
(3 Marks)

### The Group finished their presentation within the 10 minutes deadline.
(1 Mark)

### The Group answered all the questions correctly, asked by the instructor.
(1 Marks)


In [276]:
df = pd.read_csv("/kaggle/input/is-the-traffic-collision-fatal/Train.csv")
dfts = pd.read_csv("/kaggle/input/is-the-traffic-collision-fatal/Test.csv")

In [277]:
df

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId
0,3387730,892658.0,2006,2006/03/11 05:00:00+00,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,4,43.656345,-79.452490,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Driver,unknown,,,South,"Automobile, Station Wagon",Turning Left,Failed to Yield Right of Way,Unknown,,,,,,,Yes,,Yes,,,,,,,Yes,,,,88,High Park North,88,High Park North (88),D11,1
1,3387731,892658.0,2006,2006/03/11 05:00:00+00,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,4,43.656345,-79.452490,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Pedestrian,65 to 69,Fatal,,North,Other,,,,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Unknown,,,,Yes,,Yes,,,,,,,Yes,,,,88,High Park North,88,High Park North (88),D11,2
2,3388101,892810.0,2006,2006/03/11 05:00:00+00,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,25,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Motorcycle Driver,45 to 49,Fatal,,East,Motorcycle,Turning Right,Disobeyed Traffic Control,Unknown,,,,,,,,,Yes,Yes,,,,,,Yes,Yes,,,146,Malvern East,132,Malvern (132),D42,3
3,3389067,893184.0,2006,2006/01/01 05:00:00+00,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,19,43.699595,-79.318797,Intersection,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,50 to 54,Major,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,4
4,3388102,892810.0,2006,2006/03/11 05:00:00+00,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,25,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Driver,unknown,,,South,"Automobile, Station Wagon",Going Ahead,Driving Properly,Unknown,,,,,,,,,Yes,Yes,,,,,,Yes,Yes,,,146,Malvern East,132,Malvern (132),D42,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14995,81474608,,2018,2018/04/26 04:00:00+00,1942,ISLINGTON AVE,ALBION RD,,Major Arterial,Etobicoke York,1,43.737166,-79.565257,Intersection,At Intersection,Traffic Signal,Clear,"Dusk, artificial",Dry,Non-Fatal Injury,Turning Movement,Passenger,25 to 29,,,,,,,,,,,,,,,,Yes,,,,,Yes,,Yes,,,,3,Thistletown-Beaumond Heights,3,Thistletown-Beaumond Heights (3),D23,14996
14996,81474609,,2018,2018/04/26 04:00:00+00,1942,ISLINGTON AVE,ALBION RD,,Major Arterial,Etobicoke York,1,43.737166,-79.565257,Intersection,At Intersection,Traffic Signal,Clear,"Dusk, artificial",Dry,Non-Fatal Injury,Turning Movement,Passenger,0 to 4,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,,Yes,,,,3,Thistletown-Beaumond Heights,3,Thistletown-Beaumond Heights (3),D23,14997
14997,81474610,,2018,2018/04/26 04:00:00+00,1942,ISLINGTON AVE,ALBION RD,,Major Arterial,Etobicoke York,1,43.737166,-79.565257,Intersection,At Intersection,Traffic Signal,Clear,"Dusk, artificial",Dry,Non-Fatal Injury,Turning Movement,Passenger,5 to 9,,,,,,,,,,,,,,,,Yes,,,,,Yes,,Yes,,,,3,Thistletown-Beaumond Heights,3,Thistletown-Beaumond Heights (3),D23,14998
14998,81474611,,2018,2018/04/26 04:00:00+00,1942,ISLINGTON AVE,ALBION RD,,Major Arterial,Etobicoke York,1,43.737166,-79.565257,Intersection,At Intersection,Traffic Signal,Clear,"Dusk, artificial",Dry,Non-Fatal Injury,Turning Movement,Passenger,5 to 9,,,,,,,,,,,,,,,,Yes,,,,,Yes,,Yes,,,,3,Thistletown-Beaumond Heights,3,Thistletown-Beaumond Heights (3),D23,14999


In [278]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 55 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   INDEX_             15000 non-null  int64  
 1   ACCNUM             11302 non-null  float64
 2   YEAR               15000 non-null  int64  
 3   DATE               15000 non-null  object 
 4   TIME               15000 non-null  int64  
 5   STREET1            15000 non-null  object 
 6   STREET2            13657 non-null  object 
 7   OFFSET             1928 non-null   object 
 8   ROAD_CLASS         14643 non-null  object 
 9   DISTRICT           14986 non-null  object 
 10  WARDNUM            15000 non-null  int64  
 11  LATITUDE           15000 non-null  float64
 12  LONGITUDE          15000 non-null  float64
 13  LOCCOORD           14910 non-null  object 
 14  ACCLOC             9550 non-null   object 
 15  TRAFFCTL           14971 non-null  object 
 16  VISIBILITY         149

In [279]:
import plotly.express as px
# Assuming 'df' is your DataFrame containing the data
# Replace 'RDSFCOND' with the actual column name containing road conditions data

# Group by road conditions and count the frequency of each
road_condition_counts = df['RDSFCOND'].value_counts().reset_index()
road_condition_counts.columns = ['Road Condition', 'Frequency']

# Create a bar plot using Plotly
fig = px.bar(road_condition_counts, x='Road Condition', y='Frequency',
             title='Frequency of Road Conditions in Traffic Collisions',
             labels={'Frequency': 'Collision Count', 'Road Condition': 'Road Condition'})

# Show the plot
fig.show()

In [280]:
# Group by state and road conditions, and count the frequency of each
state_road_condition_counts = df.groupby(['DISTRICT', 'RDSFCOND']).size().reset_index(name='Frequency')

# Create a bar plot using Plotly
fig = px.bar(state_road_condition_counts, x='DISTRICT', y='Frequency', color='RDSFCOND',
             title='Frequency of Road Conditions by State',
             labels={'Frequency': 'Collision Count', 'DISTRICT': 'State', 'RDSFCOND': 'Road Condition'},
             barmode='group')

# Show the plot
fig.show()

In [281]:
# Group by road conditions and visibility, and count the frequency of each
condition_counts = df.groupby(['RDSFCOND', 'VISIBILITY']).size().reset_index(name='Frequency')

# Create a bar plot using Plotly
fig = px.bar(condition_counts, x='RDSFCOND', y='Frequency', color='VISIBILITY',
             title='Frequency of Collisions by Road Conditions and Visibility',
             labels={'Frequency': 'Collision Count', 'RDSFCOND': 'Road Condition', 'VISIBILITY': 'Visibility'},
             barmode='group')

# Show the plot
fig.show()

In [282]:

# Group by year and count the occurrences of collisions
yearly_collision_counts = df['YEAR'].value_counts().reset_index()
yearly_collision_counts.columns = ['Year', 'Collision Count']

# Sort the DataFrame by year
yearly_collision_counts = yearly_collision_counts.sort_values(by='Year')

# Create a line plot using Plotly
fig = px.bar(yearly_collision_counts, x='Year', y='Collision Count',
              title='Year-wise Occurrence of Collisions',
              labels={'Collision Count': 'Collision Count', 'Year': 'Year'})

# Show the plot
fig.show()

In [283]:
# Group by year and state and count the occurrences of collisions
state_yearly_collision_counts = df.groupby(['DISTRICT', 'YEAR']).size().reset_index(name='Collision Count')

# Sort the DataFrame by year
state_yearly_collision_counts = state_yearly_collision_counts.sort_values(by=['DISTRICT', 'YEAR'])

# Create a grouped bar plot using Plotly
fig = px.bar(state_yearly_collision_counts, x='YEAR', y='Collision Count', color='DISTRICT',
             title='Year-wise Occurrence of Collisions by State',
             labels={'Collision Count': 'Collision Count', 'YEAR': 'Year', 'DISTRICT': 'State'},
             barmode='group')

# Show the plot
fig.show()

In [284]:
# Group by light and visibility conditions and count the occurrences of collisions
light_visibility_counts = df.groupby(['LIGHT', 'VISIBILITY']).size().reset_index(name='Collision Count')

# Create a grouped bar plot using Plotly
fig = px.bar(light_visibility_counts, x='LIGHT', y='Collision Count', color='VISIBILITY',
             title='Comparison of Light and Visibility Conditions in Collisions',
             labels={'Collision Count': 'Collision Count', 'LIGHT': 'Light Condition', 'VISIBILITY': 'Visibility Condition'},
             barmode='group')

# Show the plot
fig.show()

In [285]:
# Filter out rows with null age values and get unique age groups
valid_age_groups = df.dropna(subset=['INVAGE'])['INVAGE'].unique()

# Group by age group, light conditions, and visibility conditions, and count the occurrences of collisions
age_light_visibility_counts = df.groupby(['INVAGE', 'LIGHT', 'VISIBILITY']).size().reset_index(name='Collision Count')

# Filter only the rows with valid age groups
age_light_visibility_counts = age_light_visibility_counts[age_light_visibility_counts['INVAGE'].isin(valid_age_groups)]

# Create a grouped bar plot using Plotly
fig = px.bar(age_light_visibility_counts, x='INVAGE', y='Collision Count', color='VISIBILITY',
             facet_col='LIGHT', facet_col_wrap=3,
             title='Comparison of Light and Visibility Conditions by Age Group',
             labels={'Collision Count': 'Collision Count', 'INVAGE': 'Age Group', 'VISIBILITY': 'Visibility Condition'},
             barmode='group')

# Show the plot
fig.show()

In [286]:
# Filter out rows with null age values and get unique age groups
valid_age_groups = df.dropna(subset=['INVAGE'])['INVAGE'].unique()

# Group by age group and count the occurrences of collisions
age_collision_counts = df['INVAGE'].value_counts().reset_index()
age_collision_counts.columns = ['Age Group', 'Collision Count']

# Filter only the rows with valid age groups
age_collision_counts = age_collision_counts[age_collision_counts['Age Group'].isin(valid_age_groups)]

# Create a bar plot using Plotly
fig = px.bar(age_collision_counts, x='Age Group', y='Collision Count',
             title='Collision Occurrences by Age Group',
             labels={'Collision Count': 'Collision Count', 'Age Group': 'Age Group'})

# Show the plot
fig.show()

In [287]:
# Group by injury type and count the occurrences of collisions
injury_collision_counts = df['INJURY'].value_counts().reset_index()
injury_collision_counts.columns = ['Injury Type', 'Collision Count']

# Create a bar plot using Plotly
fig = px.bar(injury_collision_counts, x='Injury Type', y='Collision Count',
             title='Collision Occurrences by Injury Type',
             labels={'Collision Count': 'Collision Count', 'Injury Type': 'Injury Type'})

# Show the plot
fig.show()

In [288]:
# Group by initial direction of travel and count the occurrences of collisions
initdir_collision_counts = df['INITDIR'].value_counts().reset_index()
initdir_collision_counts.columns = ['Initial Direction', 'Collision Count']

# Create a bar plot using Plotly
fig = px.bar(initdir_collision_counts, x='Initial Direction', y='Collision Count',
             title='Collision Occurrences by Initial Direction of Travel',
             labels={'Collision Count': 'Collision Count', 'Initial Direction': 'Initial Direction'})

# Show the plot
fig.show()

In [289]:
# Group by driver condition and count the occurrences of collisions
drivcond_collision_counts = df['DRIVCOND'].value_counts().reset_index()
drivcond_collision_counts.columns = ['Driver Condition', 'Collision Count']

# Create a bar plot using Plotly
fig = px.bar(drivcond_collision_counts, x='Driver Condition', y='Collision Count',
             title='Collision Occurrences by Driver Condition',
             labels={'Collision Count': 'Collision Count', 'Driver Condition': 'Driver Condition'})

# Show the plot
fig.show()

In [290]:
# Filter out rows with missing vehicle type
df_filtered = df.dropna(subset=['VEHTYPE'])

# Group by vehicle type and count the occurrences
collision_counts = df_filtered['VEHTYPE'].value_counts().reset_index()
collision_counts.columns = ['Vehicle Type', 'Collision Count']

# Create a bar chart using Plotly Express
fig = px.bar(collision_counts, x='Vehicle Type', y='Collision Count',
             title='Total Collision Comparison by Vehicle Type',
             labels={'Vehicle Type': 'Vehicle Type', 'Collision Count': 'Collision Count'})

# Show the plot
fig.show()

In [291]:
# Filter out rows with missing pedestrian condition
df_filtered = df.dropna(subset=['PEDCOND'])

# Group by pedestrian condition and count the occurrences
collision_counts = df_filtered['PEDCOND'].value_counts().reset_index()
collision_counts.columns = ['Pedestrian Condition', 'Collision Count']

# Create a bar chart using Plotly Express
fig = px.bar(collision_counts, x='Pedestrian Condition', y='Collision Count',
             title='Total Collision Comparison by Pedestrian Condition',
             labels={'Pedestrian Condition': 'Pedestrian Condition', 'Collision Count': 'Collision Count'})

# Show the plot
fig.show()

In [292]:
# Count the occurrences of each factor
counts = {
    'Cyclist Involved': df['CYCLIST'].notnull().sum(),
    'Pedestrian Involved': df['PEDESTRIAN'].notnull().sum(),
    'Transit or City Vehicle Involved': df['TRSN_CITY_VEH'].notnull().sum(),
    'Passenger Involved': df['PASSENGER'].notnull().sum(),
    'Speeding Related': df['SPEEDING'].notnull().sum(),
    'Red Light Related': df['REDLIGHT'].notnull().sum(),
    'Alcohol Related': df['ALCOHOL'].notnull().sum(),
}

# Create a DataFrame from the counts
data = pd.DataFrame.from_dict(counts, orient='index', columns=['Count']).reset_index()

# Rename columns
data.columns = ['Factor', 'Count']

# Create a bar chart using Plotly Express
fig = px.bar(data, x='Factor', y='Count',
             title='Total Accidents and Involvement of Different Factors',
             labels={'Factor': 'Factor', 'Count': 'Count'})

# Show the plot
fig.show()

In [293]:
# Count the occurrences of each factor
counts = {
    'Cyclist Involved': df['CYCLIST'].notnull().sum(),
    'Pedestrian Involved': df['PEDESTRIAN'].notnull().sum(),
    'Transit or City Vehicle Involved': df['TRSN_CITY_VEH'].notnull().sum(),
    'Passenger Involved': df['PASSENGER'].notnull().sum(),
    'Speeding Related': df['SPEEDING'].notnull().sum(),
    'Red Light Related': df['REDLIGHT'].notnull().sum(),
    'Alcohol Related': df['ALCOHOL'].notnull().sum(),
}

# Create a DataFrame from the counts
data = pd.DataFrame.from_dict(counts, orient='index', columns=['Count']).reset_index()

# Rename columns
data.columns = ['Factor', 'Count']

# Create a pie chart using Plotly Express
fig = px.pie(data, names='Factor', values='Count',
             title='Distribution of Accidents by Factor')

# Show the plot
fig.show()

In [294]:
# Filter out rows with missing involvement type
df_filtered = df.dropna(subset=['INVTYPE'])

# Group by involvement type and count the occurrences
collision_counts = df_filtered['INVTYPE'].value_counts().reset_index()
collision_counts.columns = ['Involvement Type', 'Collision Count']

# Create a bar chart using Plotly Express
fig = px.bar(collision_counts, x='Involvement Type', y='Collision Count',
             title='Total Collisions by Involvement Type',
             labels={'Involvement Type': 'Involvement Type', 'Collision Count': 'Collision Count'})

# Show the plot
fig.show()

In [295]:
# Filter out rows with missing district information
df_filtered = df.dropna(subset=['DISTRICT'])

# Group by district and count the occurrences
district_counts = df_filtered['DISTRICT'].value_counts().reset_index()
district_counts.columns = ['District', 'Collision Count']

# Create a bar chart using Plotly Express
fig = px.bar(district_counts, x='District', y='Collision Count',
             title='Total Collisions by District',
             labels={'District': 'District', 'Collision Count': 'Collision Count'})

# Show the plot
fig.show()

In [296]:
# Define a function to map dates to seasons
def get_season(date):
    month = date.month
    if 3 <= month <= 5:
        return 'Spring'
    elif 6 <= month <= 8:
        return 'Summer'
    elif 9 <= month <= 11:
        return 'Autumn'
    else:
        return 'Winter'

# Convert the 'DATE' column to datetime format if it's not already
df['DATE'] = pd.to_datetime(df['DATE'])

# Apply the function to create a new column for seasons
df['SEASON'] = df['DATE'].apply(get_season)

In [297]:
# Filter out rows with missing season information
df_filtered = df.dropna(subset=['SEASON'])

# Group by season and count the occurrences
season_counts = df_filtered['SEASON'].value_counts().reset_index()
season_counts.columns = ['Season', 'Accident Count']

# Create a bar chart using Plotly Express
fig = px.bar(season_counts, x='Season', y='Accident Count',
             title='Accidents by Season',
             labels={'Season': 'Season', 'Accident Count': 'Number of Accidents'})

# Show the plot
fig.show()

In [298]:
# Filter out rows with missing season and district information
df_filtered = df.dropna(subset=['SEASON', 'DISTRICT'])

# Group by season and district and count the occurrences
season_district_counts = df_filtered.groupby(['DISTRICT', 'SEASON']).size().reset_index(name='Accident Count')

# Create a grouped bar chart using Plotly Express
fig = px.bar(season_district_counts, x='DISTRICT', y='Accident Count', color='SEASON',
             title='Accidents by District and Season',
             labels={'DISTRICT': 'District', 'Accident Count': 'Number of Accidents'})

# Show the plot
fig.show()

In [299]:
null_count = df.columns[df.isnull().any()]

print(null_count)

Index(['ACCNUM', 'STREET2', 'OFFSET', 'ROAD_CLASS', 'DISTRICT', 'LOCCOORD',
       'ACCLOC', 'TRAFFCTL', 'VISIBILITY', 'RDSFCOND', 'INVTYPE', 'INJURY',
       'FATAL_NO', 'INITDIR', 'VEHTYPE', 'MANOEUVER', 'DRIVACT', 'DRIVCOND',
       'PEDTYPE', 'PEDACT', 'PEDCOND', 'CYCLISTYPE', 'CYCACT', 'CYCCOND',
       'PEDESTRIAN', 'CYCLIST', 'AUTOMOBILE', 'MOTORCYCLE', 'TRUCK',
       'TRSN_CITY_VEH', 'EMERG_VEH', 'PASSENGER', 'SPEEDING', 'AG_DRIV',
       'REDLIGHT', 'ALCOHOL', 'DISABILITY'],
      dtype='object')


In [300]:
# First result: Total null values for each feature
null_counts = df.isnull().sum()
null_df = pd.DataFrame(null_counts, columns=["Missing Values"])

# Second result: Total unique values for each feature
unique_values = df.nunique().reset_index()
unique_values.columns = ["Feature", "Unique Values"]

# Combine the two DataFrames
combined_df = pd.merge(unique_values, null_df, left_on="Feature", right_index=True)

# Display the combined DataFrame
print(combined_df)

              Feature  Unique Values  Missing Values
0              INDEX_          15000               0
1              ACCNUM           3822            3698
2                YEAR             13               0
3                DATE           3082               0
4                TIME           1276               0
5             STREET1           1547               0
6             STREET2           2344            1343
7              OFFSET            339           13072
8          ROAD_CLASS              9             357
9            DISTRICT              4              14
10            WARDNUM             71               0
11           LATITUDE           3475               0
12          LONGITUDE           3901               0
13           LOCCOORD              7              90
14             ACCLOC              9            5450
15           TRAFFCTL             10              29
16         VISIBILITY              8              14
17              LIGHT              9          

In [301]:
df[df['FATAL_NO'] == df['FATAL_NO'].notnull()]

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId,SEASON
2070,5307141,1012986.0,2008,2008-01-05 05:00:00+00:00,1826,EGLINTON AVE E,OSWEGO RD,,Major Arterial,Scarborough,2120,43.737645,-79.24369,Intersection,,No Control,Rain,Dark,Wet,Fatal,Pedestrian Collisions,Pedestrian,30 to 34,Fatal,1.0,North,Other,,,,Vehicle is going straight thru inter.while ped...,"Crossing, no Traffic Control",Normal,,,,Yes,,Yes,,,,,,,,,,,138,Eglinton East,138,Eglinton East (138),D43,2071,Winter
4997,5188679,1078551.0,2009,2009-01-01 05:00:00+00:00,259,EGLINTON AVE E,DANFORTH RD,,Major Arterial,Scarborough,2120,43.737045,-79.24619,Intersection,At Intersection,Traffic Signal,Clear,Dark,Wet,Fatal,Pedestrian Collisions,Pedestrian,50 to 54,Fatal,1.0,North,Other,,,,Vehicle is going straight thru inter.while ped...,Crossing without right of way,"Ability Impaired, Alcohol Over .80",,,,Yes,,Yes,,,,,,,,,,,138,Eglinton East,138,Eglinton East (138),D43,4998,Winter
5839,6253428,1146864.0,2010,2010-01-10 05:00:00+00:00,329,KING ST E,PARLIAMENT ST,,Major Arterial,Toronto and East York,13,43.652745,-79.36319,Intersection,At Intersection,Traffic Signal,Clear,Dark,Dry,Fatal,Approaching,Driver,35 to 39,Fatal,1.0,East,"Automobile, Station Wagon",Going Ahead,Lost control,Had Been Drinking,,,,,,,,,Yes,,,,,,Yes,Yes,,,,73,Moss Park,73,Moss Park (73),D51,5840,Winter
6743,6583562,1212031.0,2011,2011-01-11 05:00:00+00:00,1023,QUEEN ST W,BATHURST ST,,Major Arterial,Toronto and East York,10,43.647245,-79.40389,Intersection,At Intersection,No Control,Clear,Daylight,Wet,Fatal,Pedestrian Collisions,Pedestrian,unknown,Fatal,1.0,North,Other,,,,Pedestrian involved in a collision with transi...,"Crossing, no Traffic Control",Unknown,,,,Yes,,,,,Yes,,,,,,,,78,Kensington-Chinatown,78,Kensington-Chinatown (78),D14,6744,Winter
7327,7376307,1274722.0,2012,2012-01-12 05:00:00+00:00,2110,FOREST GROVE DR,PAGE AVE,,Local,North York,17,43.779645,-79.37889,Mid-Block,,No Control,Clear,"Dark, artificial",Wet,Fatal,Pedestrian Collisions,Pedestrian,80 to 84,Fatal,1.0,Unknown,Other,,,,Vehicle is reversing and hits pedestrian,Other,Medical or Physical Disability,,,,Yes,,Yes,,,,,,,,,,,52,Bayview Village,52,Bayview Village (52),D33,7328,Winter
9830,7774165,1334928.0,2013,2013-01-10 05:00:00+00:00,1851,MORNINGSIDE AVE,OLD FINCH AVE,,Minor Arterial,Scarborough,25,43.818743,-79.215091,Intersection,At Intersection,Traffic Signal,Clear,Dark,Dry,Fatal,Turning Movement,Driver,70 to 74,Fatal,1.0,North,"Automobile, Station Wagon",Going Ahead,Disobeyed Traffic Control,Unknown,,,,,,,,,Yes,,,Yes,,,Yes,Yes,Yes,,,146,Malvern East,132,Malvern (132),D42,9831,Winter
10256,80000020,141375171.0,2014,2014-01-19 05:00:00+00:00,2145,EGLINTON AVE W,NORTHCLIFFE BLVD,,Major Arterial,North York,812,43.69625,-79.447673,Intersection,At Intersection,Traffic Signal,Clear,"Dark, artificial",Wet,Fatal,Pedestrian Collisions,Pedestrian,35 to 39,Fatal,1.0,South,,,,,Vehicle is going straight thru inter.while ped...,Crossing without right of way,Unknown,,,,Yes,,Yes,,,,,,,,,,,107,Oakwood Village,107,Oakwood Village (107),D13,10257,Winter
11303,80000105,,2015,2015-01-11 05:00:00+00:00,353,477 BROADVIEW AVE,,,Minor Arterial,Toronto and East York,14,43.669097,-79.352985,Mid-Block,,No Control,Clear,"Dark, artificial",Dry,Fatal,SMV Other,Driver,20 to 24,Fatal,1.0,South,"Automobile, Station Wagon",Going Ahead,Lost control,Unknown,,,,,,,,,Yes,,,,,,Yes,Yes,,,,68,North Riverdale,68,North Riverdale (68),D55,11304,Winter
12498,80497621,,2016,2016-01-03 05:00:00+00:00,1915,1400 KENNEDY RD,,196 m North of,Major Arterial,Scarborough,21,43.764498,-79.280681,Mid-Block,Non Intersection,No Control,Clear,"Dark, artificial",Dry,Fatal,Pedestrian Collisions,Pedestrian,35 to 39,Fatal,1.0,Unknown,,,,,Pedestrian hit at mid-block,Crossing without right of way,Had Been Drinking,,,,Yes,,Yes,,,,,Yes,,Yes,,,,126,Dorset Park,126,Dorset Park (126),D41,12499,Winter
13571,80609323,,2017,2017-01-03 05:00:00+00:00,717,LOWER SHERBOURNE ST,THE ESPLANADE,,Minor Arterial,Toronto and East York,1013,43.648974,-79.367259,Intersection,At Intersection,Traffic Signal,Rain,Dark,Wet,Fatal,Pedestrian Collisions,Pedestrian,80 to 84,Fatal,1.0,West,,,,,Vehicle is going straight thru inter.while ped...,Crossing without right of way,Normal,,,,Yes,,Yes,,,,,,,,,,,166,St Lawrence-East Bayfront-The Islands,77,Waterfront Communities-The Island (77),D51,13572,Winter


In [302]:
df[df["CYCACT"] == df['CYCACT'].unique]

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId,SEASON


In [303]:
df['CYCACT'].unique()

array([nan, 'Failed to Yield Right of Way', 'Driving Properly', 'Other',
       'Improper Passing', 'Disobeyed Traffic Control', 'Lost control',
       'Improper Turn', 'Improper Lane Change', 'Following too Close',
       'Speed too Fast For Condition', 'Wrong Way on One Way Road'],
      dtype=object)

In [304]:
# Get a specific value from the "MOTORCYCLE" column
motorcycle_value = df.at[0, "MOTORCYCLE"]

# Convert the value to a string
str_motorcycle_value = str(motorcycle_value)

# Display the resulting string
print(str_motorcycle_value)

nan


In [305]:
df.dtypes

INDEX_                             int64
ACCNUM                           float64
YEAR                               int64
DATE                 datetime64[ns, UTC]
TIME                               int64
STREET1                           object
STREET2                           object
OFFSET                            object
ROAD_CLASS                        object
DISTRICT                          object
WARDNUM                            int64
LATITUDE                         float64
LONGITUDE                        float64
LOCCOORD                          object
ACCLOC                            object
TRAFFCTL                          object
VISIBILITY                        object
LIGHT                             object
RDSFCOND                          object
ACCLASS                           object
IMPACTYPE                         object
INVTYPE                           object
INVAGE                            object
INJURY                            object
FATAL_NO        

In [306]:
df["YEAR"]

0        2006
1        2006
2        2006
3        2006
4        2006
         ... 
14995    2018
14996    2018
14997    2018
14998    2018
14999    2018
Name: YEAR, Length: 15000, dtype: int64

In [307]:
df["TIME"].isna().sum()

0

In [308]:
df["TIME"].isna().sum()

0

In [309]:
df["DATE"].isna().sum()

0

In [310]:
df['DATE'] = pd.to_datetime(df['DATE'])

# Accessing year, month, and day components
df['Year'] = df['DATE'].dt.year
df['Month'] = df['DATE'].dt.month
df['Day'] = df['DATE'].dt.day

# Displaying the updated DataFrame
print(df[['DATE', 'Year', 'Month', 'Day']].head())

                       DATE  Year  Month  Day
0 2006-03-11 05:00:00+00:00  2006      3   11
1 2006-03-11 05:00:00+00:00  2006      3   11
2 2006-03-11 05:00:00+00:00  2006      3   11
3 2006-01-01 05:00:00+00:00  2006      1    1
4 2006-03-11 05:00:00+00:00  2006      3   11


In [311]:
first_row = df.iloc[0]
second_row = df.iloc[1]

comparison = (first_row == second_row)
# print(comparison)

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Print the comparison DataFrame
print(comparison)

# true_count = comparison.value_counts()[True]
# false_count = comparison.value_counts()[False]

# print("Number of True values:", true_count)
# print("Number of False values:", false_count)

INDEX_               False
ACCNUM                True
YEAR                  True
DATE                  True
TIME                  True
STREET1               True
STREET2               True
OFFSET               False
ROAD_CLASS            True
DISTRICT              True
WARDNUM               True
LATITUDE              True
LONGITUDE             True
LOCCOORD              True
ACCLOC                True
TRAFFCTL              True
VISIBILITY            True
LIGHT                 True
RDSFCOND              True
ACCLASS               True
IMPACTYPE             True
INVTYPE              False
INVAGE               False
INJURY               False
FATAL_NO             False
INITDIR              False
VEHTYPE              False
MANOEUVER            False
DRIVACT              False
DRIVCOND             False
PEDTYPE              False
PEDACT               False
PEDCOND              False
CYCLISTYPE           False
CYCACT               False
CYCCOND              False
PEDESTRIAN            True
C

In [312]:
df.head(30)

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId,SEASON,Year,Month,Day
0,3387730,892658.0,2006,2006-03-11 05:00:00+00:00,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,4,43.656345,-79.45249,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Driver,unknown,,,South,"Automobile, Station Wagon",Turning Left,Failed to Yield Right of Way,Unknown,,,,,,,Yes,,Yes,,,,,,,Yes,,,,88,High Park North,88,High Park North (88),D11,1,Spring,2006,3,11
1,3387731,892658.0,2006,2006-03-11 05:00:00+00:00,852,BLOOR ST W,DUNDAS ST W,,Major Arterial,Toronto and East York,4,43.656345,-79.45249,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Pedestrian,65 to 69,Fatal,,North,Other,,,,Vehicle turns left while ped crosses with ROW ...,Crossing with right of way,Unknown,,,,Yes,,Yes,,,,,,,Yes,,,,88,High Park North,88,High Park North (88),D11,2,Spring,2006,3,11
2,3388101,892810.0,2006,2006-03-11 05:00:00+00:00,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,25,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Motorcycle Driver,45 to 49,Fatal,,East,Motorcycle,Turning Right,Disobeyed Traffic Control,Unknown,,,,,,,,,Yes,Yes,,,,,,Yes,Yes,,,146,Malvern East,132,Malvern (132),D42,3,Spring,2006,3,11
3,3389067,893184.0,2006,2006-01-01 05:00:00+00:00,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,19,43.699595,-79.318797,Intersection,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,50 to 54,Major,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,4,Winter,2006,1,1
4,3388102,892810.0,2006,2006-03-11 05:00:00+00:00,915,MORNINGSIDE AVE,SHEPPARD AVE E,,Major Arterial,Scarborough,25,43.801943,-79.199786,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Fatal,Turning Movement,Driver,unknown,,,South,"Automobile, Station Wagon",Going Ahead,Driving Properly,Unknown,,,,,,,,,Yes,Yes,,,,,,Yes,Yes,,,146,Malvern East,132,Malvern (132),D42,5,Spring,2006,3,11
5,3387793,892682.0,2006,2006-03-12 05:00:00+00:00,240,EGLINTON AVE E,COMMONWEALTH AVE,,Major Arterial,Scarborough,2120,43.734945,-79.25619,Mid-Block,,No Control,Clear,Dark,Dry,Fatal,Pedestrian Collisions,Driver,25 to 29,,,West,"Automobile, Station Wagon",Going Ahead,Other,"Ability Impaired, Alcohol",,,,,,,Yes,,Yes,,,,,,,,,Yes,,124,Kennedy Park,138,Eglinton East (138),D41,6,Spring,2006,3,12
6,3389068,893184.0,2006,2006-01-01 05:00:00+00:00,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,19,43.699595,-79.318797,Intersection,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,15 to 19,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,7,Winter,2006,1,1
7,3389069,893184.0,2006,2006-01-01 05:00:00+00:00,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,19,43.699595,-79.318797,Intersection,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Driver,55 to 59,Minor,,North,"Automobile, Station Wagon",Going Ahead,Driving Properly,Normal,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,8,Winter,2006,1,1
8,3387794,892682.0,2006,2006-03-12 05:00:00+00:00,240,EGLINTON AVE E,COMMONWEALTH AVE,,Major Arterial,Scarborough,2120,43.734945,-79.25619,Mid-Block,,No Control,Clear,Dark,Dry,Fatal,Pedestrian Collisions,Pedestrian,30 to 34,Minor,,South,Other,,,,Pedestrian hit at mid-block,"Crossing, no Traffic Control",Normal,,,,Yes,,Yes,,,,,,,,,Yes,,124,Kennedy Park,138,Eglinton East (138),D41,9,Spring,2006,3,12
9,3387795,892682.0,2006,2006-03-12 05:00:00+00:00,240,EGLINTON AVE E,COMMONWEALTH AVE,,Major Arterial,Scarborough,2120,43.734945,-79.25619,Mid-Block,,No Control,Clear,Dark,Dry,Fatal,Pedestrian Collisions,Pedestrian,40 to 44,Fatal,,South,Other,,,,Pedestrian hit at mid-block,"Crossing, no Traffic Control",Normal,,,,Yes,,Yes,,,,,,,,,Yes,,124,Kennedy Park,138,Eglinton East (138),D41,10,Spring,2006,3,12


In [313]:
duplicate_accidents = df[df.duplicated(subset=['ACCNUM'], keep=False)]

duplicate_accidents.shape

(14857, 59)

In [314]:
df = df.sort_values(by='ACCNUM', ascending=False)


In [315]:
df.head(5)

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId,SEASON,Year,Month,Day
10961,80164629,4008024000.0,2014,2014-06-19 04:00:00+00:00,1600,STEELES AVE W,SHALE GT,,Major Arterial,North York,6,43.783209,-79.487293,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Driver,40 to 44,,,East,"Automobile, Station Wagon",Turning Right,Failed to Yield Right of Way,Inattentive,,,,,,,,Yes,Yes,,,,,,,Yes,,,,27,York University Heights,27,York University Heights (27),D31,10962,Summer,2014,6,19
10966,80164630,4008024000.0,2014,2014-06-19 04:00:00+00:00,1600,STEELES AVE W,SHALE GT,,Major Arterial,North York,6,43.783209,-79.487293,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Cyclist,55 to 59,Major,,North,Bicycle,Going Ahead,,,,,,Motorist turning right on red at signalized in...,Driving Properly,Normal,,Yes,Yes,,,,,,,Yes,,,,27,York University Heights,27,York University Heights (27),D31,10967,Summer,2014,6,19
10411,80153767,4008010000.0,2014,2014-03-07 05:00:00+00:00,650,DON VALLEY PARKWAY S,LAWRENCE AVE E,,,North York,16,43.740267,-79.33224,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Rear End,Driver,60 to 64,Major,,South,"Automobile, Station Wagon",Stopped,Driving Properly,Normal,,,,,,,,,Yes,,,,,,,Yes,,,,42,Banbury-Don Mills,42,Banbury-Don Mills (42),D33,10412,Spring,2014,3,7
10425,80153768,4008010000.0,2014,2014-03-07 05:00:00+00:00,650,DON VALLEY PARKWAY S,LAWRENCE AVE E,,,North York,16,43.740267,-79.33224,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Rear End,Driver,25 to 29,Major,,South,"Automobile, Station Wagon",Slowing or Stopping,Following too Close,Inattentive,,,,,,,,,Yes,,,,,,,Yes,,,,42,Banbury-Don Mills,42,Banbury-Don Mills (42),D33,10426,Spring,2014,3,7
10321,80153506,4008010000.0,2014,2014-03-05 05:00:00+00:00,1800,FINCH AVE E,BRIDLETOWNE CRCL,,Major Arterial,Scarborough,22,43.79584,-79.319301,Intersection,Intersection Related,Traffic Signal,Clear,"Dusk, artificial",Dry,Non-Fatal Injury,Rear End,Driver,30 to 34,Major,,East,"Automobile, Station Wagon",Stopped,Driving Properly,Normal,,,,,,,,,Yes,,,,,,,,,,,147,L'Amoreaux West,117,L'Amoreaux (117),D42,10322,Spring,2014,3,5


In [316]:
df.columns

Index(['INDEX_', 'ACCNUM', 'YEAR', 'DATE', 'TIME', 'STREET1', 'STREET2',
       'OFFSET', 'ROAD_CLASS', 'DISTRICT', 'WARDNUM', 'LATITUDE', 'LONGITUDE',
       'LOCCOORD', 'ACCLOC', 'TRAFFCTL', 'VISIBILITY', 'LIGHT', 'RDSFCOND',
       'ACCLASS', 'IMPACTYPE', 'INVTYPE', 'INVAGE', 'INJURY', 'FATAL_NO',
       'INITDIR', 'VEHTYPE', 'MANOEUVER', 'DRIVACT', 'DRIVCOND', 'PEDTYPE',
       'PEDACT', 'PEDCOND', 'CYCLISTYPE', 'CYCACT', 'CYCCOND', 'PEDESTRIAN',
       'CYCLIST', 'AUTOMOBILE', 'MOTORCYCLE', 'TRUCK', 'TRSN_CITY_VEH',
       'EMERG_VEH', 'PASSENGER', 'SPEEDING', 'AG_DRIV', 'REDLIGHT', 'ALCOHOL',
       'DISABILITY', 'HOOD_158', 'NEIGHBOURHOOD_158', 'HOOD_140',
       'NEIGHBOURHOOD_140', 'DIVISION', 'ObjectId', 'SEASON', 'Year', 'Month',
       'Day'],
      dtype='object')

In [317]:
one_accident = df[df.duplicated(subset=['TIME', 'DIVISION', 'STREET1', 'STREET2', 'NEIGHBOURHOOD_140', 'HOOD_140'], keep=False)]

# Group by the condition and assign a unique index ID
one_accident['ACC_NUM2'] = one_accident.groupby(['TIME', 'DIVISION', 'STREET1', 'STREET2', 'NEIGHBOURHOOD_140', 'HOOD_140']).ngroup()

# Merge the unique index IDs back to the original DataFrame
df = pd.merge(df, one_accident[['ACC_NUM2']], left_index=True, right_index=True, how='left')

# Fill NaN values in the ACC_NUM column with -1
df['ACC_NUM2'] = df['ACC_NUM2'].fillna(-1)

# Display the DataFrame with the assigned unique index IDs



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [323]:
df.head(5)

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId,SEASON,Year,Month,Day,ACC_NUM2
10961,80164629,4008024000.0,2014,2014-06-19 04:00:00+00:00,1600,STEELES AVE W,SHALE GT,,Major Arterial,North York,6,43.783209,-79.487293,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Driver,40 to 44,,,East,"Automobile, Station Wagon",Turning Right,Failed to Yield Right of Way,Inattentive,,,,,,,,Yes,Yes,,,,,,,Yes,,,,27,York University Heights,27,York University Heights (27),D31,10962,Summer,2014,6,19,2613.0
10966,80164630,4008024000.0,2014,2014-06-19 04:00:00+00:00,1600,STEELES AVE W,SHALE GT,,Major Arterial,North York,6,43.783209,-79.487293,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Cyclist,55 to 59,Major,,North,Bicycle,Going Ahead,,,,,,Motorist turning right on red at signalized in...,Driving Properly,Normal,,Yes,Yes,,,,,,,Yes,,,,27,York University Heights,27,York University Heights (27),D31,10967,Summer,2014,6,19,2613.0
10411,80153767,4008010000.0,2014,2014-03-07 05:00:00+00:00,650,DON VALLEY PARKWAY S,LAWRENCE AVE E,,,North York,16,43.740267,-79.33224,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Rear End,Driver,60 to 64,Major,,South,"Automobile, Station Wagon",Stopped,Driving Properly,Normal,,,,,,,,,Yes,,,,,,,Yes,,,,42,Banbury-Don Mills,42,Banbury-Don Mills (42),D33,10412,Spring,2014,3,7,691.0
10425,80153768,4008010000.0,2014,2014-03-07 05:00:00+00:00,650,DON VALLEY PARKWAY S,LAWRENCE AVE E,,,North York,16,43.740267,-79.33224,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Rear End,Driver,25 to 29,Major,,South,"Automobile, Station Wagon",Slowing or Stopping,Following too Close,Inattentive,,,,,,,,,Yes,,,,,,,Yes,,,,42,Banbury-Don Mills,42,Banbury-Don Mills (42),D33,10426,Spring,2014,3,7,691.0
10321,80153506,4008010000.0,2014,2014-03-05 05:00:00+00:00,1800,FINCH AVE E,BRIDLETOWNE CRCL,,Major Arterial,Scarborough,22,43.79584,-79.319301,Intersection,Intersection Related,Traffic Signal,Clear,"Dusk, artificial",Dry,Non-Fatal Injury,Rear End,Driver,30 to 34,Major,,East,"Automobile, Station Wagon",Stopped,Driving Properly,Normal,,,,,,,,,Yes,,,,,,,,,,,147,L'Amoreaux West,117,L'Amoreaux (117),D42,10322,Spring,2014,3,5,3170.0


In [319]:
df = df.drop(['ACC_NUM_x',	'ACC_NUM_y',	'ACC_NUM2_x',	'ACC_NUM2_y'],axis=1)

In [325]:
one_accident = df[df.duplicated(subset=['TIME', 'DIVISION', 'STREET1', 'STREET2', 'NEIGHBOURHOOD_140', 'HOOD_140'], keep=False)]

# Group by the condition and assign a unique index ID
one_accident['ACC_NUM'] = one_accident.groupby(['TIME', 'DIVISION', 'STREET1', 'STREET2', 'NEIGHBOURHOOD_140', 'HOOD_140']).ngroup()

# Merge the unique index IDs back to the original DataFrame
df = pd.merge(df, one_accident[['ACC_NUM']], left_index=True, right_index=True, how='left')

# Fill NaN values in the ACC_NUM column with -1
df['ACC_NUM'] = df['ACC_NUM'].fillna(-1)

# Display the DataFrame with the assigned unique index IDs




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [327]:
df = df.sort_values(by='ACC_NUM', ascending=True)


In [328]:
df.head(30)

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId,SEASON,Year,Month,Day,ACC_NUM2,ACC_NUM
7796,6804925,1248931.0,2011,2011-08-13 04:00:00+00:00,1800,28 REGATTA CRES,,,Local,North York,18,43.789345,-79.44389,Intersection,,No Control,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Pedestrian,75 to 79,Fatal,19.0,South,Other,,,,Pedestrian hit at private driveway,On Sidewalk or Shoulder,Unknown,,,,Yes,,Yes,,,,,,,,,,,36,Newtonbrook West,36,Newtonbrook West (36),D32,7797,Summer,2011,8,13,-1.0,-1.0
6045,6614964,1228029.0,2011,2011-03-25 04:00:00+00:00,720,315 MAIN ST,,,Minor Arterial,Toronto and East York,19,43.689044,-79.302287,Mid-Block,,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Pedestrian Collisions,Pedestrian,45 to 49,Major,,East,Other,,,,Pedestrian hit at mid-block,"Crossing, no Traffic Control",Normal,,,,Yes,,Yes,,,,,,,,,,,62,East End-Danforth,62,East End-Danforth (62),D55,6046,Spring,2011,3,25,-1.0,-1.0
6044,6614963,1228029.0,2011,2011-03-25 04:00:00+00:00,720,315 MAIN ST,,,Minor Arterial,Toronto and East York,19,43.689044,-79.302287,Mid-Block,,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Pedestrian Collisions,Driver,unknown,,,North,"Automobile, Station Wagon",Pulling Away from Shoulder or Curb,,,,,,,,,Yes,,Yes,,,,,,,,,,,62,East End-Danforth,62,East End-Danforth (62),D55,6045,Spring,2011,3,25,-1.0,-1.0
10565,80167945,4002525000.0,2014,2014-07-18 04:00:00+00:00,2340,3900 JANE ST,,,Major Arterial,Etobicoke York,7,43.756663,-79.517566,Mid-Block,At/Near Private Drive,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Driver,35 to 39,,,East,"Automobile, Station Wagon",Going Ahead,Other,Unknown,,,,,,,,Yes,Yes,,,,,,,,,,,25,Glenfield-Jane Heights,25,Glenfield-Jane Heights (25),D31,10566,Summer,2014,7,18,-1.0,-1.0
10572,80167946,4002525000.0,2014,2014-07-18 04:00:00+00:00,2340,3900 JANE ST,,,Major Arterial,Etobicoke York,7,43.756663,-79.517566,Mid-Block,At/Near Private Drive,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Cyclist,30 to 34,Major,,Unknown,Bicycle,Unknown,,,,,,Motorist without ROW drives into path of cycli...,Other,Unknown,,Yes,Yes,,,,,,,,,,,25,Glenfield-Jane Heights,25,Glenfield-Jane Heights (25),D31,10573,Summer,2014,7,18,-1.0,-1.0
14730,80941914,,2018,2018-02-23 05:00:00+00:00,118,931 PROGRESS AVE,,5 m West of,Minor Arterial,Scarborough,24,43.783801,-79.231838,Intersection,Intersection Related,Traffic Signal,Clear,"Dark, artificial",Dry,Non-Fatal Injury,SMV Other,Driver,25 to 29,Minor,,North,"Automobile, Station Wagon",Going Ahead,Other,"Ability Impaired, Alcohol Over .08",,,,,,,,,Yes,,,,,Yes,,,,Yes,,142,Woburn North,137,Woburn (137),D43,14731,Winter,2018,2,23,-1.0,-1.0
10474,80213442,4002493000.0,2014,2014-07-14 04:00:00+00:00,450,DIXON RD,CELESTINE DR,,Major Arterial,Etobicoke York,12,43.693392,-79.562503,Intersection,At Intersection,Traffic Signal,Clear,"Dark, artificial",Dry,Non-Fatal Injury,SMV Other,Driver,20 to 24,Major,,East,"Automobile, Station Wagon",Going Ahead,Lost control,Fatigue,,,,,,,,,Yes,,,,,,,,,,,7,Willowridge-Martingrove-Richview,7,Willowridge-Martingrove-Richview (7),D23,10475,Summer,2014,7,14,-1.0,-1.0
12177,80504002,,2016,2016-05-03 04:00:00+00:00,948,3 SPRUCE HILL RD,,,Local,Toronto and East York,19,43.672747,-79.288913,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Driver,65 to 69,,,East,"Automobile, Station Wagon",Turning Right,Lost control,Unknown,,,,,,,Yes,,Yes,,,,,,,,,,,63,The Beaches,63,The Beaches (63),D55,12178,Spring,2016,5,3,-1.0,-1.0
14723,80941913,,2018,2018-02-23 05:00:00+00:00,118,931 PROGRESS AVE,,5 m West of,Minor Arterial,Scarborough,24,43.783801,-79.231838,Intersection,Intersection Related,Traffic Signal,Clear,"Dark, artificial",Dry,Non-Fatal Injury,SMV Other,Passenger,20 to 24,Major,,,,,,,,,,,,,,,Yes,,,,,Yes,,,,Yes,,142,Woburn North,137,Woburn (137),D43,14724,Winter,2018,2,23,-1.0,-1.0
10414,80213101,4002469000.0,2014,2014-07-10 04:00:00+00:00,1235,HIGHWAY 27 N,ROYALCREST RD,,Major Arterial,Etobicoke York,1,43.749705,-79.610454,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,SMV Other,Driver,60 to 64,Major,,South,"Automobile, Station Wagon",Going Ahead,Lost control,Unknown,,,,,,,,,Yes,,,,,,,,,,,1,West Humber-Clairville,1,West Humber-Clairville (1),D23,10415,Summer,2014,7,10,-1.0,-1.0


In [None]:
df[df.duplicated(subset=['DATE','TIME','ACC_NUM'], keep=False)]

In [None]:
pd.set_option("display.max_columns", None)


In [None]:
df['ACC_NUM'].replace(-1, np.nan, inplace=True)


In [333]:
df.drop(["ACC_NUM2"], axis=1)

In [332]:
df.head(30)

Unnamed: 0,INDEX_,ACCNUM,YEAR,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,WARDNUM,LATITUDE,LONGITUDE,LOCCOORD,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,ObjectId,SEASON,Year,Month,Day,ACC_NUM2,ACC_NUM
7796,6804925,1248931.0,2011,2011-08-13 04:00:00+00:00,1800,28 REGATTA CRES,,,Local,North York,18,43.789345,-79.44389,Intersection,,No Control,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Pedestrian,75 to 79,Fatal,19.0,South,Other,,,,Pedestrian hit at private driveway,On Sidewalk or Shoulder,Unknown,,,,Yes,,Yes,,,,,,,,,,,36,Newtonbrook West,36,Newtonbrook West (36),D32,7797,Summer,2011,8,13,-1.0,-1.0
6045,6614964,1228029.0,2011,2011-03-25 04:00:00+00:00,720,315 MAIN ST,,,Minor Arterial,Toronto and East York,19,43.689044,-79.302287,Mid-Block,,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Pedestrian Collisions,Pedestrian,45 to 49,Major,,East,Other,,,,Pedestrian hit at mid-block,"Crossing, no Traffic Control",Normal,,,,Yes,,Yes,,,,,,,,,,,62,East End-Danforth,62,East End-Danforth (62),D55,6046,Spring,2011,3,25,-1.0,-1.0
6044,6614963,1228029.0,2011,2011-03-25 04:00:00+00:00,720,315 MAIN ST,,,Minor Arterial,Toronto and East York,19,43.689044,-79.302287,Mid-Block,,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Pedestrian Collisions,Driver,unknown,,,North,"Automobile, Station Wagon",Pulling Away from Shoulder or Curb,,,,,,,,,Yes,,Yes,,,,,,,,,,,62,East End-Danforth,62,East End-Danforth (62),D55,6045,Spring,2011,3,25,-1.0,-1.0
10565,80167945,4002525000.0,2014,2014-07-18 04:00:00+00:00,2340,3900 JANE ST,,,Major Arterial,Etobicoke York,7,43.756663,-79.517566,Mid-Block,At/Near Private Drive,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Driver,35 to 39,,,East,"Automobile, Station Wagon",Going Ahead,Other,Unknown,,,,,,,,Yes,Yes,,,,,,,,,,,25,Glenfield-Jane Heights,25,Glenfield-Jane Heights (25),D31,10566,Summer,2014,7,18,-1.0,-1.0
10572,80167946,4002525000.0,2014,2014-07-18 04:00:00+00:00,2340,3900 JANE ST,,,Major Arterial,Etobicoke York,7,43.756663,-79.517566,Mid-Block,At/Near Private Drive,No Control,Clear,Daylight,Dry,Non-Fatal Injury,Cyclist Collisions,Cyclist,30 to 34,Major,,Unknown,Bicycle,Unknown,,,,,,Motorist without ROW drives into path of cycli...,Other,Unknown,,Yes,Yes,,,,,,,,,,,25,Glenfield-Jane Heights,25,Glenfield-Jane Heights (25),D31,10573,Summer,2014,7,18,-1.0,-1.0
14730,80941914,,2018,2018-02-23 05:00:00+00:00,118,931 PROGRESS AVE,,5 m West of,Minor Arterial,Scarborough,24,43.783801,-79.231838,Intersection,Intersection Related,Traffic Signal,Clear,"Dark, artificial",Dry,Non-Fatal Injury,SMV Other,Driver,25 to 29,Minor,,North,"Automobile, Station Wagon",Going Ahead,Other,"Ability Impaired, Alcohol Over .08",,,,,,,,,Yes,,,,,Yes,,,,Yes,,142,Woburn North,137,Woburn (137),D43,14731,Winter,2018,2,23,-1.0,-1.0
10474,80213442,4002493000.0,2014,2014-07-14 04:00:00+00:00,450,DIXON RD,CELESTINE DR,,Major Arterial,Etobicoke York,12,43.693392,-79.562503,Intersection,At Intersection,Traffic Signal,Clear,"Dark, artificial",Dry,Non-Fatal Injury,SMV Other,Driver,20 to 24,Major,,East,"Automobile, Station Wagon",Going Ahead,Lost control,Fatigue,,,,,,,,,Yes,,,,,,,,,,,7,Willowridge-Martingrove-Richview,7,Willowridge-Martingrove-Richview (7),D23,10475,Summer,2014,7,14,-1.0,-1.0
12177,80504002,,2016,2016-05-03 04:00:00+00:00,948,3 SPRUCE HILL RD,,,Local,Toronto and East York,19,43.672747,-79.288913,Mid-Block,Non Intersection,No Control,Clear,Daylight,Dry,Fatal,Pedestrian Collisions,Driver,65 to 69,,,East,"Automobile, Station Wagon",Turning Right,Lost control,Unknown,,,,,,,Yes,,Yes,,,,,,,,,,,63,The Beaches,63,The Beaches (63),D55,12178,Spring,2016,5,3,-1.0,-1.0
14723,80941913,,2018,2018-02-23 05:00:00+00:00,118,931 PROGRESS AVE,,5 m West of,Minor Arterial,Scarborough,24,43.783801,-79.231838,Intersection,Intersection Related,Traffic Signal,Clear,"Dark, artificial",Dry,Non-Fatal Injury,SMV Other,Passenger,20 to 24,Major,,,,,,,,,,,,,,,Yes,,,,,Yes,,,,Yes,,142,Woburn North,137,Woburn (137),D43,14724,Winter,2018,2,23,-1.0,-1.0
10414,80213101,4002469000.0,2014,2014-07-10 04:00:00+00:00,1235,HIGHWAY 27 N,ROYALCREST RD,,Major Arterial,Etobicoke York,1,43.749705,-79.610454,Intersection,At Intersection,Traffic Signal,Clear,Daylight,Dry,Non-Fatal Injury,SMV Other,Driver,60 to 64,Major,,South,"Automobile, Station Wagon",Going Ahead,Lost control,Unknown,,,,,,,,,Yes,,,,,,,,,,,1,West Humber-Clairville,1,West Humber-Clairville (1),D23,10415,Summer,2014,7,10,-1.0,-1.0
