# Chicago Average Daily Traffic Data EDA (2006 Chicago Traffic Data)

## Data Preparation and Cleaning

In [11]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
chi_avg_daily_df = pd.read_csv("./traffic_datasets/Average_Daily_Traffic_Counts.csv")

In [3]:
chi_avg_daily_df

Unnamed: 0,ID,Traffic Volume Count Location Address,Street,Date of Count,Total Passing Vehicle Volume,Vehicle Volume By Each Direction of Traffic,Latitude,Longitude,Location
0,414,5838 West,Lake St,11/14/2006,7100,East Bound: 3600 / West Bound: 3500,41.887904,-87.771064,"(41.887904, -87.771064)"
1,176,320 East,76th St,03/28/2006,8600,East Bound: 3800 / West Bound: 4800,41.756542,-87.617335,"(41.756542, -87.617335)"
2,1367,1730 East,57th Dr,08/24/2006,53500,East Bound: 27800 / West Bound: 25700,41.792663,-87.582231,"(41.792663, -87.582231)"
3,316,125 East,24th St,03/30/2006,700,East Bound: 400 / West Bound: 300,41.849302,-87.622658,"(41.849302, -87.622658)"
4,1294,2924 East,130th St,08/29/2006,4200,East Bound: 2300 / West Bound: 1900,41.659177,-87.552112,"(41.659177, -87.552112)"
...,...,...,...,...,...,...,...,...,...
1274,257,256 East,35th St,03/29/2006,10200,East Bound: 4800 / West Bound: 5400,41.831081,-87.619334,"(41.831081, -87.619334)"
1275,950,2032 North,Pulaski Rd,10/05/2006,21300,North Bound: 10800 / South Bound: 10500,41.918169,-87.726590,"(41.918169, -87.72659)"
1276,118,6020 South,Halsted St,03/21/2006,13200,North Bound: 6900 / South Bound: 6300,41.784530,-87.644905,"(41.78453, -87.644905)"
1277,1091,3454 West,Franklin St,10/19/2006,5000,East Bound: 2200 / West Bound: 2800,41.890003,-87.713444,"(41.890003, -87.713444)"


In [4]:
chi_avg_daily_df.columns

Index(['ID ', 'Traffic Volume Count Location  Address', 'Street',
       'Date of Count', 'Total Passing Vehicle Volume',
       'Vehicle Volume By Each Direction of Traffic', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')

In [5]:
chi_avg_daily_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1279 entries, 0 to 1278
Data columns (total 9 columns):
 #   Column                                       Non-Null Count  Dtype  
---  ------                                       --------------  -----  
 0   ID                                           1279 non-null   int64  
 1   Traffic Volume Count Location  Address       1279 non-null   object 
 2   Street                                       1279 non-null   object 
 3   Date of Count                                1279 non-null   object 
 4   Total Passing Vehicle Volume                 1279 non-null   int64  
 5   Vehicle Volume By Each Direction of Traffic  1279 non-null   object 
 6   Latitude                                     1279 non-null   float64
 7   Longitude                                    1279 non-null   float64
 8   Location                                     1279 non-null   object 
dtypes: float64(2), int64(2), object(5)
memory usage: 90.1+ KB


In [7]:
chi_avg_daily_df[["Total Passing Vehicle Volume", "Latitude", "Longitude"]].describe()

Unnamed: 0,Total Passing Vehicle Volume,Latitude,Longitude
count,1279.0,1279.0,1279.0
mean,19466.301798,41.849394,-87.670298
std,12448.378467,0.083917,0.060891
min,700.0,41.651861,-87.854842
25%,11350.0,41.780249,-87.710013
50%,17300.0,41.87138,-87.66285
75%,24200.0,41.909196,-87.628631
max,165200.0,42.017888,-87.529702


### No missing values found

In [8]:
chi_avg_daily_df.isna().sum()

ID                                             0
Traffic Volume Count Location  Address         0
Street                                         0
Date of Count                                  0
Total Passing Vehicle Volume                   0
Vehicle Volume By Each Direction of Traffic    0
Latitude                                       0
Longitude                                      0
Location                                       0
dtype: int64

## Data Exploratory Analysis and Visualization 

### Questions seeking to answer: 

#### 1. Which streets have the largest total passing vehicle volume?
#### 2. Is direction of travel a good indicator of large traffic volume?

In [10]:
total_pass_veh_volume_df = chi_avg_daily_df.sort_values(by='Total Passing Vehicle Volume', ascending=False)
total_pass_veh_volume_df

Unnamed: 0,ID,Traffic Volume Count Location Address,Street,Date of Count,Total Passing Vehicle Volume,Vehicle Volume By Each Direction of Traffic,Latitude,Longitude,Location
240,380,1550 South,Lake Shore Dr,04/05/2006,165200,North Bound: 86200 / South Bound: 79000,41.860562,-87.617934,"(41.860562, -87.617934)"
353,381,5250 South,Lake Shore Dr,04/05/2006,111600,North Bound: 52400 / South Bound: 59200,41.799972,-87.581471,"(41.799972, -87.581471)"
925,1368,5423 South,Lake Shore Dr,09/20/2006,103100,North Bound: 54200 / South Bound: 48900,41.797484,-87.580352,"(41.797484, -87.580352)"
158,1365,7139 South,Stony Island Ave,08/24/2006,81100,North Bound: 46300 / South Bound: 34800,41.764923,-87.585887,"(41.764923, -87.585887)"
1070,453,105 West,Congress Pkwy,09/27/2006,76700,East Bound: 37000 / West Bound: 39700,41.875606,-87.630878,"(41.875606, -87.630878)"
...,...,...,...,...,...,...,...,...,...
940,363,3027 South,Pitney Ct,05/04/2006,1800,North Bound: 800 / South Bound: 1000,41.838528,-87.662425,"(41.838528, -87.662425)"
1157,323,3036 East,83rd Pl,03/28/2006,1300,East Bound: 500 / West Bound: 800,41.743472,-87.550548,"(41.743472, -87.550548)"
397,361,7409 South,Parnell Ave,10/04/2006,1200,South Bound: 1200/ Oneway South Bound,41.759562,-87.638479,"(41.759562, -87.638479)"
994,1373,354 West,109th St,10/04/2006,800,West Bound: 800/ Oneway West Bound,41.696116,-87.632637,"(41.696116, -87.632637)"


In [21]:
new_df = total_pass_veh_volume_df.groupby('Street')['Total Passing Vehicle Volume'].sum().reset_index()
plot_df = new_df.sort_values(by='Total Passing Vehicle Volume', ascending=False)
# plt.ylabel('Total Passing Vehicle Volume By Street', size=12)
# plt.xlabel('Street', size=12)
# plt.figure(figsize=(100,25))
# plt.scatter(plot_df['Street'], plot_df['Total Passing Vehicle Volume'])
# plt.show()

### Streets with the highest total passing vehicle volume

In [20]:
plot_df.head(20)

Unnamed: 0,Street,Total Passing Vehicle Volume
243,Western Ave,1112200
67,Ashland Ave,992700
204,Pulaski Rd,830400
93,Cicero Ave,722900
108,Damen Ave,616600
133,Halsted St,544700
226,Stony Island Ave,523600
175,Michigan Ave,450600
157,Lake Shore Dr,433700
146,Irving Park Rd,432600


In [24]:
direction_df = total_pass_veh_volume_df.groupby('Vehicle Volume By Each Direction of Traffic')['Total Passing Vehicle Volume'].sum().reset_index()
new_direction_df = direction_df.sort_values(by='Total Passing Vehicle Volume', ascending=False)
new_direction_df.head(20)

Unnamed: 0,Vehicle Volume By Each Direction of Traffic,Total Passing Vehicle Volume
1104,North Bound: 86200 / South Bound: 79000,165200
948,North Bound: 52400 / South Bound: 59200,111600
957,North Bound: 54200 / South Bound: 48900,103100
929,North Bound: 46300 / South Bound: 34800,81100
262,East Bound: 37000 / West Bound: 39700,76700
212,East Bound: 21300 / West Bound: 16600,75800
901,North Bound: 38900 / South Bound: 34000,72900
257,East Bound: 36200 / West Bound: 36200,72400
898,North Bound: 37700 / South Bound: 33500,71200
895,North Bound: 36100 / South Bound: 34800,70900


### Conclusions Based on Data:

#### 1. Going North/South has the largest traffic volume
#### 2. Specific streets also produce more traffic volume than other streets