In [164]:
from numpy import genfromtxt
from time import time
from datetime import datetime
from dateutil.relativedelta import relativedelta
from sqlalchemy import Column, Integer, Float, Date, String, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
import sqlite3
from sqlalchemy import create_engine
from sqlalchemy.orm import session
import csv
import pandas as pd

In [118]:
file_path = "Resources/CTA_Ridership_Monthly_Day_Type_Averages.csv"
cta_monthly_df = pd.read_csv(file_path)
cta_monthly_df.head()

Unnamed: 0,station_id,stationame,month_beginning,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal
0,40900,Howard,1/1/2001,6233.9,3814.5,2408.6,164447
1,41190,Jarvis,1/1/2001,1489.1,1054.0,718.0,40567
2,40100,Morse,1/1/2001,4412.5,3064.5,2087.8,119772
3,41300,Loyola,1/1/2001,4664.5,3156.0,1952.8,125008
4,40760,Granville,1/1/2001,3109.8,2126.0,1453.8,84189


In [119]:
cta_monthly_df.dtypes

station_id                    int64
stationame                   object
month_beginning              object
avg_weekday_rides           float64
avg_saturday_rides          float64
avg_sunday-holiday_rides    float64
monthtotal                    int64
dtype: object

In [120]:
cta_monthly_df['date'] = pd.to_datetime(cta_monthly_df['month_beginning'])
cta_monthly_df.head()

Unnamed: 0,station_id,stationame,month_beginning,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal,date
0,40900,Howard,1/1/2001,6233.9,3814.5,2408.6,164447,2001-01-01
1,41190,Jarvis,1/1/2001,1489.1,1054.0,718.0,40567,2001-01-01
2,40100,Morse,1/1/2001,4412.5,3064.5,2087.8,119772,2001-01-01
3,41300,Loyola,1/1/2001,4664.5,3156.0,1952.8,125008,2001-01-01
4,40760,Granville,1/1/2001,3109.8,2126.0,1453.8,84189,2001-01-01


In [121]:
cta_monthly_df.dtypes

station_id                           int64
stationame                          object
month_beginning                     object
avg_weekday_rides                  float64
avg_saturday_rides                 float64
avg_sunday-holiday_rides           float64
monthtotal                           int64
date                        datetime64[ns]
dtype: object

In [122]:
cta_monthly_df_rename = cta_monthly_df.rename(columns={"station_id": "Station_ID", "stationame":"Station_Name"})
cta_monthly_df_rename.head()

Unnamed: 0,Station_ID,Station_Name,month_beginning,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal,date
0,40900,Howard,1/1/2001,6233.9,3814.5,2408.6,164447,2001-01-01
1,41190,Jarvis,1/1/2001,1489.1,1054.0,718.0,40567,2001-01-01
2,40100,Morse,1/1/2001,4412.5,3064.5,2087.8,119772,2001-01-01
3,41300,Loyola,1/1/2001,4664.5,3156.0,1952.8,125008,2001-01-01
4,40760,Granville,1/1/2001,3109.8,2126.0,1453.8,84189,2001-01-01


In [123]:
# Reorganizing the columns
cta_monthly_df_reorg = cta_monthly_df_rename[["Station_ID", "Station_Name","date", "avg_weekday_rides", "avg_saturday_rides",
                                "avg_sunday-holiday_rides", "monthtotal"]]
cta_monthly_df_reorg.head()

Unnamed: 0,Station_ID,Station_Name,date,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal
0,40900,Howard,2001-01-01,6233.9,3814.5,2408.6,164447
1,41190,Jarvis,2001-01-01,1489.1,1054.0,718.0,40567
2,40100,Morse,2001-01-01,4412.5,3064.5,2087.8,119772
3,41300,Loyola,2001-01-01,4664.5,3156.0,1952.8,125008
4,40760,Granville,2001-01-01,3109.8,2126.0,1453.8,84189


# Extract 10 Year ridership average for each station

In [124]:
max_date = cta_monthly_df_reorg['date'].max()
max_date

Timestamp('2018-12-01 00:00:00')

In [125]:
ten_yrs_ago = max_date - relativedelta(years=11)
ten_yrs_ago

Timestamp('2007-12-01 00:00:00')

In [126]:
cta_monthly_ten_years_data = cta_monthly_df_reorg[cta_monthly_df_reorg['date'] > ten_yrs_ago]

cta_monthly_ten_years_data.head()

Unnamed: 0,Station_ID,Station_Name,date,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal
11924,40900,Howard,2008-01-01,5455.5,3572.3,2476.0,146691
11925,41190,Jarvis,2008-01-01,1391.5,994.8,747.8,38330
11926,40100,Morse,2008-01-01,3764.5,2555.8,1892.2,102504
11927,41300,Loyola,2008-01-01,4357.1,2972.3,2116.6,118328
11928,40760,Granville,2008-01-01,3060.5,2277.8,1596.2,84424


In [127]:
cta_monthly_ten_years_data['date'] = pd.to_datetime(cta_monthly_ten_years_data['date']).dt.to_period('y')
cta_monthly_ten_years_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Station_ID,Station_Name,date,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal
11924,40900,Howard,2008,5455.5,3572.3,2476.0,146691
11925,41190,Jarvis,2008,1391.5,994.8,747.8,38330
11926,40100,Morse,2008,3764.5,2555.8,1892.2,102504
11927,41300,Loyola,2008,4357.1,2972.3,2116.6,118328
11928,40760,Granville,2008,3060.5,2277.8,1596.2,84424


In [185]:
ten_year_data = cta_monthly_ten_years_data.pivot_table(index=['Station_ID','Station_Name'], 
                    columns='date', 
                    values='monthtotal', 
                    aggfunc='mean')
ten_year_data.head()

Unnamed: 0_level_0,date,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
Station_ID,Station_Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
40010,Austin-Forest Park,46823.75,46341.75,48507.0,51768.416667,53616.5,53141.416667,53227.333333,52914.333333,52216.083333,49944.166667,47472.166667
40020,Harlem-Lake,101845.583333,95363.0,95740.333333,96599.083333,100778.25,102351.416667,106548.416667,105530.083333,97463.5,97634.916667,97086.583333
40030,Pulaski-Lake,45024.833333,48082.25,53863.666667,54576.166667,55984.583333,51132.083333,50526.916667,47168.25,40027.916667,37620.333333,37237.5
40040,Quincy/Wells,168745.333333,170798.416667,166625.583333,183112.75,184427.083333,185044.416667,190696.083333,182758.083333,186506.083333,185204.416667,181934.916667
40050,Davis,105441.0,103588.5,102167.5,103838.75,104414.833333,103015.666667,104849.916667,103930.083333,100214.166667,97038.833333,96000.416667


In [186]:
ten_year_data = ten_year_data.reset_index().rename_axis(None).rename_axis(None, axis=1)
ten_year_data.head()

Unnamed: 0,Station_ID,Station_Name,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,40010,Austin-Forest Park,46823.75,46341.75,48507.0,51768.416667,53616.5,53141.416667,53227.333333,52914.333333,52216.083333,49944.166667,47472.166667
1,40020,Harlem-Lake,101845.583333,95363.0,95740.333333,96599.083333,100778.25,102351.416667,106548.416667,105530.083333,97463.5,97634.916667,97086.583333
2,40030,Pulaski-Lake,45024.833333,48082.25,53863.666667,54576.166667,55984.583333,51132.083333,50526.916667,47168.25,40027.916667,37620.333333,37237.5
3,40040,Quincy/Wells,168745.333333,170798.416667,166625.583333,183112.75,184427.083333,185044.416667,190696.083333,182758.083333,186506.083333,185204.416667,181934.916667
4,40050,Davis,105441.0,103588.5,102167.5,103838.75,104414.833333,103015.666667,104849.916667,103930.083333,100214.166667,97038.833333,96000.416667


In [187]:
ten_year_data['Station_ID'].nunique()

146

In [191]:
engine = create_engine('sqlite:///ridership_data.sqlite', echo=False)

In [192]:
# add 10 year ridership data to the database 

ten_year_data.to_sql('ten_year_data', con=engine, if_exists='replace')

In [194]:
engine.execute("SELECT * FROM ten_year_data").fetchall()

[(0, 40010, 'Austin-Forest Park', 46823.75, 46341.75, 48507.0, 51768.416666666664, 53616.5, 53141.416666666664, 53227.333333333336, 52914.333333333336, 52216.083333333336, 49944.166666666664, 47472.166666666664),
 (1, 40020, 'Harlem-Lake', 101845.58333333333, 95363.0, 95740.33333333333, 96599.08333333333, 100778.25, 102351.41666666667, 106548.41666666667, 105530.08333333333, 97463.5, 97634.91666666667, 97086.58333333333),
 (2, 40030, 'Pulaski-Lake', 45024.833333333336, 48082.25, 53863.666666666664, 54576.166666666664, 55984.583333333336, 51132.083333333336, 50526.916666666664, 47168.25, 40027.916666666664, 37620.333333333336, 37237.5),
 (3, 40040, 'Quincy/Wells', 168745.33333333334, 170798.41666666666, 166625.58333333334, 183112.75, 184427.08333333334, 185044.41666666666, 190696.08333333334, 182758.08333333334, 186506.08333333334, 185204.41666666666, 181934.91666666666),
 (4, 40050, 'Davis', 105441.0, 103588.5, 102167.5, 103838.75, 104414.83333333333, 103015.66666666667, 104849.9166666

In [32]:
max_date = cta_monthly_df_reorg['date'].max()
max_date

Timestamp('2018-12-01 00:00:00')

In [59]:
one_yrs_ago = max_date - relativedelta(years=1)
one_yrs_ago

Timestamp('2017-12-01 00:00:00')

In [74]:
cta_monthly_one_years_data = cta_monthly_df_reorg[cta_monthly_df_reorg['date'] > one_yrs_ago]

cta_monthly_one_years_data.head()

Unnamed: 0,Station_ID,Station_Name,date,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal
29027,40580,54th/Cermak,2018-01-01,1887.9,985.8,605.6,48504
29028,41020,Logan Square,2018-01-01,6877.0,3727.5,2437.6,178391
29029,40800,Sedgwick,2018-01-01,3453.3,1881.0,1243.8,89715
29030,40140,Dempster-Skokie,2018-01-01,1650.7,713.8,478.4,41562
29031,41170,Garfield-Dan Ryan,2018-01-01,2844.7,2193.0,1416.8,78440


In [75]:
# cta_monthly_one_years_data['date'] = pd.to_datetime(cta_monthly_one_years_data["date"].dt.strftime('%Y-%m'))
cta_monthly_one_years_data['date'] = pd.to_datetime(cta_monthly_one_years_data['date']).dt.to_period('y')
cta_monthly_one_years_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Station_ID,Station_Name,date,avg_weekday_rides,avg_saturday_rides,avg_sunday-holiday_rides,monthtotal
29027,40580,54th/Cermak,2018,1887.9,985.8,605.6,48504
29028,41020,Logan Square,2018,6877.0,3727.5,2437.6,178391
29029,40800,Sedgwick,2018,3453.3,1881.0,1243.8,89715
29030,40140,Dempster-Skokie,2018,1650.7,713.8,478.4,41562
29031,41170,Garfield-Dan Ryan,2018,2844.7,2193.0,1416.8,78440


In [66]:
df = cta_monthly_one_years_data.pivot_table(index=['Station_ID','Station_Name'], 
                    columns='date', 
                    values='avg_weekday_rides', 
                    aggfunc='mean')
df.head()

Unnamed: 0_level_0,date,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12
Station_ID,Station_Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
40010,Austin-Forest Park,1766.2,1870.4,1846.6,1965.9,1927.0,1910.1,1889.3,1927.5,2216.7,2182.9,1973.6,1734.9
40020,Harlem-Lake,3371.5,3499.7,3599.9,3700.1,3818.1,3932.4,3967.4,4006.1,4075.2,3948.2,3669.5,3375.4
40030,Pulaski-Lake,1208.5,1277.3,1297.0,1358.0,1427.4,1417.9,1409.3,1388.4,1489.8,1497.1,1445.0,1537.9
40040,Quincy/Wells,7763.0,7647.4,7893.8,7993.3,8067.4,8194.6,8293.8,8211.8,8499.9,8549.1,7745.1,6677.6
40050,Davis,3374.9,3648.7,3621.4,3779.2,3800.8,3884.7,3880.3,3597.2,3665.9,3782.2,3553.7,3088.3


In [68]:
df = df.reset_index().rename_axis(None).rename_axis(None, axis=1)
df.head()

Unnamed: 0,index,Station_ID,Station_Name,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12
0,0,40010,Austin-Forest Park,1766.2,1870.4,1846.6,1965.9,1927.0,1910.1,1889.3,1927.5,2216.7,2182.9,1973.6,1734.9
1,1,40020,Harlem-Lake,3371.5,3499.7,3599.9,3700.1,3818.1,3932.4,3967.4,4006.1,4075.2,3948.2,3669.5,3375.4
2,2,40030,Pulaski-Lake,1208.5,1277.3,1297.0,1358.0,1427.4,1417.9,1409.3,1388.4,1489.8,1497.1,1445.0,1537.9
3,3,40040,Quincy/Wells,7763.0,7647.4,7893.8,7993.3,8067.4,8194.6,8293.8,8211.8,8499.9,8549.1,7745.1,6677.6
4,4,40050,Davis,3374.9,3648.7,3621.4,3779.2,3800.8,3884.7,3880.3,3597.2,3665.9,3782.2,3553.7,3088.3


In [None]:
# Fetch Data
    # pull in cta data usng the api link, not local files for the data

# Clean Data
    # combine tables by station id, get rid of extra columns
    # update the location column to two separate lat/long columns
    # standaridze the column names

# Manipulate Data
    # add coloumn for year, based on date
    # create new dataframe that is ridership data by year      

# Send table to sqlite database


# get flask app, up and running, how get data out as geojson out for maps 

In [19]:
Base = declarative_base()

In [20]:
# Create sysetm database from City of Chicago webiste 
engine = create_engine('sqlite:///cta.db')
Base.metadata.create_all(engine)


In [21]:
# add daily ridership table to the database 
url = 'https://data.cityofchicago.org/resource/5neh-572f.json'
daily_df = pd.read_json(url)
daily_df.head()
daily_df.to_sql('daily_ridership', con=engine, index_label='id', if_exists='replace')


In [12]:
url = 'https://data.cityofchicago.org/resource/8pix-ypme.csv'
system2_df = pd.read_csv(url)
system2_df.dtypes

stop_id                          int64
direction_id                    object
stop_name                       object
station_name                    object
station_descriptive_name        object
map_id                           int64
ada                               bool
red                               bool
blue                              bool
g                                 bool
brn                               bool
p                                 bool
pexp                              bool
y                                 bool
pnk                               bool
o                                 bool
location                        object
:@computed_region_awaf_s7ux    float64
:@computed_region_6mkv_f3dw      int64
:@computed_region_vrxf_vc4k    float64
:@computed_region_bdys_3d7i    float64
:@computed_region_43wa_7qmu    float64
dtype: object

In [11]:
system_df.drop([':@computed_region_6mkv_f3dw '])


#  Delete multiple columns from the dataframe
# data = data.drop(["Y2001", "Y2002", "Y2003"], axis=1)

# df.to_sql('system_info', con=engine, index_label='id', if_exists='replace')

KeyError: "[':@computed_region_6mkv_f3dw '] not found in axis"

In [13]:
from sqlalchemy.orm import Session
engine = create_engine("sqlite:///cta.db")
conn = engine.connect()
Base.metadata.create_all(engine)

session = Session(bind=engine)


In [14]:

engine.execute("SELECT * FROM daily_ridership WHERE daytype = 'U'").fetchall()

[(0, '2001-01-01 00:00:00.000000', 'U', 273, 40350, 'UIC-Halsted'),
 (1, '2001-01-01 00:00:00.000000', 'U', 306, 41130, 'Halsted-Orange'),
 (2, '2001-01-01 00:00:00.000000', 'U', 1059, 40760, 'Granville'),
 (3, '2001-01-01 00:00:00.000000', 'U', 649, 40070, 'Jackson/Dearborn'),
 (4, '2001-01-01 00:00:00.000000', 'U', 411, 40090, 'Damen-Brown'),
 (5, '2001-01-01 00:00:00.000000', 'U', 870, 40590, 'Damen/Milwaukee'),
 (6, '2001-01-01 00:00:00.000000', 'U', 391, 40720, 'East 63rd-Cottage Grove'),
 (7, '2001-01-01 00:00:00.000000', 'U', 399, 41260, 'Austin-Lake'),
 (8, '2001-01-01 00:00:00.000000', 'U', 788, 40230, 'Cumberland'),
 (9, '2001-01-01 00:00:00.000000', 'U', 448, 41120, '35-Bronzeville-IIT'),
 (10, '2001-01-01 00:00:00.000000', 'U', 479, 40810, 'Medical Center'),
 (11, '2001-01-01 00:00:00.000000', 'U', 2542, 40330, 'Grand/State'),
 (12, '2001-01-01 00:00:00.000000', 'U', 176, 41050, 'Linden'),
 (13, '2001-01-01 00:00:00.000000', 'U', 0, 40140, 'Skokie'),
 (14, '2001-01-01 00:00

In [160]:
import os

import pandas as pd
import numpy as np

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

from flask import Flask, jsonify, render_template
from flask_sqlalchemy import SQLAlchemy

app = Flask(__name__)


#################################################
# Database Setup
#################################################

app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///ridership_data.sqlite"
db = SQLAlchemy(app)

  'SQLALCHEMY_TRACK_MODIFICATIONS adds significant overhead and '


In [161]:
Base = automap_base()
# reflect the tables
Base.prepare(db.engine, reflect=True)

# Save references to each table
Stations_Metadata = Base.classes.data

AttributeError: data