In [12]:
from numpy import genfromtxt
from time import time
from datetime import datetime
from dateutil.relativedelta import relativedelta
from sqlalchemy import Column, Integer, Float, Date, String, VARCHAR
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.orm import session
import csv
import pandas as pd

In [33]:
# url = "https://data.cityofchicago.org/resource/t2rn-p8d7.csv"
url = "https://data.cityofchicago.org/resource/t2rn-p8d7.json"
cta_monthly_df = pd.read_json(url)
cta_monthly_df.tail()

Unnamed: 0,avg_saturday_rides,avg_sunday_holiday_rides,avg_weekday_rides,month_beginning,monthtotal,station_id,stationame
995,1916.0,1470.0,2632.2,2001-08-01T00:00:00.000,74085,40880,Thorndale
996,2898.5,2334.3,4296.5,2001-08-01T00:00:00.000,119751,41380,Bryn Mawr
997,2312.0,1839.8,3293.5,2001-08-01T00:00:00.000,92358,40340,Berwyn
998,1930.5,1597.3,2685.0,2001-08-01T00:00:00.000,75867,41200,Argyle
999,1732.3,1517.0,2618.7,2001-08-01T00:00:00.000,73226,40770,Lawrence


In [4]:
cta_monthly_df.dtypes

station_id                    int64
stationame                   object
month_beginning              object
avg_weekday_rides           float64
avg_saturday_rides          float64
avg_sunday_holiday_rides    float64
monthtotal                    int64
dtype: object

In [5]:
cta_monthly_df['date'] = pd.to_datetime(cta_monthly_df['month_beginning'])
cta_monthly_df.head()

Unnamed: 0,station_id,stationame,month_beginning,avg_weekday_rides,avg_saturday_rides,avg_sunday_holiday_rides,monthtotal,date
0,40900,Howard,2001-01-01T00:00:00.000,6233.9,3814.5,2408.6,164447,2001-01-01
1,41190,Jarvis,2001-01-01T00:00:00.000,1489.1,1054.0,718.0,40567,2001-01-01
2,40100,Morse,2001-01-01T00:00:00.000,4412.5,3064.5,2087.8,119772,2001-01-01
3,41300,Loyola,2001-01-01T00:00:00.000,4664.5,3156.0,1952.8,125008,2001-01-01
4,40760,Granville,2001-01-01T00:00:00.000,3109.8,2126.0,1453.8,84189,2001-01-01


In [6]:
cta_monthly_df.dtypes

station_id                           int64
stationame                          object
month_beginning                     object
avg_weekday_rides                  float64
avg_saturday_rides                 float64
avg_sunday_holiday_rides           float64
monthtotal                           int64
date                        datetime64[ns]
dtype: object

In [8]:
# Reorganizing the columns
cta_monthly_df_reorg = cta_monthly_df[["station_id", "stationame","date", "avg_weekday_rides", "avg_saturday_rides",
                                "avg_sunday_holiday_rides", "monthtotal"]]
cta_monthly_df_reorg.head()

Unnamed: 0,station_id,stationame,date,avg_weekday_rides,avg_saturday_rides,avg_sunday_holiday_rides,monthtotal
0,40900,Howard,2001-01-01,6233.9,3814.5,2408.6,164447
1,41190,Jarvis,2001-01-01,1489.1,1054.0,718.0,40567
2,40100,Morse,2001-01-01,4412.5,3064.5,2087.8,119772
3,41300,Loyola,2001-01-01,4664.5,3156.0,1952.8,125008
4,40760,Granville,2001-01-01,3109.8,2126.0,1453.8,84189


In [28]:
cta_monthly_df_rename = cta_monthly_df.rename(columns={"station_id": "Station_ID", "stationame":"Station_Name"})
cta_monthly_df_rename.tail()

Unnamed: 0,Station_ID,Station_Name,date,avg_weekday_rides,avg_saturday_rides,avg_sunday_holiday_rides,monthtotal
995,40880,Thorndale,2001-08-01,2632.2,1916.0,1470.0,74085
996,41380,Bryn Mawr,2001-08-01,4296.5,2898.5,2334.3,119751
997,40340,Berwyn,2001-08-01,3293.5,2312.0,1839.8,92358
998,41200,Argyle,2001-08-01,2685.0,1930.5,1597.3,75867
999,40770,Lawrence,2001-08-01,2618.7,1732.3,1517.0,73226


In [22]:
# three_yrs_ago = datetime.now() - relativedelta(years=4)
# three_yrs_ago.date().strftime('%y-%m-%d')

'15-09-18'

In [24]:
max_date = cta_monthly_df['date'].max()
max_date

Timestamp('2001-08-01 00:00:00')

In [27]:
ten_yrs_ago = max_date - relativedelta(years=10)
ten_yrs_ago

Timestamp('1991-08-01 00:00:00')

In [None]:
# Fetch Data
    # pull in cta data usng the api link, not local files for the data

# Clean Data
    # combine tables by station id, get rid of extra columns
    # update the location column to two separate lat/long columns
    # standaridze the column names

# Manipulate Data
    # add coloumn for year, based on date
    # create new dataframe that is ridership data by year      

# Send table to sqlite database


# get flask app, up and running, how get data out as geojson out for maps 

In [19]:
Base = declarative_base()

In [20]:
# Create sysetm database from City of Chicago webiste 
engine = create_engine('sqlite:///cta.db')
Base.metadata.create_all(engine)


In [21]:
# add daily ridership table to the database 
url = 'https://data.cityofchicago.org/resource/5neh-572f.json'
daily_df = pd.read_json(url)
daily_df.head()
daily_df.to_sql('daily_ridership', con=engine, index_label='id', if_exists='replace')


In [12]:
url = 'https://data.cityofchicago.org/resource/8pix-ypme.csv'
system2_df = pd.read_csv(url)
system2_df.dtypes

stop_id                          int64
direction_id                    object
stop_name                       object
station_name                    object
station_descriptive_name        object
map_id                           int64
ada                               bool
red                               bool
blue                              bool
g                                 bool
brn                               bool
p                                 bool
pexp                              bool
y                                 bool
pnk                               bool
o                                 bool
location                        object
:@computed_region_awaf_s7ux    float64
:@computed_region_6mkv_f3dw      int64
:@computed_region_vrxf_vc4k    float64
:@computed_region_bdys_3d7i    float64
:@computed_region_43wa_7qmu    float64
dtype: object

In [11]:
system_df.drop([':@computed_region_6mkv_f3dw '])


#  Delete multiple columns from the dataframe
# data = data.drop(["Y2001", "Y2002", "Y2003"], axis=1)

# df.to_sql('system_info', con=engine, index_label='id', if_exists='replace')

KeyError: "[':@computed_region_6mkv_f3dw '] not found in axis"

In [13]:
from sqlalchemy.orm import Session
engine = create_engine("sqlite:///cta.db")
conn = engine.connect()
Base.metadata.create_all(engine)

session = Session(bind=engine)


In [14]:

engine.execute("SELECT * FROM daily_ridership WHERE daytype = 'U'").fetchall()

[(0, '2001-01-01 00:00:00.000000', 'U', 273, 40350, 'UIC-Halsted'),
 (1, '2001-01-01 00:00:00.000000', 'U', 306, 41130, 'Halsted-Orange'),
 (2, '2001-01-01 00:00:00.000000', 'U', 1059, 40760, 'Granville'),
 (3, '2001-01-01 00:00:00.000000', 'U', 649, 40070, 'Jackson/Dearborn'),
 (4, '2001-01-01 00:00:00.000000', 'U', 411, 40090, 'Damen-Brown'),
 (5, '2001-01-01 00:00:00.000000', 'U', 870, 40590, 'Damen/Milwaukee'),
 (6, '2001-01-01 00:00:00.000000', 'U', 391, 40720, 'East 63rd-Cottage Grove'),
 (7, '2001-01-01 00:00:00.000000', 'U', 399, 41260, 'Austin-Lake'),
 (8, '2001-01-01 00:00:00.000000', 'U', 788, 40230, 'Cumberland'),
 (9, '2001-01-01 00:00:00.000000', 'U', 448, 41120, '35-Bronzeville-IIT'),
 (10, '2001-01-01 00:00:00.000000', 'U', 479, 40810, 'Medical Center'),
 (11, '2001-01-01 00:00:00.000000', 'U', 2542, 40330, 'Grand/State'),
 (12, '2001-01-01 00:00:00.000000', 'U', 176, 41050, 'Linden'),
 (13, '2001-01-01 00:00:00.000000', 'U', 0, 40140, 'Skokie'),
 (14, '2001-01-01 00:00