# Database setup - Project 2


In [1]:
# Imports
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship


In [2]:
# Create the engine for the SQLite database file `p2_cities.sqlite`
engine = create_engine("sqlite:///p2_cities.sqlite")

# Declarative base class: ORM models in this notebook subclass `Base`,
# and their tables are registered on `Base.metadata` (used below by
# `Base.metadata.create_all`)
Base = declarative_base()


In [3]:
# Define the ORM class `Cities`, mapped to the `cities` table
### BEGIN SOLUTION
class Cities(Base):
    """ORM model for one row of the `cities` table.

    Columns: `id` (integer primary key), `city` and `state_id` (text),
    `population`, `lat` and `lng` (floats).
    """

    __tablename__ = 'cities'

    id = Column(Integer, primary_key=True)
    city = Column(Text)
    state_id = Column(Text)
    population = Column(Float)
    lat = Column(Float)
    lng = Column(Float)
    # Disabled columns — not part of the current schema:
    #date = Column(Text)
    #avg_edu = Column(Float)
    #avg_income_pc = Column(Float)
    #total_college_students = Column(Float)
    #crime_rate_per_100000 = Column(Float)

    def __repr__(self):
        """Return a short `id=..., name=...` summary of the row."""
        # The model has no `name` attribute; the city's name is stored in `city`.
        return f"id={self.id}, name={self.city}"

In [4]:
# Create every table registered on Base.metadata (here: `cities`) in the
# database behind `engine`

Base.metadata.create_all(engine)


In [5]:
# Verify that the table name exists in the database.
# Engine.table_names() is deprecated (removed in SQLAlchemy 2.0); the
# Inspector API returns the same list of table names.
sqlalchemy.inspect(engine).get_table_names()

['cities']

In [6]:
# Use Pandas to Bulk insert each CSV file into the table
def populate_table(engine, table, csvfile):
    """Replace the contents of `table` with the rows of a CSV file.

    Args:
        engine: SQLAlchemy engine for the target database; its `begin()`
            context manager supplies the connection and transaction.
        table: Table object providing the `delete()` and `insert()` constructs.
        csvfile: Path (or any other source accepted by `pandas.read_csv`) of
            the CSV file; its header names become the keys of each row dict.

    Returns:
        None.

    All existing rows are deleted before the insert, so calling this again
    reloads the table instead of duplicating data.
    """
    # Load the CSV file into a pandas dataframe
    df_to_insert = pd.read_csv(csvfile)

    # orient='records' yields one {column: value} dict per row, which is the
    # parameter shape a bulk (executemany-style) insert expects
    # http://pandas-docs.github.io/pandas-docs-travis/io.html#orient-options
    records = df_to_insert.to_dict(orient='records')

    # engine.begin() runs the delete and the insert in a single transaction:
    # committed together on success, rolled back together on error, and the
    # connection is always released (the bare engine.connect() was never closed).
    with engine.begin() as conn:
        conn.execute(table.delete())

        # A header-only CSV produces no records; skip the INSERT rather than
        # executing it without any rows.
        if records:
            conn.execute(table.insert(), records)
    
# Load `cities.csv` into the `cities` table (`Cities.__table__` is the table
# object behind the ORM class). populate_table deletes existing rows first,
# so re-running this cell reloads the table rather than duplicating rows.
populate_table(engine, Cities.__table__, 'cities.csv')


In [10]:
# Use a basic query to validate that the data was inserted correctly for table 'cities'.
# Engine.execute() with a raw SQL string is a legacy API (removed in
# SQLAlchemy 2.0), so run the query on an explicit connection that is closed
# automatically, and wrap the SQL in text().
with engine.connect() as conn:
    large_cities = conn.execute(
        sqlalchemy.text("SELECT * FROM cities WHERE population > 100000 LIMIT 100")
    ).fetchall()

# Last expression of the cell: display the fetched rows
large_cities

[(1840000177, 'Fargo', 'ND', 204820.0),
 (1840000327, 'Portland', 'ME', 206023.0),
 (1840000373, 'Rochester', 'NY', 711998.0),
 (1840000378, 'Syracuse', 'NY', 407259.0),
 (1840000386, 'Buffalo', 'NY', 926261.0),
 (1840000417, 'Albany', 'NY', 597270.0),
 (1840000426, 'Lowell', 'MA', 111346.0),
 (1840000429, 'Cambridge', 'MA', 113630.0),
 (1840000434, 'Worcester', 'MA', 498997.0),
 (1840000437, 'Leominster', 'MA', 119400.0),
 (1840000440, 'Waterloo', 'IA', 112060.0),
 (1840000447, 'Sioux City', 'IA', 106279.0),
 (1840000455, 'Boston', 'MA', 4637537.0),
 (1840000460, 'Binghamton', 'NY', 150747.0),
 (1840000466, 'Springfield', 'MA', 628076.0),
 (1840000471, 'Cedar Rapids', 'IA', 186160.0),
 (1840000478, 'Erie', 'PA', 188079.0),
 (1840000494, 'Chicago', 'IL', 8675982.0),
 (1840000500, 'Poughkeepsie', 'NY', 396041.0),
 (1840000532, 'Iowa City', 'IA', 119091.0),
 (1840000572, 'Toledo', 'OH', 488672.0),
 (1840000596, 'Cleveland', 'OH', 1730363.0),
 (1840000644, 'Lorain', 'OH', 180239.0),
 (184

In [None]:
#This CSV is serving as our dummy DATA