# Database setup - Project 2


In [39]:
# Imports
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship


In [40]:
# Location of the SQLite database file, expressed as a SQLAlchemy URL
DATABASE_URL = "sqlite:///p2_cities.sqlite"

# Engine shared by every cell below that talks to the database
engine = create_engine(DATABASE_URL)

# Declarative base class: ORM models (such as the city table) subclass it
# and declare a type for each of their columns
Base = declarative_base()


In [41]:
# Define the ORM class for `Cities`
### BEGIN SOLUTION
class Cities(Base):
    """ORM model mapped to the `cities` table.

    Mapped columns:
        id          Integer primary key.
        city        Text.
        state_name  Text.
        population  Float.
        lat         Float.
        lng         Float.
    """

    __tablename__ = 'cities'

    id = Column(Integer, primary_key=True)
    city = Column(Text)
    state_name = Column(Text)
    population = Column(Float)
    lat = Column(Float)
    lng = Column(Float)
    # Disabled columns, kept for reference (presumably planned additions --
    # confirm before enabling; they are not part of the current schema):
    #date = Column(Text)
    #avg_edu = Column(Float)
    #avg_income_pc = Column(Float)
    #total_college_students = Column(Float)
    #crime_rate_per_100000 = Column(Float)

    def __repr__(self):
        """Return a short `id=..., name=...` description of the row."""
        # The city's name is stored in the `city` column; the model has no
        # `name` attribute, so reading `self.name` raised AttributeError.
        return f"id={self.id}, name={self.city}"

In [42]:
# Use `create_all` to create the tables registered on `Base.metadata`
# in the database that `engine` points at.

Base.metadata.create_all(engine)


In [43]:
# Verify that the table name exists in the database.
# `Engine.table_names()` is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector returns the same list of table names on old and new versions.
sqlalchemy.inspect(engine).get_table_names()

['cities']

In [44]:
# Use Pandas to Bulk insert each CSV file into the table
def populate_table(engine, table, csvfile):
    """Replace the contents of `table` with the rows of a CSV file.

    Args:
        engine: Object whose `begin()` returns a context manager yielding a
            connection with an `execute()` method (a SQLAlchemy Engine).
        table: Object providing `delete()` and `insert()` statement builders
            (a SQLAlchemy Table).
        csvfile: Path or file-like object accepted by `pandas.read_csv`.
            Its column headers are used as the keys of each inserted row.

    Returns:
        None.
    """
    # Read the CSV before touching the database, so a missing or malformed
    # file never leaves an open connection or an emptied table behind.
    # orient='records' produces one {column: value} dict per CSV row.
    records = pd.read_csv(csvfile).to_dict(orient='records')

    # `engine.begin()` runs the block in a single transaction: it commits when
    # the block succeeds, rolls back if it raises, and always releases the
    # connection. The delete and the insert therefore succeed or fail together.
    with engine.begin() as conn:
        # Clear existing rows so re-running the cell does not duplicate data
        # or collide on primary keys.
        conn.execute(table.delete())

        # Skip the insert for a header-only CSV: executing an INSERT with an
        # empty parameter list does not mean "insert nothing".
        if records:
            # One bulk insert for all rows
            conn.execute(table.insert(), records)
    
# Call the function to load 'cities.csv' into the `cities` table.
# `Cities.__table__` is passed (rather than the ORM class) because
# populate_table calls `.delete()` and `.insert()` on the table object.
populate_table(engine, Cities.__table__, 'cities.csv')


In [45]:
# Use a basic query to validate that the data was inserted correctly for table 'cities'.
# The query runs on an explicit connection that is released when the `with`
# block ends; `engine.execute()` with a raw string is deprecated in
# SQLAlchemy 1.4 and removed in 2.0.
validation_sql = sqlalchemy.text(
    "SELECT * FROM cities WHERE population > 100000 ORDER BY population DESC LIMIT 100"
)
with engine.connect() as conn:
    largest_cities = conn.execute(validation_sql).fetchall()

# Bare last expression so the notebook displays the fetched rows
largest_cities

[(1840034016, 'New York', 'New York', 19354922.0, 40.6943, -73.9249),
 (1840020491, 'Los Angeles', 'California', 12815475.0, 34.1139, -118.4068),
 (1840000494, 'Chicago', 'Illinois', 8675982.0, 41.8373, -87.6861),
 (1840015149, 'Miami', 'Florida', 6381966.0, 25.784, -80.2102),
 (1840019440, 'Dallas', 'Texas', 5733259.0, 32.7937, -96.7662),
 (1840000673, 'Philadelphia', 'Pennsylvania', 5637884.0, 40.0076, -75.134),
 (1840020925, 'Houston', 'Texas', 5446468.0, 29.7868, -95.3905),
 (1840006060, 'Washington', 'District of Columbia', 5289420.0, 38.9047, -77.0163),
 (1840013660, 'Atlanta', 'Georgia', 5228750.0, 33.7626, -84.4228),
 (1840000455, 'Boston', 'Massachusetts', 4637537.0, 42.3188, -71.0846),
 (1840020568, 'Phoenix', 'Arizona', 4081849.0, 33.5722, -112.0891),
 (1840021117, 'Seattle', 'Washington', 3643765.0, 47.6211, -122.3244),
 (1840021543, 'San Francisco', 'California', 3603761.0, 37.7562, -122.443),
 (1840003971, 'Detroit', 'Michigan', 3522206.0, 42.3834, -83.1024),
 (1840021990

In [19]:
#This CSV is serving as our dummy DATA

In [49]:
# Load the 100 most populous cities into a DataFrame.
# The query runs on an explicit connection that is released when the `with`
# block ends; `engine.execute()` with a raw string is deprecated in
# SQLAlchemy 1.4 and removed in 2.0.
top_cities_sql = sqlalchemy.text("SELECT * FROM cities ORDER BY population DESC LIMIT 100")
with engine.connect() as conn:
    df = pd.DataFrame(conn.execute(top_cities_sql).fetchall())

In [50]:
# Rename the columns by position. The labels must stay in the same order as
# the columns returned by `SELECT *` -- presumably the order declared on the
# Cities model (id, city, state_name, population, lat, lng); verify if the
# schema changes.
df.columns = ["id", "city_name", "state", "pop","lat", "lng"] 


In [51]:
# Number of distinct states among the selected cities
# (dropna=False counts a missing value as its own entry, matching len(unique()))
df["state"].nunique(dropna=False)

38

In [52]:
# Display the DataFrame to inspect the query result and the column labels
df

Unnamed: 0,id,city_name,state,pop,lat,lng
0,1840034016,New York,New York,19354922.0,40.6943,-73.9249
1,1840020491,Los Angeles,California,12815475.0,34.1139,-118.4068
2,1840000494,Chicago,Illinois,8675982.0,41.8373,-87.6861
3,1840015149,Miami,Florida,6381966.0,25.7840,-80.2102
4,1840019440,Dallas,Texas,5733259.0,32.7937,-96.7662
5,1840000673,Philadelphia,Pennsylvania,5637884.0,40.0076,-75.1340
6,1840020925,Houston,Texas,5446468.0,29.7868,-95.3905
7,1840006060,Washington,District of Columbia,5289420.0,38.9047,-77.0163
8,1840013660,Atlanta,Georgia,5228750.0,33.7626,-84.4228
9,1840000455,Boston,Massachusetts,4637537.0,42.3188,-71.0846
