In [1]:
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship
import glob, os

In [2]:
# Get files from output_yummy to put into the database
# exist_ok=True keeps this cell from raising FileExistsError when the
# "cleaned_output" folder is already on disk (e.g. on a second full run).
os.makedirs("cleaned_output", exist_ok=True)

# Later cells read the CSVs by bare file name, so the working directory is
# switched to the folder that holds them.
os.chdir("output_yummy")

# glob returns names in arbitrary order; sorting makes the run reproducible.
file_list = sorted(glob.glob("*.csv"))
print(file_list)

['chinese_drop_duplicates_in_Austin_TX.csv', 'chinese_drop_duplicates_in_Des Moines_IA.csv', 'french_drop_duplicates_in_Austin_TX.csv', 'french_drop_duplicates_in_San Antonio_TX.csv', 'pizza_drop_duplicates_in_Seguin_TX.csv', 'pizza_drop_duplicates_in_Waco_TX.csv', 'restaurant_drop_duplicates_in_Agawam_MA.csv', 'restaurant_drop_duplicates_in_Austin_TX.csv', 'restaurant_drop_duplicates_in_San Antonio_TX.csv', 'sushi_drop_duplicates_in_Burlington_VT.csv', 'taco_drop_duplicates_in_Austin_TX.csv', 'taco_drop_duplicates_in_San Antonio_TX.csv']


In [3]:
# Change Column Names of API output to not include parentheses
# file = "chinese_drop_duplicates_in_Austin_TX.csv"
# os.chdir("..")
# os.chdir("TheYummyDataApp")

 
# os.chdir("output_yummy")

In [4]:

for file in file_list:
    # Rename the API output columns so they no longer contain parentheses.
    restaurants_df = pd.read_csv(file).rename(
        columns={
            "ID(Google)": "Google_ID",
            "Price_Level(Google)": "Price_Level",
            "Rating(Google)": "Rating",
        }
    )

    # Getting the city, state and cuisine from the file name so we can put it
    # into a new column. Splitting on "_" yields the cuisine at index 0, the
    # city at index 4 and "<state>.csv" at index 5.
    word_list = file.split("_")
    cuisine = word_list[0]
    city = word_list[4].lower()
    state = word_list[5].split(".")[0].lower()
    new_column = city + "-" + state + "-" + cuisine

    restaurants_df["City_State_Cuisine"] = new_column
    print(restaurants_df.head())

    # Output Cleaned CSV
    # index=False keeps the pandas row index out of the file, so the cleaned
    # CSV contains only the data columns.
    restaurants_df.to_csv("../cleaned_output/cleaned_" + file, index=False)

                                             Address  \
0  1931 E Ben White Blvd #300, Austin, TX 78741, USA   
1           2712 Bee Caves Rd, Austin, TX 78746, USA   
2             212 E Oltorf St, Austin, TX 78704, USA   
3  3601, 6605 Airport Blvd, Austin, TX 78752, Uni...   
4        625 W Ben White Blvd, Austin, TX 78704, USA   

                                  Google_ID   Latitude  Longitude  \
0  b9eb17dab1edbc2b1a046a224789b1330e44554c  30.215372 -97.744803   
1  bdefc39e9e72060c0c3f8552220f8ea2ca60d20b  30.271814 -97.786538   
2  b75b51402eec73943491ad3847b6b1c46d7a0ad8  30.238337 -97.751720   
3  aeca550acd577ada947d15388018945bbcd9b492  30.330697 -97.715309   
4  ec2339d24cecd5986b306474bb427557f034a055  30.225787 -97.771176   

                               Name  Price_Level  Rating    Zip  \
0                              Asia          2.0     3.8  78741   
1                         Chinatown          2.0     4.1  78746   
2                      New Mandarin          2.

                                             Address  \
0           606 West Ave, San Antonio, TX 78201, USA   
1      3319 Hillcrest Dr, San Antonio, TX 78201, USA   
2  3909 Fredericksburg Rd, San Antonio, TX 78201,...   
3       606 W Cypress St, San Antonio, TX 78212, USA   
4  5115 Fredericksburg Rd, San Antonio, TX 78229,...   

                                  Google_ID   Latitude  Longitude  \
0  ad221873d25b80db38eaf78db8314b1d96325fde  29.468196 -98.525746   
1  0d7d67b76f7e772c7baadff88df8b95bf43d2465  29.492208 -98.554208   
2  5d96f5fff51d230f372c42978eecdea3019d0b42  29.482538 -98.544810   
3  859975d0886755c6dde56228b464ec5f56290ff1  29.440464 -98.500532   
4  0b0d829fabca338094175301102b9e01b13670af  29.501592 -98.560025   

                              Name  Price_Level  Rating    Zip  \
0        Jacala Mexican Restaurant          2.0     4.0  78201   
1                 Jim's Restaurant          2.0     4.1  78201   
2  South Garden Chinese Restaurant          2.0   

In [5]:
# Switch the working directory to the sibling "cleaned_output" folder;
# relative paths used after this point resolve against it.
os.chdir("../cleaned_output")

In [6]:
# SQLAlchemy engine for the SQLite database file "yummydata.sqlite"
# (the URL uses a relative path).
engine = create_engine("sqlite:///yummydata.sqlite")

In [7]:
# Declarative base class that the ORM model classes in this notebook subclass.
Base = declarative_base()

In [8]:
# Create ORM Class for Zip Code Demographic Data
class ZipDemo(Base):
    """ORM mapping for the ``zip_demographics`` table, keyed by ``Zip``."""
    
    __tablename__ = 'zip_demographics'

    Zip = Column(Integer, primary_key=True)
    Population = Column(Integer)
    Density = Column(Float)
    AverageIncome = Column(Float)
    City = Column(Text)
    State = Column(Text)
    ZipLatitude = Column(Float)
    ZipLongitude = Column(Float)
    # Foreign key to the City_State column of the "user_input" table.
    City_State = Column(Text, ForeignKey("user_input.City_State"))
    
    # NOTE(review): the disabled __repr__ below refers to `id` and `station`,
    # neither of which is a column declared on this class.
#     def __repr__(self):
#         return f"id={self.id}, name={self.station}"

In [9]:
class RestaurantSearch(Base):
    """ORM mapping for the ``restaurant_search`` table, keyed by an integer ``id``."""
    
    __tablename__ = 'restaurant_search'

    id = Column(Integer, primary_key=True)
    Address = Column(Text)    
    # Google_ID is currently not mapped (left disabled below).
#     Google_ID = Column(Text, primary_key=True)
    Latitude = Column(Float)
    Longitude = Column(Float)
    Name = Column(Text)
    # NOTE(review): the sample rows printed by the cleaning cell show
    # Price_Level values such as 2.0 -- confirm Integer is the intended type.
    Price_Level = Column(Integer)
    Rating = Column(Float)
    # Foreign key to the Zip primary key of the "zip_demographics" table.
    Zip = Column(Integer, ForeignKey("zip_demographics.Zip"))
    # Holds the "<city>-<state>-<cuisine>" string that the cleaning cell
    # derives from each CSV file name.
    City_State_Cuisine = Column(Text)

In [10]:
class Input(Base):
    """ORM mapping for the ``user_input`` table, keyed by an integer ``id``."""
    
    __tablename__ = 'user_input'

    id = Column(Integer, primary_key=True)
    City = Column(Text)
    State = Column(Text)
    Cuisine = Column(Text)
    # Target of the foreign key declared on ZipDemo.City_State.
    City_State = Column(Text)
    City_State_Cuisine = Column(Text)

In [11]:
# Emit CREATE TABLE for every model registered on Base, using the engine above.
Base.metadata.create_all(engine)

In [12]:
# Create a Function to Populate Tables
def populate_table(engine, table, csvfile):
    """Bulk-insert every row of a CSV file into a database table.

    Parameters
    ----------
    engine
        Object whose ``begin()`` returns a context manager yielding a
        connection with an ``execute`` method (e.g. a SQLAlchemy ``Engine``).
    table
        Object whose ``insert()`` builds the INSERT statement
        (e.g. a SQLAlchemy ``Table``).
    csvfile
        Path or buffer accepted by ``pandas.read_csv``.

    Returns
    -------
    None
        Nothing is executed when the CSV contains no data rows.
    """
    # Load the CSV file into a pandas dataframe
    df_of_data_to_insert = pd.read_csv(csvfile)

    # orient='records' produces one dict per row, keyed by column name
    data = df_of_data_to_insert.to_dict(orient='records')

    # Nothing to insert: avoid executing an INSERT with an empty parameter list.
    if not data:
        return

    # engine.begin() runs the insert inside a transaction that is committed
    # on success and rolled back on error, and always releases the connection
    # (the previous engine.connect() handle was never closed).
    with engine.begin() as conn:
        # Optional: Delete all rows in the table
        # conn.execute(table.delete())

        # Insert the dataframe into the database in one bulk insert
        conn.execute(table.insert(), data)

In [13]:
# Populate Tables

# Gather every CSV found in the current working directory.
# os.chdir("../cleaned_output")
another_file_list = list(glob.glob("*.csv"))

print(another_file_list)

# Load each CSV into the restaurant_search table, echoing its name first.
for file in another_file_list:
    print(file)
    populate_table(engine, RestaurantSearch.__table__, file)

['cleaned_chinese_drop_duplicates_in_Austin_TX.csv', 'cleaned_chinese_drop_duplicates_in_Des Moines_IA.csv', 'cleaned_french_drop_duplicates_in_Austin_TX.csv', 'cleaned_french_drop_duplicates_in_San Antonio_TX.csv', 'cleaned_pizza_drop_duplicates_in_Seguin_TX.csv', 'cleaned_pizza_drop_duplicates_in_Waco_TX.csv', 'cleaned_restaurant_drop_duplicates_in_Agawam_MA.csv', 'cleaned_restaurant_drop_duplicates_in_Austin_TX.csv', 'cleaned_restaurant_drop_duplicates_in_San Antonio_TX.csv', 'cleaned_sushi_drop_duplicates_in_Burlington_VT.csv', 'cleaned_taco_drop_duplicates_in_Austin_TX.csv', 'cleaned_taco_drop_duplicates_in_San Antonio_TX.csv']
cleaned_chinese_drop_duplicates_in_Austin_TX.csv
cleaned_chinese_drop_duplicates_in_Des Moines_IA.csv
cleaned_french_drop_duplicates_in_Austin_TX.csv
cleaned_french_drop_duplicates_in_San Antonio_TX.csv
cleaned_pizza_drop_duplicates_in_Seguin_TX.csv
cleaned_pizza_drop_duplicates_in_Waco_TX.csv
cleaned_restaurant_drop_duplicates_in_Agawam_MA.csv
cleaned_rest

In [14]:
# Move up one directory; the CSV named below is read relative to that folder.
os.chdir("..")
# Load the CSV into the zip_demographics table (ZipDemo's underlying Table).
populate_table(engine, ZipDemo.__table__, 'zip_demographicchineseATX.csv')