In [9]:
from urllib.parse import quote

import matplotlib.pyplot as plt
import pandas as pd
from sqlalchemy import create_engine

from config import postgresPass as pw

In [10]:
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')

In [11]:
import numpy as np
import datetime as dt

In [12]:
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager

In [13]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import func, inspect

In [14]:
# Assemble the PostgreSQL connection URL from its parts and create the engine.
protocol = 'postgresql'
username = 'postgres'
password = pw
host = 'localhost'
port = 5432
database_name = 'bike_shop_test'
# Percent-encode the credentials so that characters such as '@', ':', '/' or '%'
# in the password are not mistaken for URL delimiters when the string is parsed.
rds_connection_string = (
    f'{protocol}://{quote(username, safe="")}:{quote(str(password), safe="")}'
    f'@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

In [18]:
# Create an automap base that will generate mapped classes from the existing database
Base1 = automap_base()
# Reflect the tables reachable through the engine into mapped classes.
# NOTE(review): the reflect=True form of prepare() is reported as deprecated by newer
# SQLAlchemy releases -- confirm against the installed version before upgrading.
Base1.prepare(engine,reflect=True)

In [19]:
# View all of the classes that automap found.
# Tables reported by the inspector but absent from this list were not mapped.
Base1.classes.keys()

['customers', 'calendar', 'products', 'categories', 'territories']

In [20]:
# Build an inspector on the engine and list every table name it reports
inspector = inspect(engine)
inspector.get_table_names()

['customers',
 'combined_sales',
 'calendar',
 'products',
 'categories',
 'returns',
 'territories']

In [None]:
# Print the name and reflected type of every column in combined_sales
sales_columns = inspector.get_columns('combined_sales')
for column in sales_columns:
    print(f"{column['name']} {column['type']}")

In [None]:
# Save references to each mapped table.
# The automap base created earlier in this notebook is named Base1; the previous
# references to `Base` raise NameError on a fresh kernel.
customers = Base1.classes.customers
# combined_sales does not appear in the recorded Base1.classes.keys() output, so
# plain attribute access would raise AttributeError; fall back to None when unmapped.
# NOTE(review): presumably the table lacks a primary key, which automap needs in
# order to map it -- confirm against the schema.
sales = getattr(Base1.classes, 'combined_sales', None)
calendar = Base1.classes.calendar
products = Base1.classes.products
categories = Base1.classes.categories
# `returns` is likewise absent from the automapped classes, so no reference is created.
territories = Base1.classes.territories

In [None]:
# Open an ORM session bound to the engine for the queries that follow
session = Session(bind=engine)

In [None]:
# Query the latest value of calendar.Date and display the returned row
latest_date_query = session.query(func.max(calendar.Date))
most_recent_date = latest_date_query.first()
most_recent_date

In [None]:
# Count the CustomerKey values in the customers table
customer_count_query = session.query(func.count(customers.CustomerKey))
customer_count_query.all()

In [None]:
# Count customers per education level, largest group first
result1 = (
    session.query(customers.EducationLevel, func.count(customers.EducationLevel))
    .group_by(customers.EducationLevel)
    .order_by(func.count(customers.EducationLevel).desc())
    .all()
)

In [None]:
# Load the query results into a DataFrame with named columns and order the rows
# from the smallest to the largest customer count (this is what sets the bar order).
df1 = pd.DataFrame(result1, columns=['Education Level', 'customers']).sort_values('customers')

# Plot with the pandas bar helper and keep the returned Axes so labels and title
# are attached to this figure explicitly rather than to pyplot's current figure.
ax = df1.plot.bar(x='Education Level', y='customers', rot=90)
ax.set_xlabel("Education Level")
ax.set_ylabel("customers")

# Add title
ax.set_title("Customers in each Education Level", fontsize=15)

# Save to folder; bbox_inches="tight" keeps the rotated tick labels inside the image
ax.figure.savefig("Resources/Education Level.png", bbox_inches="tight")

In [None]:
# Count customers per annual income value, largest group first
result2 = (
    session.query(customers.AnnualIncome, func.count(customers.AnnualIncome))
    .group_by(customers.AnnualIncome)
    .order_by(func.count(customers.AnnualIncome).desc())
    .all()
)

In [None]:
# Load the query results into a DataFrame with named columns
# (rows keep the query order: most common income value first).
df2 = pd.DataFrame(result2, columns=['Income Level', 'customers'])

# Plot with the pandas bar helper and keep the returned Axes so labels and title
# are attached to this figure explicitly rather than to pyplot's current figure.
ax = df2.plot.bar(x='Income Level', y='customers', rot=90)
ax.set_xlabel("Income Level")
ax.set_ylabel("customers")

# Add title
ax.set_title("Customers in each Income Level", fontsize=15)

# Save to folder; bbox_inches="tight" keeps the rotated tick labels inside the image
ax.figure.savefig("Resources/Income Level.png", bbox_inches="tight")

In [None]:
# Print the name and reflected type of every column in products
product_columns = inspector.get_columns('products')
for column in product_columns:
    print(f"{column['name']} {column['type']}")

In [None]:
# Count products per ProductSubcategoryKey, ordered by the key itself
result3 = (
    session.query(products.ProductSubcategoryKey, func.count(products.ProductSubcategoryKey))
    .group_by(products.ProductSubcategoryKey)
    .order_by(products.ProductSubcategoryKey)
    .all()
)

In [None]:
# Load the query results into a DataFrame with named columns
# (one row per ProductSubcategoryKey, in key order as returned by the query).
df3 = pd.DataFrame(result3, columns=['category', 'number of products'])

# Plot with the pandas bar helper and keep the returned Axes so the label and title
# are attached to this figure explicitly rather than to pyplot's current figure.
ax = df3.plot.bar(x='category', y='number of products', rot=0, fontsize=7)
ax.set_xlabel("category")

# Add title
ax.set_title("products in each category", fontsize=20)

# Save to folder; bbox_inches="tight" keeps labels and title inside the image
ax.figure.savefig("Resources/Categories.png", bbox_inches="tight")

In [None]:
# Close the ORM session now that the database queries above are finished
session.close()

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Smoke test: open a Chrome window, load a page and print its title.
options = Options()
options.add_argument("start-maximized")
# Resolve chromedriver through webdriver_manager (as the Splinter setup cell does)
# instead of a user-specific absolute Windows path, and pass `options=` because the
# `chrome_options=` keyword is deprecated.
# NOTE(review): recent Selenium releases no longer accept executable_path and expect a
# Service object -- confirm the installed version.
driver = webdriver.Chrome(options=options, executable_path=ChromeDriverManager().install())
driver.get('https://www.google.co.in')
print("Page Title is : %s" %driver.title)
# NOTE(review): `driver` is never quit in the visible cells; call driver.quit() once
# it is no longer needed so the browser process is released.


In [None]:
# Set up Splinter with a chromedriver binary resolved by webdriver_manager
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [None]:
# Visit the Adventure bikes site
url = 'http://demos.componentone.com/ASPNET/AdventureWorksMVC/Products/Index?Category=Bikes'
browser.visit(url)

# Optional delay for loading the page: wait up to one second for the selector to appear.
# NOTE(review): confirm that 'div.list_text' actually exists on this page; if it does
# not, this call only adds the wait and evaluates to False.
browser.is_element_present_by_css('div.list_text', wait_time=1)

In [None]:
# Convert the browser html to a soup object
# (`soup` is the BeautifulSoup class, imported under that alias at the top of the notebook)
html = browser.html
bikes_soup = soup(html, 'html.parser')

In [None]:
# Display the parsed document.
# NOTE(review): this renders the entire page in the cell output; consider showing
# only a slice or a specific element before sharing the notebook.
bikes_soup