In [None]:
from urllib.parse import quote

import matplotlib.pyplot as plt
import pandas as pd
from sqlalchemy import create_engine

from config import postgresPass as pw

In [None]:
%matplotlib inline
from matplotlib import style
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn below
style.use('fivethirtyeight')

In [None]:
import numpy as np
import datetime as dt

In [None]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import func, inspect

In [None]:
# Connection settings for the local PostgreSQL database.
protocol = 'postgresql'
username = 'postgres'
password = pw
host = 'localhost'
port = 5432
database_name = 'bike_shop_pr3'

# Percent-encode the password before embedding it in the URL. Characters such
# as '@', ':' or '/' would otherwise be read as URL delimiters and break the
# credentials/host split. safe='' makes sure '/' is encoded as well.
escaped_password = quote(str(password), safe='')
rds_connection_string = (
    f'{protocol}://{username}:{escaped_password}@{host}:{port}/{database_name}'
)

# Engine shared by the ORM session and the pandas SQL reads further down.
engine = create_engine(rds_connection_string)

In [None]:
# Create an automap base: its mapped classes are generated from the tables
# found in the database instead of being declared by hand.
Base = automap_base()
# Reflect the tables reachable through `engine` and build the mapped classes.
# NOTE(review): SQLAlchemy 1.4+ documents the `engine`/`reflect` arguments as
# deprecated in favour of `Base.prepare(autoload_with=engine)` — confirm the
# installed version before switching.
Base.prepare(engine,reflect=True)

In [None]:
# List the names of the mapped classes that automap generated from the
# reflected tables (these are the names usable as Base.classes.<name>)
Base.classes.keys()

In [None]:
# Build an inspector on the engine so the schema can be examined directly
inspector = inspect(engine)
# Show the table names reported by the inspector
inspector.get_table_names()

In [None]:
# Print the name and SQL type of every column in the customers table
customers_columns = inspector.get_columns('customers')
for column_info in customers_columns:
    column_name, column_type = column_info['name'], column_info['type']
    print(column_name, column_type)

In [None]:
# Bind a short name to each automapped class so the queries below can refer
# to the tables directly.
mapped_classes = Base.classes

# Fact table
sales = mapped_classes.combined_sales

# Lookup / dimension tables
calendar = mapped_classes.calendar
categories = mapped_classes.categories
customers = mapped_classes.customers
products = mapped_classes.products
returns = mapped_classes.returns
territories = mapped_classes.territories

In [None]:
# Open an ORM session bound to the engine; the session.query(...) calls below
# run through it, and it is closed in the final cell.
session = Session(engine)

In [None]:
# Find the most recent date in the data set.
# The latest order date is the MAX of OrderDate; the previous func.min returned
# the earliest date instead.
# NOTE(review): assumes OrderDate is stored as a date/timestamp type; if it is
# text, MAX compares strings lexicographically — confirm the column type.
most_recent_date = session.query(func.max(sales.OrderDate)).first()
most_recent_date

In [None]:
# Total number of customers in the dataset (count of CustomerKey values)
customer_count_query = session.query(func.count(customers.CustomerKey))
customer_count_query.all()

In [None]:
# Number of customers in each education level, largest group first
result1 = (
    session.query(customers.EducationLevel, func.count(customers.EducationLevel))
    .group_by(customers.EducationLevel)
    .order_by(func.count(customers.EducationLevel).desc())
    .all()
)

In [None]:
# Load the (education level, count) rows into a DataFrame with readable column
# names and order them by customer count, smallest group first. This is a
# single construction; the earlier unnamed-column DataFrame was discarded
# immediately and has been removed.
df1 = (
    pd.DataFrame(result1, columns=['Education Level', 'customers'])
    .sort_values('customers')
)

# Draw one bar per education level
ax = df1.plot.bar(x='Education Level', y='customers', rot=90)
ax.set_xlabel("Education Level")
ax.set_ylabel("customers")

# Add title
ax.set_title("Customers in each Education Level", fontsize=15)

# Fit the rotated tick labels inside the canvas so they are not cut off in
# the saved image (same approach as the bike-sales charts further down).
plt.tight_layout()

# Save to folder
plt.savefig("Resources/Education Level.png")

In [None]:
# Number of customers in each annual income bracket, largest group first
result2 = (
    session.query(customers.AnnualIncome, func.count(customers.AnnualIncome))
    .group_by(customers.AnnualIncome)
    .order_by(func.count(customers.AnnualIncome).desc())
    .all()
)

In [None]:
# Load the (income bracket, count) rows into a DataFrame with readable column
# names. Row order is the query's order (largest count first). The earlier
# unnamed-column DataFrame was overwritten immediately and has been removed.
df2 = pd.DataFrame(result2, columns=['Income Level', 'customers'])

# Draw one bar per income bracket
ax = df2.plot.bar(x='Income Level', y='customers', rot=90)
ax.set_xlabel("Income Level")
ax.set_ylabel("customers")

# Add title
ax.set_title("Customers in each Income Level", fontsize=15)

# Fit the rotated tick labels inside the canvas so they are not cut off in
# the saved image (same approach as the bike-sales charts further down).
plt.tight_layout()

# Save to folder
plt.savefig("Resources/Income Level.png")

In [None]:
# Print the name and SQL type of every column in the products table
product_columns = inspector.get_columns('products')
for column_info in product_columns:
    column_name, column_type = column_info['name'], column_info['type']
    print(column_name, column_type)

In [None]:
# Number of products in each product subcategory, ordered by subcategory key
result3 = (
    session.query(products.ProductSubcategoryKey, func.count(products.ProductSubcategoryKey))
    .group_by(products.ProductSubcategoryKey)
    .order_by(products.ProductSubcategoryKey)
    .all()
)

In [None]:
# Load the (subcategory key, product count) rows into a DataFrame. The
# 'category' column holds ProductSubcategoryKey values, not category names.
# The earlier unnamed-column DataFrame was overwritten immediately and has
# been removed.
df3 = pd.DataFrame(result3, columns=['category', 'number of products'])

# Draw one bar per subcategory key; the small font keeps the unrotated tick
# labels from overlapping.
ax = df3.plot.bar(x='category', y='number of products', rot=0, fontsize=7)
ax.set_xlabel("category")

# Add title
ax.set_title("products in each category", fontsize=20)

# Save to folder
plt.savefig("Resources/Categories.png")

In [None]:
# Read the sales columns used by the joins below from combined_sales.
sales_sql = """
    SELECT
    "ProductKey",
    "CustomerKey",
    "SalesTerritoryKey",
    "OrderQuantity"
    FROM combined_sales
    """
all_sales_df = pd.read_sql_query(sql=sales_sql, con=engine)

In [None]:
# Display the sales rows loaded from combined_sales
all_sales_df

In [None]:
# Read the territory key with its region and continent from territories.
territories_sql = """
    SELECT
    "SalesTerritoryKey",
    "Region",
    "Continent"
    FROM territories
    """
territories_df = pd.read_sql_query(sql=territories_sql, con=engine)

In [None]:
# Display the territory lookup rows loaded from territories
territories_df

In [None]:
# Attach Region/Continent to every sales row; the left join keeps sales rows
# even when no territory matches.
all_sales_df1 = all_sales_df.merge(
    territories_df,
    on='SalesTerritoryKey',
    how="left",
)

In [None]:
# Display the sales rows after the territory columns were joined on
all_sales_df1

In [None]:
# Read the product key with its model name and subcategory key from products.
products_sql = """
    SELECT
    "ProductKey",
    "ModelName",
    "ProductSubcategoryKey"
    FROM products
    """
products_df = pd.read_sql_query(sql=products_sql, con=engine)

In [None]:
# Display the product lookup rows loaded from products
products_df

In [None]:
# Attach ModelName and ProductSubcategoryKey to every sales row (left join on
# ProductKey keeps sales rows without a matching product).
all_sales_df2 = all_sales_df1.merge(
    products_df,
    on='ProductKey',
    how="left",
)

In [None]:
# Display the sales rows after the product columns were joined on
all_sales_df2

In [None]:
# Read the subcategory key with its category name from categories.
categories_sql = """
    SELECT
    "ProductSubcategoryKey",
    "CategoryName"
    FROM categories
    """
categories_df = pd.read_sql_query(sql=categories_sql, con=engine)

In [None]:
# Display the category lookup rows loaded from categories
categories_df

In [None]:
# Attach CategoryName to every sales row through its ProductSubcategoryKey
# (left join keeps sales rows without a matching category).
all_sales_df3 = all_sales_df2.merge(
    categories_df,
    on='ProductSubcategoryKey',
    how="left",
)

In [None]:
# Display the sales rows after the category name was joined on
all_sales_df3

In [None]:
# Read the customer key with gender and education level from customers.
customers_sql = """
    SELECT
    "CustomerKey",
    "Gender",
    "EducationLevel"
    FROM customers
    """
customers_df = pd.read_sql_query(sql=customers_sql, con=engine)

In [None]:
# Display the customer attribute rows loaded from customers
customers_df

In [None]:
# Attach Gender and EducationLevel to every sales row (left join on
# CustomerKey keeps sales rows without a matching customer).
all_sales_df4 = all_sales_df3.merge(
    customers_df,
    on='CustomerKey',
    how="left",
)

In [None]:
# Display the fully joined sales rows (territory, product, category, customer)
all_sales_df4

In [None]:
# Remove the Continent column by reassignment rather than an in-place pop.
# errors='ignore' makes re-running this cell a no-op instead of a KeyError
# once the column is already gone.
all_sales_df4 = all_sales_df4.drop(columns='Continent', errors='ignore')

In [None]:
# Remove the SalesTerritoryKey join key by reassignment rather than an
# in-place pop. errors='ignore' makes re-running this cell a no-op instead of
# a KeyError once the column is already gone.
all_sales_df4 = all_sales_df4.drop(columns='SalesTerritoryKey', errors='ignore')

In [None]:
# Remove the ProductSubcategoryKey join key by reassignment rather than an
# in-place pop. errors='ignore' makes re-running this cell a no-op instead of
# a KeyError once the column is already gone.
all_sales_df4 = all_sales_df4.drop(columns='ProductSubcategoryKey', errors='ignore')

In [None]:
# Display the joined sales rows after the column-removal cells above
all_sales_df4

In [None]:
# Get all the data for Bikes.
# .copy() makes just_bikes_df an independent frame, so the column removals in
# the following cells act on it alone and are never applied to (or warned
# about as) a slice of all_sales_df4.
just_bikes_df = all_sales_df4.loc[all_sales_df4["CategoryName"] == "Bikes"].copy()
just_bikes_df

In [None]:
# Remove the ProductKey column by reassignment rather than an in-place pop.
# The result is a new frame, and errors='ignore' makes re-running this cell a
# no-op instead of a KeyError once the column is already gone.
just_bikes_df = just_bikes_df.drop(columns='ProductKey', errors='ignore')

In [None]:
# Remove the CustomerKey column by reassignment rather than an in-place pop.
# The result is a new frame, and errors='ignore' makes re-running this cell a
# no-op instead of a KeyError once the column is already gone.
just_bikes_df = just_bikes_df.drop(columns='CustomerKey', errors='ignore')

In [None]:
# Display the bike-only sales rows after the key-column removal cells above
just_bikes_df

In [None]:
# Generate a pie plot showing the distribution of female versus male bike sales
# (each slice is the number of bike sales rows for that Gender value).
counts = just_bikes_df.Gender.value_counts()
counts.plot(kind="pie", autopct='%1.1f%%')

# Save to folder BEFORE plt.show(): show() renders and releases the current
# figure, so a savefig issued afterwards writes an empty canvas.
plt.savefig("Resources/bike_sales_gender.png")
plt.show()

In [None]:
# Count the bike sales rows recorded for each model name, most frequent first.
# NOTE(review): this counts rows and does not sum OrderQuantity — confirm that
# a row count is the intended measure of "sales".
model_data = just_bikes_df.ModelName.value_counts()
model_data

In [None]:
# Red bar chart of bike sales per model on a 10x10 inch figure
model_axes = model_data.plot.bar(facecolor="red", rot=90, figsize=(10, 10))

# Title and axis labels
model_axes.set_title("Bike sales by Model Name")
model_axes.set_ylabel("Bike sales")
model_axes.set_xlabel("Model Name")

# Tighten the layout so the rotated labels fit, then write the figure to disk
plt.tight_layout()
plt.savefig("Resources/bike_sales_Model.png")

In [None]:
# Count the bike sales rows recorded for each customer education level,
# most frequent first
Education_data = just_bikes_df.EducationLevel.value_counts()
Education_data

In [None]:
# Red bar chart of bike sales per education level on a 10x10 inch figure
education_axes = Education_data.plot.bar(facecolor="red", rot=90, figsize=(10, 10))

# Title and axis labels
education_axes.set_title("Bike sales by Education Level")
education_axes.set_ylabel("Bike sales")
education_axes.set_xlabel("Education Level")

# Tighten the layout so the rotated labels fit, then write the figure to disk
plt.tight_layout()
plt.savefig("Resources/bike_sales_Education.png")

In [None]:
# Close the ORM session created with Session(engine) now that all
# session.query(...) calls have run
session.close()