In [None]:
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from sqlalchemy import create_engine

from config import databasename as dbn
from config import postgresPass as pw

In [None]:
# Render Matplotlib figures directly in the notebook output.
%matplotlib inline
from matplotlib import style
# Switch Matplotlib to its 'fivethirtyeight' style sheet for the charts below.
style.use('fivethirtyeight')

In [None]:
import numpy as np
import datetime as dt

In [None]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import func, inspect

In [None]:
# Connection settings for the PostgreSQL database on localhost; the password
# and database name are read from the local `config` module.
protocol = 'postgresql'
username = 'postgres'
password = pw
host = 'localhost'
port = 5432
database_name = dbn
# URL-encode the password before embedding it: characters such as '@', ':',
# '/' or '#' would otherwise be read as URL delimiters and the connection
# string would be parsed incorrectly.
# NOTE(review): if config.postgresPass is already percent-encoded, store the
# raw password there instead, otherwise it will be encoded twice.
rds_connection_string = (
    f'{protocol}://{username}:{quote_plus(str(password))}@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

In [None]:
# Reflect an existing database into a new automap model.
Base = automap_base()
# Reflect the tables through the engine. `autoload_with=` is the supported
# spelling; the older `prepare(engine, reflect=True)` form is deprecated.
# NOTE(review): `autoload_with` requires SQLAlchemy 1.4 or newer.
Base.prepare(autoload_with=engine)

In [None]:
# View all of the classes that automap found (displayed as the cell output).
# NOTE(review): automap is documented to map only tables that have a primary
# key, so this list can be shorter than the inspector's table list - confirm.
Base.classes.keys()

In [None]:
# Create an inspector on the engine and list the table names it reports.
# `inspector` is reused further down to list the columns of `products`.
inspector = inspect(engine)
inspector.get_table_names()

In [None]:
# Save references to each table.
# Each name is bound to the automap class of the same-named table
# (`sales` is bound to the `combined_sales` class).
# Of these, only `products` is used by the ORM query in the cells visible
# here; the remaining tables are read through raw SQL further down.
customers = Base.classes.customers
sales = Base.classes.combined_sales
calendar = Base.classes.calendar
products = Base.classes.products
categories = Base.classes.categories
returns = Base.classes.returns
territories = Base.classes.territories

In [None]:
# Create our session (link) from Python to the DB.
# The session is used for the ORM query below and is closed in the
# notebook's final cell.
session = Session(engine)

In [None]:
# Print the name and type of every column in the products table,
# one "<name> <type>" pair per line.
product_columns = inspector.get_columns('products')
for col_info in product_columns:
    print(f"{col_info['name']} {col_info['type']}")

In [None]:
# Count the products in each product sub category: one
# (productsubcategorykey, count) row per key, ordered by the key.
result3 = (
    session.query(
        products.productsubcategorykey,
        func.count(products.productsubcategorykey),
    )
    .group_by(products.productsubcategorykey)
    .order_by(products.productsubcategorykey)
    .all()
)

In [None]:
# Build the frame with its final column names in a single step.
# Renaming the positional labels 0 and 1 afterwards is fragile: if pandas
# takes the column names from the query's row objects, there are no 0/1
# labels and `rename` silently changes nothing, so the chart cell that
# expects 'Product Subcategory' / 'Quantity' would fail. Naming the columns
# here also avoids in-place mutation across cells.
df3 = pd.DataFrame(result3, columns=['Product Subcategory', 'Quantity'])

In [None]:
# Interactive bar chart: product count per product sub category.
fig = px.bar(df3, x='Product Subcategory', y='Quantity')
# Fixed 800x550 canvas with zero-alpha paper and plot backgrounds.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(0,0,0,0)",
    autosize=False,
    width=800,
    height=550,
)
fig

In [None]:
# Load the query results into a DataFrame with descriptive column names.
# NOTE: this rebinds `df3` with different column names than the Plotly cell
# uses; rebuild `df3` before re-running that cell.
df3 = pd.DataFrame(result3, columns=['category', 'number of products'])

# Plot with pandas and keep the returned Axes, so the labels, title and the
# saved file all refer to this figure rather than to pyplot's implicit
# "current" figure.
ax = df3.plot.bar(x='category', y='number of products', rot=0, fontsize=7)
ax.set_xlabel("category")

# Add title
ax.set_title("products in each category", fontsize=20)

# Save to folder
ax.get_figure().savefig("Visualizations/Categories.png")

In [None]:
all_sales_df = pd.read_sql_query(
    """
    SELECT
    "productkey",
    "customerkey",
    "salesterritorykey",
    "orderquantity"
    FROM combined_sales
    """,
    con=engine)

In [None]:
all_sales_df

In [None]:
territories_df = pd.read_sql_query(
    """
    SELECT
    "salesterritorykey",
    "country",
    "region",
    "continent"
    FROM territories
    """,
    con=engine)

In [None]:
all_sales_df1 = pd.merge(all_sales_df,territories_df, how="left",on='salesterritorykey')

In [None]:
products_df = pd.read_sql_query(
    """
    SELECT
    "productkey",
    "modelname",
    "productsize",
    "productcolor",
    "productsubcategorykey"
    FROM products
    """,
    con=engine)

In [None]:
all_sales_df2 = pd.merge(all_sales_df1,products_df, how="left",on='productkey')

In [None]:
categories_df = pd.read_sql_query(
    """
    SELECT
    "productsubcategorykey",
    "subcategoryname",
    "categoryname"
    FROM categories
    """,
    con=engine)

In [None]:
all_sales_df3 = pd.merge(all_sales_df2,categories_df, how="left",on='productsubcategorykey')

In [None]:
customers_df = pd.read_sql_query(
    """
    SELECT
    customerkey,
    gender,
    educationlevel
   FROM customers;
     
    """,
    con=engine)

In [None]:
all_sales_df4 = pd.merge(all_sales_df3,customers_df, how="left",on='customerkey')

In [None]:
all_sales_df4

In [None]:
all_sales_df4.pop('continent')

In [None]:
all_sales_df4.pop('salesterritorykey')

In [None]:
all_sales_df4.pop('productsubcategorykey')

In [None]:
all_sales_df4

In [None]:
# Get all the data for Bikes, without the product/customer key columns.
# Filtering and dropping in one expression builds a fresh frame on every
# run, so the cell is re-runnable and never mutates a filtered slice in
# place (the previous `pop` calls did both).
just_bikes_df = (
    all_sales_df4
    .loc[all_sales_df4["categoryname"] == "Bikes"]
    .drop(columns=['productkey', 'customerkey'])
)
just_bikes_df

In [None]:
just_bikes_df.to_csv('Resources/just_bikes.csv',index = False)

In [None]:
jb1 = pd.read_csv (r'Resources/just_bikes.csv')

In [None]:
# Get model name data
model_data = jb1["modelname"].value_counts()
model_data

In [None]:
# Create plot
model_data.plot(kind="bar", facecolor="red", rot=45,figsize=(10,10) )

# Set title and axis labels
plt.title("Bike sales by Model Name")
plt.ylabel("Bike sales")
plt.xlabel("Model Name")

# Resize plot to display labels
plt.tight_layout()
plt.savefig("Visualizations/bike_sales_Model.png")

In [None]:
# Get Country Level data
Country_data = just_bikes_df["country"].value_counts()
Country_data

In [None]:
# Create plot
Country_data.plot(kind="bar", facecolor="red", rot=45,figsize=(10,10) )

# Set title and axis labels
plt.title("Bike sales by Country")
plt.ylabel("Bike sales")
plt.xlabel("Country")

# Resize plot to display labels
plt.tight_layout()
plt.savefig("Visualizations/bike_sales_Country.png")

In [None]:
# Get Size Level data
Size_data = just_bikes_df["productsize"].value_counts()
Size_data

In [None]:
# Create plot
Size_data.plot(kind="bar", facecolor="red", rot=45,figsize=(10,10) )

# Set title and axis labels
plt.title("Bike sales by Size")
plt.ylabel("Bike sales")
plt.xlabel("Bike Size")

# Resize plot to display labels
plt.tight_layout()
plt.savefig("Visualizations/bike_sales_Size.png")

In [None]:
# Get Colour Level data
Colour_data = just_bikes_df["productcolor"].value_counts()
Colour_data

In [None]:
# Create plot
Colour_data.plot(kind="bar", facecolor="red", rot=45,figsize=(10,10) )

# Set title and axis labels
plt.title("Bike sales by Colour")
plt.ylabel("Bike sales")
plt.xlabel("Bike Colour")

# Resize plot to display labels
plt.tight_layout()
plt.savefig("Visualizations/bike_sales_Colour.png")

In [None]:
# Close the ORM session opened near the top of the notebook.
# None of the raw-SQL reads use it; they are given `engine` directly.
session.close()