In [1]:
# Import the dependencies
import pandas as pd
import numpy as np
import datetime

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# ORM Stuff
from sqlalchemy import create_engine, inspect, text, func
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base

In [2]:
# connect to postgres
USERNAME = "postgres"
PASSWORD = "postgres"
HOST = "localhost"
PORT = 5432
DATABASE = "crowdfunding"
connection_str = f"postgresql://{USERNAME}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}"

# Create the connection engine
engine = create_engine(connection_str)

In [3]:
# Create the inspector and connect it to the engine
inspector = inspect(engine)

# Collect the names of tables within the database
tables = inspector.get_table_names()

# Using the inspector to print the column names for all tables and its types
for table in tables:
    print(table)
    print("--------")
    columns = inspector.get_columns(table)
    for column in columns:
        print(column["name"], column["type"])

    print()

contacts
--------
contact_id INTEGER
first_name VARCHAR(50)
last_name VARCHAR(50)
email VARCHAR(50)

campaign
--------
cf_id INTEGER
contact_id INTEGER
company_name VARCHAR(100)
description VARCHAR(500)
goal DOUBLE PRECISION
pledged DOUBLE PRECISION
outcome VARCHAR(50)
backers_count INTEGER
country VARCHAR(10)
currency VARCHAR(10)
launch_date DATE
end_date DATE
category_id VARCHAR(10)
subcategory_id VARCHAR(10)

category
--------
category_id VARCHAR(10)
category VARCHAR(20)

subcategory
--------
subcategory_id VARCHAR(10)
subcategory VARCHAR(20)



In [4]:
conn = engine.connect()

In [27]:
# Query 1: Number of successful documentaries and dramas 
query1 = text("""
    SELECT 
        outcome AS o, 
        category AS c, 
        subcategory AS s
    FROM 
        campaign
    WHERE 
        (subcategory = 'drama' OR subcategory = 'documentary')
        AND outcome = 'successful';
    """)

    # Execute Query 1 and load the results into a DataFrame
    df_successful = pd.read_sql(query1, con=conn)

    # Display the top results for Query 1
    print("Number of Successful Documentaries and Dramas:")
    print(df_successful.head())

    # Query 2: Pledged amount for documentaries and dramas
    query2 = text("""
    SELECT 
        outcome AS o, 
        category AS c, 
        subcategory AS s, 
        pledged AS p
    FROM 
        campaign
    WHERE 
        (subcategory = 'drama' OR subcategory = 'documentary')
        AND outcome = 'successful';
    """)

    # Execute Query 2 and load the results into a DataFrame
    df_pledged = pd.read_sql(query2, con=conn)

    # Display the top results for Query 2
    print("\nPledged Amount for Documentaries and Dramas:")
    print(df_pledged.head())

except Exception as e:
    # Print the error message
    print(f"An error occurred: {e}")

IndentationError: unexpected indent (3716819855.py, line 15)

In [5]:
# Raw SQL/Pandas
query = text("""
SELECT 
    cam.campaign_id,
    cam.name AS campaign_name,
    cam.category_id,
    ca.category,
    cam.pledged,
    cam.goal,
    (cam.pledged / cam.goal) AS success_ratio
FROM 
    campaign cam
JOIN 
    category ca 
ON 
    cam.category_id = ca.category_id
WHERE 
    ca.category = 'music'
ORDER BY 
    success_ratio DESC
LIMIT 1;
""")

# Execute the query and load the data into a DataFrame
df1 = pd.read_sql(query, con=conn)

# Display the top results
df1.head()

ProgrammingError: (psycopg2.errors.UndefinedColumn) column cam.campaign_id does not exist
LINE 3:     cam.campaign_id,
            ^

[SQL: 
SELECT 
    cam.campaign_id,
    cam.name AS campaign_name,
    cam.category_id,
    ca.category,
    cam.pledged,
    cam.goal,
    (cam.pledged / cam.goal) AS success_ratio
FROM 
    campaign cam
JOIN 
    category ca 
ON 
    cam.category_id = ca.category_id
WHERE 
    ca.category = 'music'
ORDER BY 
    success_ratio DESC
LIMIT 1;
]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [None]:
query = text("""
SELECT 
    outcome AS o, 
    category AS c, 
    subcategory AS s
FROM 
    campaign cam
WHERE 
    category = 'film & video'
    AND (subcategory = 'drama' OR subcategory = 'documentary');
""")

df2 = pd.read_sql(query, con=conn)
df2.head()

In [None]:
# Raw SQL/Pandas
query = text(f"""SELECT
	cam.category_id,
	ca.category,
	count(cf_id) as num_campaigns
FROM
	campaign cam
	JOIN category ca on cam.category_id = ca.category_id
GROUP BY
	cam.category_id,
	ca.category
ORDER BY
	num_campaigns asc;
""")

df = pd.read_sql(query, con=conn)
df.head()

In [None]:
# Canvas Size
plt.figure(figsize=(12, 6))

# Create a basic plot
plt.bar(df.category, df.num_campaigns, color="green", align="center")

# Customizations

# Change colors
# Add in reference lines
# Add Labels/Title
plt.xlabel("Number of Campaigns", fontfamily="Arial", fontsize=12)
plt.ylabel("Category", fontfamily="Arial", fontsize=12)
plt.title("Number of Campaigns per Category", fontweight="bold", fontsize=16, fontfamily="Arial")

# Set X/Y Limits
plt.xlim(0, 10)

# Add in a background grid
plt.grid(axis="x", linestyle="--", color="lightgrey", alpha=0.75)

# Show/Save the Graph
plt.tight_layout()
plt.savefig("part4_categories.png")
plt.show()

In [None]:
# ORM STUFF
# Declare a Base using `automap_base()`
Base = automap_base()

# Use the Base class to reflect the database tables
Base.prepare(autoload_with=engine)

# Print all of the classes mapped to the Base
Base.classes.keys()

In [None]:
 # Assign Classes
Category = Base.classes.category
Contacts = Base.classes.contacts
Campaign = Base.classes.campaign
Subcategory = Base.classes.subcategory

In [None]:
# Create a session
session = Session(engine) # ORM

In [None]:
# Query using the ORM session
from sqlalchemy import asc

rows = (
    session.query(
        Campaign.category_id,
        Category.category,
        func.count(Campaign.cf_id).label("num_campaigns"),
    )
    .join(Category, Campaign.category_id == Category.category_id)
    .group_by(Campaign.category_id, Category.category)
    .order_by(asc("num_campaigns"))
    .all()
)

dfa = pd.DataFrame(rows)
dfa.head()

In [None]:
# Change Canvas Size
plt.figure(figsize=(12, 6))

# Create a basic plot
plt.bar(dfa.category, dfa.num_campaigns, color="blue", align="center")

# Customizations

# Change colors
# Add in reference lines
# Add Labels/Title
plt.xlabel("Number of Campaigns", fontfamily="Arial", fontsize=12)
plt.ylabel("Category", fontfamily="Arial", fontsize=12)
plt.title("Number of Campaigns per Category (ORM)", fontweight="bold", fontsize=16, fontfamily="Arial")

# Set X/Y Limits
plt.xlim(0, 10)

# Add in a background grid
plt.grid(axis="x", linestyle="--", color="lightgrey", alpha=0.75)

# Show/Save the Graph
plt.tight_layout()
plt.savefig("part4_categories_ORM.png")
plt.show()

In [None]:
conn.close()
session.close()

In [None]:
engine.dispose()