In [1]:
!pip install fpdf2 reportlab

[0m

# 📝 Problem: Compute Final Price After Discount

## **Problem Statement**
You are given a dataset containing the following columns:
- **product_id** (String)
- **product_name** (String)
- **original_price** (Double)
- **discount_percentage** (Double)

Your task is to compute the **final price** for each product by applying the discount and return the following columns:
- **product_id**
- **product_name**
- **final_price**

### Formula
$$
final\_price = original\_price \times \left(1 - \frac{discount\_percentage}{100}\right)
$$

---

## **Input**
- **File Path**: `/datasets/products.csv`

### Input Schema
| Column              | Type   |
|---------------------|--------|
| product_id          | String |
| product_name        | String |
| original_price      | Double |
| discount_percentage | Double |

### Example Input Table
| product_id | product_name | original_price | discount_percentage |
|------------|--------------|----------------|---------------------|
| P001       | Laptop       | 1000.00        | 10                  |
| P002       | Phone        | 800.00         | 5                   |
| P003       | Tablet       | 600.00         | 15                  |
| P004       | Monitor      | 300.00         | 20                  |
| P005       | Keyboard     | 100.00         | 25                  |

---

## **Output**
### Output Schema
| Column       | Type   |
|--------------|--------|
| product_id   | String |
| product_name | String |
| final_price  | Double |

### Example Output Table
| product_id | product_name | final_price |
|------------|--------------|-------------|
| P001       | Laptop       | 900.00      |
| P002       | Phone        | 760.00      |
| P003       | Tablet       | 510.00      |
| P004       | Monitor      | 240.00      |
| P005       | Keyboard     | 75.00       |

---

## **Explanation**
The final price is calculated by subtracting the discount from the original price using the formula:

$$
final\_price = original\_price \times \left(1 - \frac{discount\_percentage}{100}\right)
$$

The resulting DataFrame `df_result` contains the required output.

---

In [2]:
from pyspark.sql import SparkSession
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Spacer, Paragraph
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.graphics.shapes import Drawing, Rect
import os
import uuid

In [3]:
# Create (or reuse) the Spark session for this notebook. The PostgreSQL JDBC
# driver is requested through spark.jars.packages so JDBC reads can find it.
spark = (
    SparkSession.builder
    .appName("PostgresTablesToPDF")
    .config("spark.executor.memory", "2g")
    .config("spark.driver.memory", "2g")
    .config("spark.sql.shuffle.partitions", "10")
    .config("spark.jars.packages", "org.postgresql:postgresql:42.6.0")
    .getOrCreate()
)

In [4]:
# Connection settings for the source PostgreSQL database.
db_type = "postgresql"
host = "172.18.0.2"
port = 5432
database = "ecommerce"
user = "postgres"
# Never keep the secret in the notebook: read it from the POSTGRES_PASSWORD
# environment variable of the kernel process. If the variable is unset an
# empty password is sent and the server's authentication error surfaces on
# the first JDBC read.
# NOTE(review): the password that used to be hard-coded here has been shared
# with the notebook and should be rotated.
password = os.environ.get("POSTGRES_PASSWORD", "")

jdbc_url = f"jdbc:{db_type}://{host}:{port}/{database}"
# Properties handed to spark.read.jdbc for every table read.
properties = {
    "user": user,
    "password": password,
    "driver": "org.postgresql.Driver"
}

In [5]:
# List every table in the "public" schema. The subquery is aliased because
# spark.read.jdbc takes it in place of a table name.
query = (
    "(SELECT table_name FROM information_schema.tables "
    "WHERE table_schema='public') AS table_list"
)
tables_df = spark.read.jdbc(jdbc_url, query, properties=properties)
# Bring the (small) result to the driver as a plain Python list of names.
table_names = [record.table_name for record in tables_df.collect()]

In [6]:
# Display the table names collected from information_schema.
table_names

['shipment',
 'user',
 'user_role',
 'role',
 'delivery_partner',
 'address',
 'settlement',
 'shipment_event',
 'package',
 'seller',
 'category',
 'product',
 'brand',
 'product_variant',
 'product_media',
 'warehouse',
 'inventory',
 'inventory_movement',
 'reservation',
 'cart',
 'cart_item',
 'wishlist',
 'wishlist_item',
 'payment_method',
 'return',
 'review',
 'order',
 'order_item',
 'payment',
 'refund',
 'chargeback',
 'promotion_applied',
 'promotion',
 'campaign',
 'vendor_fee',
 'dispute',
 'support_ticket',
 'ticket_event']

In [7]:
# Folder that receives the generated PDFs; created up front, and left alone
# if it already exists.
output_dir = "/home/jupyter/work/data/pdf_tables"
os.makedirs(name=output_dir, exist_ok=True)

In [8]:
def write_table_to_pdf(df, table_name, max_rows=10):
    """Write the first rows of a Spark DataFrame to ``<output_dir>/<table_name>.pdf``.

    The PDF starts with a header (company title plus a grey box) followed by
    one two-column table per record, each line holding a column name and the
    record's value for that column.

    Args:
        df: Spark DataFrame to sample. Only the first ``max_rows`` rows are
            collected to the driver.
        table_name: Base name of the PDF file (the ``.pdf`` suffix is added).
        max_rows: Maximum number of records to render. Defaults to 10.

    Returns:
        The path of the written PDF, or ``None`` when ``df`` has no rows
        (no file is created in that case).
    """
    # Collect first so an empty table exits before any layout work is done.
    records = df.limit(max_rows).collect()
    if not records:
        return None

    pdf_path = os.path.join(output_dir, f"{table_name}.pdf")
    pdf_doc = SimpleDocTemplate(pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    # Header: title on the left, a 50x50 light-grey box on the right.
    company_para = Paragraph("My Company", styles['Title'])
    drawing = Drawing(50, 50)
    drawing.add(Rect(0, 0, 50, 50, strokeWidth=1, strokeColor=colors.black, fillColor=colors.lightgrey))

    header_table = Table([[company_para, drawing]], colWidths=[400, 100])
    header_table.setStyle(TableStyle([
        ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
        ('ALIGN', (0,0), (0,0), 'LEFT'),
        ('ALIGN', (1,0), (1,0), 'RIGHT'),
        ('BOTTOMPADDING', (0,0), (-1,-1), 12)
    ]))
    elements = [header_table, Spacer(1, 20)]

    # Same styling for every record table, so build it once.
    # NOTE(review): the BACKGROUND range shades the first field row of each
    # record, not the column-name column; confirm that is the intended look.
    record_style = TableStyle([
        ('GRID', (0,0), (-1,-1), 0.5, colors.black),
        ('BACKGROUND', (0,0), (-1,0), colors.lightgrey),
        ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
        ('LEFTPADDING', (0,0), (-1,-1), 5),
        ('RIGHTPADDING', (0,0), (-1,-1), 5),
    ])

    columns = df.columns
    for row in records:
        # Stringify values so non-text column types render as plain text and
        # NULLs show up as empty cells.
        row_data = [
            [col, "" if row[col] is None else str(row[col])]
            for col in columns
        ]
        t = Table(row_data, colWidths=[150, 300])
        t.setStyle(record_style)
        elements.append(t)
        elements.append(Spacer(1, 10))

    # Without this call the flowables are only assembled in memory and no
    # file is ever produced.
    pdf_doc.build(elements)
    return pdf_path

In [9]:
# Export the first discovered table to PDF.
first_table = table_names[0]
# Quote and schema-qualify the identifier before handing it to JDBC: the
# discovered list contains names such as "order" and "user" that are SQL
# keywords and cannot be used bare in a FROM clause. Embedded double quotes
# are doubled per SQL identifier-quoting rules.
quoted_table = 'public."{}"'.format(first_table.replace('"', '""'))
df = spark.read.jdbc(url=jdbc_url, table=quoted_table, properties=properties)
write_table_to_pdf(df, first_table)