# Loading data into PostgreSQL using psycopg2

In [1]:
import pandas as pd
import numpy as np
import json
import os
import psycopg2
from psycopg2 import sql

## db connection

In [2]:
def psycopg_connection(user,password,host,port,db_name):
    """Open a psycopg2 connection and create a cursor on it.

    Parameters
    ----------
    user, password, host, port : connection settings forwarded to
        ``psycopg2.connect`` under the same keyword names.
    db_name : forwarded to ``psycopg2.connect`` as ``dbname``.

    Returns
    -------
    tuple
        ``(connection, cursor)``. Neither object is closed here; the caller
        is responsible for closing both.
    """
    connect_kwargs = {
        "dbname": db_name,
        "user": user,
        "password": password,
        "host": host,
        "port": port,
    }
    db_conn = psycopg2.connect(**connect_kwargs)
    return db_conn, db_conn.cursor()

## read processed auction file(s)

In [7]:
def read_data(auction_files, base_path=None):
    """Read JSON Lines auction files and stack them into one DataFrame.

    Parameters
    ----------
    auction_files : iterable of str
        File names, resolved relative to ``base_path``.
    base_path : str, optional
        Directory containing the files. When omitted, the notebook-level
        ``processed_auctions_path`` variable is used, so that variable must be
        defined before the call.

    Returns
    -------
    pandas.DataFrame or None
        Rows of every file concatenated in the order given, with a fresh
        0..n-1 index; ``None`` when ``auction_files`` is empty.
    """
    if base_path is None:
        # Looked up at call time, so the global only has to exist by then.
        base_path = processed_auctions_path

    frames = [
        pd.read_json(os.path.join(base_path, name), lines=True)
        for name in auction_files
    ]

    if not frames:
        # pd.concat raises on an empty list; signal "nothing read" instead.
        return None
    return pd.concat(frames, ignore_index=True)

## create df

In [None]:
# Directory holding the processed auction files; fill in before running.
processed_auctions_path=""
# os.listdir returns entries in arbitrary order and also lists sub-directories
# (e.g. .ipynb_checkpoints), so keep regular files only and sort the names to
# make the combined frame reproducible across runs.
files = sorted(
    name
    for name in os.listdir(processed_auctions_path)
    if os.path.isfile(os.path.join(processed_auctions_path, name))
)
df = read_data(files)
df.head(5)

Unnamed: 0,auction_url,auction_title,auction_subtitle,dougs_take,auction_highlights,services,auction_equipment,modifications,known_flaws,included_items,...,median_bid,bid_range,highlight_count,equipment_count,mod_count,flaw_count,service_count,included_items_count,video_count,manufacture_year
0,https://carsandbids.com/auctions/9QXJMeyk/2022...,2022 Porsche 718 Cayman GT4,"6-Speed Manual, 414-hp Flat-6, Shark Blue, Mos...",,[This Cayman GT4 features the desirable 6-spee...,[],"[Brake calipers in high-gloss black, Limited-s...","[20-inch GT4 wheels in Platinum Silver, Paint ...","[Scratches beneath the front lip, Some creases...","[2 keys, Owner's manual, Window sticker]",...,103750.0,112282.0,5,11.0,4.0,2,0,3,0,2022.0
1,https://carsandbids.com/auctions/3Ox6AkyK/2018...,2018 Mazda MX-5 Miata Club,"NO RESERVE 6-Speed Manual, Limited-Slip Differ...",,[This Miata features the desirable 6-speed man...,"[August 2023 (35,571 miles): Air conditioning ...","[Limited-slip differential, LED headlights, Ma...","[Fab9Tuning N1 ECU tune, Good-Win Racing Roads...",[Some chips and scratches around the exterior ...,"[2 keys, Owner's manual, Removed factory parts...",...,20625.0,18056.0,6,12.0,26.0,4,13,5,0,2018.0
2,https://carsandbids.com/auctions/r4YLPgRy/2017...,2017 Jaguar F-Type R Coupe,"NO RESERVE Supercharged V8, AWD, Vision Packag...",,[The attached Carfax vehicle history report sh...,"[October 2023 (46,000 miles): Vehicle serviced...",[Vision Package (adaptive and intelligent fron...,[],[The attached Carfax history report indicates ...,"[1 key, Owner's manual, Digital Monroney label]",...,34812.0,32533.0,4,12.0,0.0,4,22,3,0,2017.0
3,https://carsandbids.com/auctions/K1NGwZPx/2006...,2006 BMW 330xi Sedan,"NO RESERVE 6-Speed Manual, AWD, Premium Packag...",,[This 330xi comes equipped with the desirable ...,[],"[Premium Package (BMW Assist, universal transc...",[],"[Chips, scratches, and some touched up areas a...","[1 key, Owner's manuals, Service records, Floo...",...,4500.0,5156.0,5,12.0,0.0,7,0,4,0,2006.0
4,https://carsandbids.com/auctions/9naV6OJG/1988...,1988 Toyota Land Cruiser FJ75 4x4,"Middle Eastern-Market FJ75, Left-Hand Drive, 5...",,[This Land Cruiser is a left-hand drive Middle...,[],"[16-inch steel wheels, Sliding rear window, Si...",[BOSS Audio Systems 550B head unit (factory ra...,"[Scratches, dings, and paint damage with corro...","[3 keys, Factory radio]",...,11000.0,16796.0,4,5.0,1.0,7,0,2,0,1988.0


In [9]:
# Inspect the auction_date column before loading. The rendered output shows
# int64 values that look like Unix epoch milliseconds — TODO confirm the unit
# matches what the staging table expects.
df['auction_date']

0        1709846700000
1        1709846280000
2        1709845980000
3        1709845800000
4        1709845440000
             ...      
27008    1653512520000
27009    1653512460000
27010    1653511560000
27011    1653511560000
27012    1653510660000
Name: auction_date, Length: 27013, dtype: int64

## load to staging table

In [11]:
def load_to_staging(df, conn, cursor, table_name="staging"):
    """Insert the de-duplicated auction rows of ``df`` into a staging table.

    Only the columns listed in ``insert_columns`` are written. NaN values are
    replaced with ``None``, and when several rows share an ``auction_id`` only
    the one with the largest ``auction_date`` is kept.

    Parameters
    ----------
    df : pandas.DataFrame or None
        Source rows; must contain every column in ``insert_columns``.
    conn : open database connection used to commit or roll back.
    cursor : cursor created from ``conn``.
    table_name : str, optional
        Target table; defaults to ``"staging"``. It is quoted as an SQL
        identifier, not interpolated as raw text.

    Returns
    -------
    int
        ``cursor.rowcount`` after the insert, or ``0`` when ``df`` is ``None``
        or has no rows (the database is not touched in that case).

    Raises
    ------
    KeyError
        If ``df`` lacks one of the expected columns.
    Exception
        Any database error is re-raised after the transaction is rolled back.
    """
    if df is None or df.empty:
        # Nothing to insert; skip the round trip entirely.
        return 0

    insert_columns = [
        "auction_date","auction_id","vin","seller_type","reserve_status","reserve_met","auction_status",
        "auction_title","auction_subtitle","make","model","exterior_color","interior_color",
        "body_style","mileage","engine","drivetrain","transmission","transmission_type", "gears",
        "title_status_cleaned","title_state","city","state","bid_count", "view_count", "watcher_count",
        "highest_bid_value","max_bid","min_bid","mean_bid","median_bid","bid_range","bids",
        "highlight_count","equipment_count","mod_count","flaw_count","service_count","included_items_count",
        "video_count","manufacture_year","location","auction_url","seller"  
    ]
    
    insert_df = df[insert_columns]
    
    # The driver maps None to SQL NULL; NaN would be sent as a float value.
    insert_df = insert_df.replace({np.nan: None})
    # Newest first, so keep='first' retains the latest row per auction_id.
    insert_df = insert_df.sort_values('auction_date', ascending=False).reset_index(drop=True)
    insert_df = insert_df.drop_duplicates('auction_id', keep='first')

    data = list(insert_df.itertuples(index=False, name=None))
    # Identifiers are composed with psycopg2.sql so they are quoted safely;
    # the values themselves travel as bound parameters.
    query = sql.SQL("INSERT INTO {table} ({columns}) VALUES ({placeholders})").format(
        table = sql.Identifier(table_name),
        columns = sql.SQL(", ").join(map(sql.Identifier, insert_columns)),
        placeholders = sql.SQL(", ").join(sql.Placeholder()*len(insert_columns))
    )

    try:
        cursor.executemany(query, data)
        conn.commit()
    except Exception:
        # Leave the connection usable instead of stuck in a failed transaction.
        conn.rollback()
        raise
    return cursor.rowcount

In [None]:
# db credentials — read from the environment so no secret is stored in the
# notebook. The variable names follow libpq's conventions.
# NOTE(review): unset variables come back as None; confirm that the resulting
# psycopg2/libpq defaults are acceptable for your setup.
user = os.environ.get("PGUSER")
password = os.environ.get("PGPASSWORD")
host = os.environ.get("PGHOST")
port = os.environ.get("PGPORT")
db_name = os.environ.get("PGDATABASE")

# connection
conn, cursor = psycopg_connection(user,password,host,port,db_name)

# load to staging; release the cursor and connection even if the load fails
try:
    query = load_to_staging(df, conn, cursor)
finally:
    cursor.close()
    conn.close()
query # inserted rows

27013