# From Pandas to PostgreSQL: Bulk Insert with copy_from()

*By Naysan Saran, June 2020.*

## 1 - Introduction

This notebook accompanies my blog post on bulk-inserting a pandas dataframe into PostgreSQL with `copy_from()`.

First we need to load the dataframe:

In [34]:
import os
import sys
import tempfile

import pandas as pd


# Read the raw CSV with its original column names and preview the first rows.
# (Column renaming is demonstrated in the next cell.)
df = pd.read_csv(filepath_or_buffer="Cricket_set.csv")
df.head(n=3)

Unnamed: 0,country,win,year
0,India,3,2004
1,Australia,2,2018
2,SA,5,2011


In [35]:
# Reload the CSV and rename two of its columns in a single chained expression;
# the remaining columns keep their original names.
df = (
    pd.read_csv("Cricket_set.csv")
    .rename(columns={"country": "change_country", "win": "change_win"})
)
df.head(3)

Unnamed: 0,change_country,change_win,year
0,India,3,2004
1,Australia,2,2018
2,SA,5,2011


## Option 1: Saving the dataframe to disk first

In [36]:
import psycopg2
import os

# Connection parameters
# Settings are read from the standard libpq environment variables so that no
# password is stored in the notebook. Host, database and user fall back to the
# local demo defaults; the password has no fallback on purpose, so export
# PGPASSWORD before running this cell (it is None when the variable is unset).
param_dic = {
    "host"      : os.environ.get("PGHOST", "localhost"),
    "database"  : os.environ.get("PGDATABASE", "demodb"),
    "user"      : os.environ.get("PGUSER", "postgres"),
    "password"  : os.environ.get("PGPASSWORD"),
}

def connect(params_dic):
    """
    Connect to the PostgreSQL database server.

    Parameters
    ----------
    params_dic : dict
        Keyword arguments forwarded unchanged to ``psycopg2.connect``.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Raises
    ------
    SystemExit
        With exit code 1 when the connection attempt raises any exception;
        the error is printed before exiting.
    """
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # `sys` must be imported for this to work; without it the failure
        # path raised NameError instead of exiting.
        sys.exit(1)
    print("Connection successful")
    return conn


def copy_from_file(conn, df, table, tmp_path=None):
    """
    Save the dataframe to disk as a header-less CSV file, then load that
    file into ``table`` with ``cursor.copy_from()``.

    Parameters
    ----------
    conn : database connection
        Must provide ``cursor()``, ``commit()`` and ``rollback()``.
    df : pandas.DataFrame
        Data to insert; the index is written as the first CSV column.
    table : str
        Name of the destination table.
    tmp_path : str, optional
        Where to write the intermediate CSV. When omitted, a fresh file is
        created in the system temporary directory instead of relying on a
        machine-specific absolute path.

    Returns
    -------
    None on success, 1 when the copy fails (the error is printed and the
    transaction is rolled back).

    Notes
    -----
    The intermediate CSV is always deleted before returning, and it is
    closed before deletion so the removal also works on Windows.
    """
    if tmp_path is None:
        # mkstemp hands back an open OS-level descriptor; close it right away
        # because the file is reopened by name below.
        handle, tmp_path = tempfile.mkstemp(suffix=".csv")
        os.close(handle)
    try:
        # Save the dataframe to disk
        df.to_csv(tmp_path, index_label='Rank', header=False)
        with open(tmp_path, 'r') as csv_file:
            cursor = conn.cursor()
            try:
                cursor.copy_from(csv_file, table, sep=",")
                conn.commit()
            except (Exception, psycopg2.DatabaseError) as error:
                print("Error: %s" % error)
                conn.rollback()
                return 1
            finally:
                cursor.close()
    finally:
        # Runs after the `with` block has closed the file, on every path.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print("copy_from_file() done")


    
#-----------------------------------------------
# Main code
#-----------------------------------------------
conn = connect(param_dic)  # connect to the database
try:
    copy_from_file(conn, df, 'monthlytemp')  # copy the dataframe to SQL
finally:
    # Release the connection even when the copy step raises
    # (for example if writing the CSV fails).
    conn.close()

Connecting to the PostgreSQL database...
Connection successful
copy_from_file() done


## Option 2: Saving the dataframe to memory using StringIO

In [37]:
import psycopg2
from io import StringIO

# Connection parameters
# Settings are read from the standard libpq environment variables so that no
# password is stored in the notebook. Host, database and user fall back to the
# local demo defaults; the password has no fallback on purpose, so export
# PGPASSWORD before running this cell (it is None when the variable is unset).
param_dic = {
    "host"      : os.environ.get("PGHOST", "localhost"),
    "database"  : os.environ.get("PGDATABASE", "demodb"),
    "user"      : os.environ.get("PGUSER", "postgres"),
    "password"  : os.environ.get("PGPASSWORD"),
}

def connect(params_dic):
    """
    Connect to the PostgreSQL database server.

    Parameters
    ----------
    params_dic : dict
        Keyword arguments forwarded unchanged to ``psycopg2.connect``.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Raises
    ------
    SystemExit
        With exit code 1 when the connection attempt raises any exception;
        the error is printed before exiting.
    """
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # `sys` must be imported for this to work; without it the failure
        # path raised NameError instead of exiting.
        sys.exit(1)
    print("Connection successful")
    return conn


def copy_from_stringio(conn, df, table):
    """
    Serialise the dataframe to header-less CSV text held in memory and
    stream it into ``table`` with ``cursor.copy_from()``.

    Returns None on success. On any error the message is printed, the
    transaction is rolled back and 1 is returned.
    """
    # A StringIO built from existing text starts at position 0, ready to read.
    csv_stream = StringIO(df.to_csv(index_label='', header=False))

    cur = conn.cursor()
    try:
        cur.copy_from(csv_stream, table, sep=",")
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cur.close()
        return 1
    else:
        print("copy_from_stringio() done")
        cur.close()

    
#-----------------------------------------------
# Main code
#-----------------------------------------------
conn = connect(param_dic)  # connect to the database
try:
    copy_from_stringio(conn, df, 'monthlytemp')  # copy the dataframe to SQL
finally:
    # Release the connection even when the copy step raises
    # (for example if serialising the dataframe fails).
    conn.close()

Connecting to the PostgreSQL database...
Connection successful
copy_from_stringio() done
