In [68]:
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv
from urllib.parse import quote_plus

In [69]:
# Load environment variables from .env so the os.getenv() lookups in the
# following cell can read the database settings. The call's return value is
# the cell's last expression, so it is echoed as the cell output.
load_dotenv()

True

In [70]:
# Fetch database credentials from the environment (populated from .env).
_DB_ENV_VARS = ("DB_NAME", "DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT")
_db_settings = {var_name: os.getenv(var_name) for var_name in _DB_ENV_VARS}

# Fail fast with a readable message: os.getenv() returns None for an unset
# variable, which would otherwise surface as an opaque TypeError from
# quote_plus() or as the literal text "None" inside the connection URL.
_missing_vars = [var_name for var_name, value in _db_settings.items() if value is None]
if _missing_vars:
    raise RuntimeError(
        "Missing database settings in the environment/.env file: "
        + ", ".join(_missing_vars)
    )

DB_NAME = _db_settings["DB_NAME"]
DB_USER = _db_settings["DB_USER"]
DB_PASSWORD = _db_settings["DB_PASSWORD"]
DB_HOST = _db_settings["DB_HOST"]
DB_PORT = _db_settings["DB_PORT"]

# Percent-encode the password so special characters (e.g. '@', ':', '/')
# cannot break the URL structure.
password_encoded = quote_plus(DB_PASSWORD)

# Use only this connection string (DO NOT create another one)
connection_url = f'postgresql://{DB_USER}:{password_encoded}@{DB_HOST}:{DB_PORT}/{DB_NAME}'

# Use this engine throughout the script
engine = create_engine(connection_url)

In [71]:
# Open a direct psycopg2 connection using the same settings that were read
# from the environment, and create a cursor on it.
conn = psycopg2.connect(
    host=DB_HOST,
    port=DB_PORT,
    dbname=DB_NAME,
    user=DB_USER,
    password=DB_PASSWORD,
)
cursor = conn.cursor()

# Reaching this line means connect() did not raise.
print("Connection to PostgreSQL successful!")

# NOTE(review): the connection and cursor are intentionally left open here
# (conn.close() is not called in this cell) — confirm they are closed once
# they are no longer needed.

Connection to PostgreSQL successful!


In [72]:
# Load the cleaned dataset.
# Forward slashes are used on purpose: the previous "..\data\..." literal
# relied on backslashes inside a normal string ("\d" and "\c" are invalid
# escape sequences) and only resolved on Windows. This form works on every
# platform and matches the backup path used later in the notebook.
df = pd.read_csv("../data/cleaned_dataset.csv")

In [73]:
# Work on an independent copy so later edits do not touch the loaded frame
df_copy = df.copy()

# Write that copy to disk as a backup, leaving the row index out of the file
df_copy.to_csv(path_or_buf="../data/cleaned_dataset_backup.csv", index=False)

print("Backup created successfully!")

Backup created successfully!


In [74]:
# Display configuration for the rest of the notebook:
# no cap on the number of columns shown, and a wider output line so that
# wide frames are not truncated.
pd.options.display.max_columns = None
pd.options.display.width = 1000

In [75]:
# Display basic info about the working copy: index range, column names,
# non-null counts and dtypes (see the output below).
df_copy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29999 entries, 0 to 29998
Data columns (total 35 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ID                          29999 non-null  int64  
 1   LIMIT_BAL                   29999 non-null  int64  
 2   SEX                         29999 non-null  int64  
 3   EDUCATION                   29999 non-null  int64  
 4   MARRIAGE                    29999 non-null  int64  
 5   AGE                         29999 non-null  int64  
 6   PAY_0                       29999 non-null  int64  
 7   PAY_2                       29999 non-null  int64  
 8   PAY_3                       29999 non-null  int64  
 9   PAY_4                       29999 non-null  int64  
 10  PAY_5                       29999 non-null  int64  
 11  PAY_6                       29999 non-null  int64  
 12  BILL_AMT1                   29999 non-null  int64  
 13  BILL_AMT2                   299

In [76]:
# Show the first few rows of the working copy as a quick sanity check of the
# loaded values.
df_copy.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default_payment_next_month,HIGH_CREDIT_LIMIT,CREDIT_LIMIT_CATEGORY,LIMIT_BAL_GROUP,PAY_AMT_GROUP,PAY_AMT1_GROUP,PAY_AMT2_GROUP,PAY_AMT3_GROUP,PAY_AMT4_GROUP,PAY_AMT5_GROUP,PAY_AMT6_GROUP
0,1,20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0.0,689.0,0.0,0.0,0.0,0.0,1,0,Low Credit Limit,Low Limit,Low Payment,Low Payment,Low Payment,Low Payment,Low Payment,Low Payment,Low Payment
1,2,120000,2,2,2,26,-1,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1,0,Medium Credit Limit,Medium Limit,Low Payment,Low Payment,Medium Payment,Medium Payment,Medium Payment,Low Payment,High Payment
2,3,90000,2,2,2,34,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0,0,Medium Credit Limit,Medium Limit,Medium Payment,Medium Payment,Medium Payment,Medium Payment,Medium Payment,Medium Payment,Very High Payment
3,4,50000,2,2,1,37,0,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0,0,Low Credit Limit,Low Limit,Medium Payment,Medium Payment,High Payment,Medium Payment,Medium Payment,Medium Payment,Medium Payment
4,5,50000,1,2,1,57,-1,0,-1,0,0,0,8617,5670,35835,20940,19146,19131,2000.0,3962.5,4174.125,3574.5,689.0,679.0,0,0,Low Credit Limit,Low Limit,Medium Payment,Medium Payment,Very High Payment,Very High Payment,Very High Payment,Medium Payment,Medium Payment


In [77]:
# Print the dtype of every column in the working copy as plain text.
print(df_copy.dtypes)

ID                              int64
LIMIT_BAL                       int64
SEX                             int64
EDUCATION                       int64
MARRIAGE                        int64
AGE                             int64
PAY_0                           int64
PAY_2                           int64
PAY_3                           int64
PAY_4                           int64
PAY_5                           int64
PAY_6                           int64
BILL_AMT1                       int64
BILL_AMT2                       int64
BILL_AMT3                       int64
BILL_AMT4                       int64
BILL_AMT5                       int64
BILL_AMT6                       int64
PAY_AMT1                      float64
PAY_AMT2                      float64
PAY_AMT3                      float64
PAY_AMT4                      float64
PAY_AMT5                      float64
PAY_AMT6                      float64
default_payment_next_month      int64
HIGH_CREDIT_LIMIT               int64
CREDIT_LIMIT

In [78]:
# NOTE(review): this repeats the earlier "Show first few rows" preview;
# df_copy is not modified between the two calls in the visible cells, so the
# output is identical — consider removing one of them.
df_copy.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,PAY_6,BILL_AMT1,BILL_AMT2,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default_payment_next_month,HIGH_CREDIT_LIMIT,CREDIT_LIMIT_CATEGORY,LIMIT_BAL_GROUP,PAY_AMT_GROUP,PAY_AMT1_GROUP,PAY_AMT2_GROUP,PAY_AMT3_GROUP,PAY_AMT4_GROUP,PAY_AMT5_GROUP,PAY_AMT6_GROUP
0,1,20000,2,2,1,24,2,2,-1,-1,-2,-2,3913,3102,689,0,0,0,0.0,689.0,0.0,0.0,0.0,0.0,1,0,Low Credit Limit,Low Limit,Low Payment,Low Payment,Low Payment,Low Payment,Low Payment,Low Payment,Low Payment
1,2,120000,2,2,2,26,-1,2,0,0,0,2,2682,1725,2682,3272,3455,3261,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1,0,Medium Credit Limit,Medium Limit,Low Payment,Low Payment,Medium Payment,Medium Payment,Medium Payment,Low Payment,High Payment
2,3,90000,2,2,2,34,0,0,0,0,0,0,29239,14027,13559,14331,14948,15549,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0,0,Medium Credit Limit,Medium Limit,Medium Payment,Medium Payment,Medium Payment,Medium Payment,Medium Payment,Medium Payment,Very High Payment
3,4,50000,2,2,1,37,0,0,0,0,0,0,46990,48233,49291,28314,28959,29547,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0,0,Low Credit Limit,Low Limit,Medium Payment,Medium Payment,High Payment,Medium Payment,Medium Payment,Medium Payment,Medium Payment
4,5,50000,1,2,1,57,-1,0,-1,0,0,0,8617,5670,35835,20940,19146,19131,2000.0,3962.5,4174.125,3574.5,689.0,679.0,0,0,Low Credit Limit,Low Limit,Medium Payment,Medium Payment,Very High Payment,Very High Payment,Very High Payment,Medium Payment,Medium Payment


In [79]:
# Print the column labels of the working copy before writing it to the database.
print(df_copy.columns)

Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6', 'default_payment_next_month', 'HIGH_CREDIT_LIMIT', 'CREDIT_LIMIT_CATEGORY', 'LIMIT_BAL_GROUP', 'PAY_AMT_GROUP', 'PAY_AMT1_GROUP', 'PAY_AMT2_GROUP', 'PAY_AMT3_GROUP', 'PAY_AMT4_GROUP', 'PAY_AMT5_GROUP', 'PAY_AMT6_GROUP'], dtype='object')


In [80]:
# Re-assign the ID column from the originally loaded frame.
# NOTE(review): the column listing printed just before this cell already
# contains 'ID', and df_copy was created as a copy of df, so in the visible
# flow this assignment leaves the data unchanged — confirm whether it is
# still needed.
df_copy["ID"] = df["ID"]

# Print the column labels again to confirm 'ID' is present.
print(df_copy.columns)

Index(['ID', 'LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6', 'default_payment_next_month', 'HIGH_CREDIT_LIMIT', 'CREDIT_LIMIT_CATEGORY', 'LIMIT_BAL_GROUP', 'PAY_AMT_GROUP', 'PAY_AMT1_GROUP', 'PAY_AMT2_GROUP', 'PAY_AMT3_GROUP', 'PAY_AMT4_GROUP', 'PAY_AMT5_GROUP', 'PAY_AMT6_GROUP'], dtype='object')


In [84]:
# Print the engine's URL to confirm the connection target (user, host, port,
# database). The printed output shows the password masked as ***.
print(engine.url)

postgresql://credit_admin:***@localhost:5432/credit_risk_db


In [86]:
# Write the working copy to the "credit_data" table through the shared engine.
# if_exists="replace" replaces any existing table of that name, and
# index=False keeps the DataFrame index out of the table.
# The call's return value is echoed as the cell output.
df_copy.to_sql(
    name="credit_data",
    con=engine,
    if_exists="replace",
    index=False,
)

111

In [87]:
# Print (rows, columns) of the frame that was written, for comparison with
# the table contents in the database.
print(df_copy.shape)

(29999, 35)
