In [None]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# Database connection setup
db_config = {
    'host': 'localhost',
    'database': 'postgres',
    'user': 'postgres',
    'password': 'kaviyam123',
    'port': '5432'
}
connection_string = f"postgresql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['database']}"
engine = create_engine(connection_string)

# Step 1: Load data from PostgreSQL
query = "SELECT * FROM cleaned_merged_base_data_check;"
df = pd.read_sql(query, con=engine)

# Step 2: Clean column names
df.rename(columns=lambda x: x.strip(), inplace=True)

# Step 3: Handle `Total Premium Payable`
# Fill nulls with empty strings for processing
df['Total Premium Payable'] = df['Total Premium Payable'].fillna('').astype(str)

# Identify non-numeric rows
non_numeric_mask = ~df['Total Premium Payable'].str.replace('.', '', 1).str.isdigit()

# Identify rows where `Total Premium Payable` is '0' or '0.0'
zero_premium_mask = df['Total Premium Payable'].isin(['0', '0.0'])

# Separate DataFrames
non_numeric_or_zero_df = df[non_numeric_mask | zero_premium_mask]
numeric_df = df[~non_numeric_mask & ~zero_premium_mask]  # Exclude rows with '0' or '0.0'

# Step 4: Save DataFrames
# Save the processed data into PostgreSQL
processed_table_name = 'cleaned_premnumeric_merged_base_data_check'

# Create a connection to the database
with engine.connect() as connection:
    # Drop the table if it exists
    drop_query = f"DROP TABLE IF EXISTS {processed_table_name};"
    connection.execute(text(drop_query))  # Execute the drop statement
    print(f"Table {processed_table_name} dropped successfully.")

    # Load the new data into the table
    numeric_df.to_sql(processed_table_name, con=engine, if_exists='replace', index=False)
    print(f"Data loaded into {processed_table_name} successfully.")

non_numeric_or_zero_df.to_csv('non_numeric_and_zero_total_premium.csv', index=False)

In [1]:
import pandas as pd
import numpy as np

# Creating DataFrame
df = pd.read_csv('non_numeric_and_zero_total_premium.csv')

# Step 1: Remove rows where all three (Total OD Premium, Total TP Premium, gst) are zero or null
df = df[~((df["Total OD Premium"].fillna(0) == 0) &
          (df["Total TP Premium"].fillna(0) == 0) &
          (df["GST"].fillna(0) == 0))]

# Step 2: Calculate gst if it is zero and round the value
df["GST"] = df["GST"].fillna(0)  
df.loc[df["GST"] == 0, "GST"] = ((df["Total OD Premium"].fillna(0) + df["Total TP Premium"].fillna(0)) * 0.18).round()

# Step 3: Calculate Total Premium Payable as the sum of Total OD Premium, Total TP Premium, and gst
df["Total Premium Payable"] = (
    df["Total OD Premium"].fillna(0) + df["Total TP Premium"].fillna(0) + df["GST"]
).round()

df.to_csv('cleaned_non_numeric_and_zero_total_premium.csv', index=False)

In [2]:
import pandas as pd
from sqlalchemy import create_engine

# Database connection setup
db_config = {
    'host': 'localhost',
    'database': 'postgres',
    'user': 'postgres',
    'password': 'kaviyam123',
    'port': '5432'
}
connection_string = f"postgresql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['database']}"
engine = create_engine(connection_string)

# Step 1: Load data from PostgreSQL (existing table)
existing_data_query = "SELECT * FROM cleaned_premnumeric_merged_base_data_check;"
existing_data = pd.read_sql(existing_data_query, con=engine)

# Step 2: Load cleaned data from CSV
cleaned_data = pd.read_csv('cleaned_non_numeric_and_zero_total_premium.csv')

# Step 3: Ensure columns match
# Add missing columns to cleaned_data
for column in existing_data.columns:
    if column not in cleaned_data.columns:
        cleaned_data[column] = None

# Align column order to match the existing table
cleaned_data = cleaned_data[existing_data.columns]

# Step 4: Append the cleaned data to the existing table
merged_data = pd.concat([existing_data, cleaned_data], ignore_index=True)

# Step 5: Save the merged data back to PostgreSQL
merged_data.to_sql('cleanedprem_merged_base_data', con=engine, if_exists='replace', index=False)

  cleaned_data = pd.read_csv('cleaned_non_numeric_and_zero_total_premium.csv')


166