In [None]:
from datetime import timedelta
import pandas as pd
from sqlalchemy import create_engine

# Database connection setup
# Connection settings come from the standard PG* environment variables and
# fall back to the local defaults. The password is never stored in the
# notebook: it is read from PGPASSWORD, or prompted for when that is unset.
import getpass
import os
from urllib.parse import quote_plus

db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'postgres'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD') or getpass.getpass('PostgreSQL password: '),
    'port': os.environ.get('PGPORT', '5432')
}
# URL-quote the credentials so characters such as '@' or ':' cannot break the URL.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Load the full source table into memory.
query = "SELECT * FROM public.cleaned_appended_base_and_pr;"
df = pd.read_sql(query, con=engine)

In [None]:
# Parse both policy date columns as datetimes; values that cannot be parsed become NaT
for date_col in ('policy start date', 'policy end date'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

# Make sure 'booked' and 'old policy no' exist, then store them as strings
# with missing values replaced by ''
for text_col in ('booked', 'old policy no'):
    if text_col not in df.columns:
        df[text_col] = ''
    df[text_col] = df[text_col].fillna('').astype(str)

def update_booked(group, cutoff=pd.Timestamp("2025-01-01")):
    """Flag policies that are followed by a later policy and link the successor.

    Rows are sorted by 'policy start date'. A row whose 'booked' value is
    still unset ('' or 'None') is marked '1.0' when the next row starts on or
    after the day following its 'policy end date'; in that case the next
    row's 'old policy no', if unset, receives this row's 'policy no'. The
    last row, if still unset, becomes '-' when its end date is on or after
    ``cutoff`` and '0.0' otherwise.

    Args:
        group: DataFrame with the columns 'policy no', 'policy start date',
            'policy end date', 'booked' and 'old policy no'.
        cutoff: End-date threshold applied to the last row. Defaults to
            2025-01-01, the value that used to be hard-coded.

    Returns:
        A copy of ``group`` sorted by start date with 'booked' and
        'old policy no' updated. An empty input is returned empty.
    """
    unset = ['', 'None']
    group = group.sort_values(by='policy start date')
    if group.empty:
        # Nothing to flag; also avoids indexing the last row of an empty frame.
        return group

    # A successor qualifies when it starts the day after this row ends, or
    # later. Comparisons against missing dates are False, so rows with a
    # missing date (and the last row, which has no successor) are skipped.
    earliest_successor_start = group['policy end date'] + timedelta(days=1)
    successor_start = group['policy start date'].shift(-1)
    newly_booked = group['booked'].isin(unset) & (successor_start >= earliest_successor_start)

    # The row right after a newly booked one inherits its policy number,
    # unless it already carries an old policy number.
    inherits = newly_booked.shift(1, fill_value=False) & group['old policy no'].isin(unset)
    predecessor_no = group['policy no'].shift(1).to_numpy()

    # Boolean arrays select by position, so repeated index labels cannot make
    # a write spill onto other rows.
    booked_mask = newly_booked.to_numpy()
    inherit_mask = inherits.to_numpy()
    group.loc[booked_mask, 'booked'] = '1.0'
    if inherit_mask.any():
        group.loc[inherit_mask, 'old policy no'] = predecessor_no[inherit_mask]

    # The last policy has no successor: classify it by its end date instead.
    last = len(group) - 1
    booked_pos = group.columns.get_loc('booked')
    if group.iloc[last, booked_pos] in unset:
        still_running = group['policy end date'].iloc[last] >= cutoff
        group.iloc[last, booked_pos] = '-' if still_running else '0.0'
    return group

def correct_booked(group):
    """Upgrade '0.0' rows that do have a later policy, and link the successor.

    Rows are sorted by 'policy start date'. A row whose 'booked' value is
    '0.0' is changed to '1.0' when the next row starts on or after the day
    following its 'policy end date'; in that case the next row's
    'old policy no', if unset ('' or 'None'), receives this row's
    'policy no'. The last row is never changed.

    Args:
        group: DataFrame with the columns 'policy no', 'policy start date',
            'policy end date', 'booked' and 'old policy no'.

    Returns:
        A copy of ``group`` sorted by start date with the corrections applied.
    """
    group = group.sort_values(by='policy start date')
    if len(group) < 2:
        # No row has a successor, so there is nothing to correct.
        return group

    # Comparisons against missing dates are False, so such rows are skipped.
    earliest_successor_start = group['policy end date'] + timedelta(days=1)
    successor_start = group['policy start date'].shift(-1)
    corrected = group['booked'].eq('0.0') & (successor_start >= earliest_successor_start)

    # The row right after a corrected one inherits its policy number, unless
    # it already carries an old policy number.
    inherits = corrected.shift(1, fill_value=False) & group['old policy no'].isin(['', 'None'])
    predecessor_no = group['policy no'].shift(1).to_numpy()

    # Boolean arrays select by position, so repeated index labels cannot make
    # a write spill onto other rows.
    corrected_mask = corrected.to_numpy()
    inherit_mask = inherits.to_numpy()
    group.loc[corrected_mask, 'booked'] = '1.0'
    if inherit_mask.any():
        group.loc[inherit_mask, 'old policy no'] = predecessor_no[inherit_mask]
    return group

In [None]:
# Run both passes once per "Trim Policy No" group, flattening the index after each.
# NOTE(review): groupby's default dropna=True leaves rows with a null
# 'Trim Policy No' out of the result - confirm that is intended.
df = (
    df.groupby('Trim Policy No')
    .apply(update_booked)
    .reset_index(drop=True)
)
df_cleaned = (
    df.groupby('Trim Policy No')
    .apply(correct_booked)
    .reset_index(drop=True)
)

In [None]:

# Write the result to the database, replacing the table if it already exists;
# the DataFrame index is not written.
df_cleaned.to_sql(
    name='cleaned_booked_oldpolicy_base_pr',
    con=engine,
    index=False,
    if_exists='replace',
)

In [1]:
from datetime import timedelta
import pandas as pd
from sqlalchemy import create_engine

# Database connection setup
# Connection settings come from the standard PG* environment variables and
# fall back to the local defaults. The password is never stored in the
# notebook: it is read from PGPASSWORD, or prompted for when that is unset.
import getpass
import os
from urllib.parse import quote_plus

db_config = {
    'host': os.environ.get('PGHOST', 'localhost'),
    'database': os.environ.get('PGDATABASE', 'postgres'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD') or getpass.getpass('PostgreSQL password: '),
    'port': os.environ.get('PGPORT', '5432')
}
# URL-quote the credentials so characters such as '@' or ':' cannot break the URL.
connection_string = (
    f"postgresql://{quote_plus(db_config['user'])}:{quote_plus(db_config['password'])}"
    f"@{db_config['host']}:{db_config['port']}/{db_config['database']}"
)
engine = create_engine(connection_string)

# Load the full source table into memory.
query = "SELECT * FROM public.cleaned_appended_base_and_pr;"
df = pd.read_sql(query, con=engine)

In [None]:
# Parse the policy date columns as datetimes; unparseable values become NaT
for date_col in ('policy start date', 'policy end date'):
    df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

# Create 'booked' and 'old policy no' when they are missing, then store them
# as strings with missing values replaced by ''
for text_col in ('booked', 'old policy no'):
    if text_col not in df.columns:
        df[text_col] = ''
    df[text_col] = df[text_col].fillna('').astype(str)

# Current date at midnight.
# NOTE(review): no visible cell reads `today`; update_booked compares the last
# policy against a fixed 2025-01-01 instead - confirm which one is intended.
today = pd.Timestamp.now().normalize()

def update_booked(group, cutoff=pd.Timestamp("2025-01-01")):
    """Flag policies that are followed by a later policy.

    Rows are sorted by 'policy start date'. A row whose 'booked' value is
    still unset ('' or 'None') is marked '1.0' when the next row starts on or
    after the day following its 'policy end date'. The last row, if still
    unset, becomes '-' when its end date is on or after ``cutoff`` and '0.0'
    otherwise.

    Args:
        group: DataFrame with the columns 'policy start date',
            'policy end date' and 'booked'.
        cutoff: End-date threshold applied to the last row. Defaults to
            2025-01-01, the value that used to be hard-coded.

    Returns:
        A copy of ``group`` sorted by start date with 'booked' updated.
        An empty input is returned empty.
    """
    unset = ['', 'None']
    # Sort policies by start date within the group
    group = group.sort_values(by='policy start date')
    if group.empty:
        # Nothing to flag; also avoids indexing the last row of an empty frame.
        return group

    # A successor qualifies when it starts the day after this row ends, or
    # later. Comparisons against missing dates are False, so rows with a
    # missing date (and the last row, which has no successor) are skipped.
    earliest_successor_start = group['policy end date'] + timedelta(days=1)
    successor_start = group['policy start date'].shift(-1)
    newly_booked = group['booked'].isin(unset) & (successor_start >= earliest_successor_start)

    # A boolean array selects by position, so repeated index labels cannot
    # make the write spill onto other rows.
    group.loc[newly_booked.to_numpy(), 'booked'] = '1.0'

    # The last policy has no successor: classify it by its end date instead.
    last = len(group) - 1
    booked_pos = group.columns.get_loc('booked')
    if group.iloc[last, booked_pos] in unset:
        still_running = group['policy end date'].iloc[last] >= cutoff
        group.iloc[last, booked_pos] = '-' if still_running else '0.0'
    return group

# Run update_booked once per "Trim Policy No" group, then flatten the index.
# NOTE(review): groupby's default dropna=True leaves rows with a null
# 'Trim Policy No' out of the result - confirm that is intended.
df = (
    df.groupby('Trim Policy No')
    .apply(update_booked)
    .reset_index(drop=True)
)

In [None]:
def correct_booked(group):
    """Upgrade '0.0' rows that do have a later policy.

    Rows are sorted by 'policy start date'. A row whose 'booked' value is
    '0.0' is changed to '1.0' when the next row starts on or after the day
    following its 'policy end date'. The last row is never changed.

    Args:
        group: DataFrame with the columns 'policy start date',
            'policy end date' and 'booked'.

    Returns:
        A copy of ``group`` sorted by start date with the corrections applied.
    """
    group = group.sort_values(by='policy start date')
    if len(group) < 2:
        # No row has a successor, so there is nothing to correct.
        return group

    # Comparisons against missing dates are False, so such rows are skipped.
    earliest_successor_start = group['policy end date'] + timedelta(days=1)
    successor_start = group['policy start date'].shift(-1)
    corrected = group['booked'].eq('0.0') & (successor_start >= earliest_successor_start)

    # A boolean array selects by position, so repeated index labels cannot
    # make the write spill onto other rows.
    group.loc[corrected.to_numpy(), 'booked'] = '1.0'
    return group

# Run correct_booked once per "Trim Policy No" group, then flatten the index
df = (
    df.groupby('Trim Policy No')
    .apply(correct_booked)
    .reset_index(drop=True)
)

In [None]:
def map_old_policy_no(group):
    """Fill 'old policy no' with the policy number of the preceding policy.

    Rows are sorted by 'policy start date'. A row whose 'old policy no' is
    unset ('' or 'None') receives the previous row's 'policy no' when it
    starts on or after the day following the previous row's
    'policy end date'. Rows that already have a value are left alone.

    Args:
        group: DataFrame with the columns 'policy no', 'policy start date',
            'policy end date' and 'old policy no'.

    Returns:
        A copy of ``group`` sorted by start date with 'old policy no' filled.
    """
    group = group.sort_values(by='policy start date')
    if len(group) < 2:
        # No row has a predecessor, so there is nothing to map.
        return group

    # The first row has no predecessor; its shifted end date is missing and
    # the comparison is False, as it is for any row with a missing date.
    earliest_start = group['policy end date'].shift(1) + timedelta(days=1)
    needs_link = group['old policy no'].isin(['', 'None']) & (group['policy start date'] >= earliest_start)

    # A boolean array selects by position, so repeated index labels cannot
    # make the write spill onto other rows.
    link_mask = needs_link.to_numpy()
    if link_mask.any():
        predecessor_no = group['policy no'].shift(1).to_numpy()
        group.loc[link_mask, 'old policy no'] = predecessor_no[link_mask]
    return group

# Run map_old_policy_no once per "Trim Policy No" group, then flatten the index
df_cleaned = (
    df.groupby('Trim Policy No')
    .apply(map_old_policy_no)
    .reset_index(drop=True)
)

In [None]:

# Write the result to the database, replacing the table if it already exists;
# the DataFrame index is not written.
df_cleaned.to_sql(
    name='cleaned_booked_oldpolicy_base_pr',
    con=engine,
    index=False,
    if_exists='replace',
)