<a href="https://colab.research.google.com/github/Altaieb-Mohammed/lab_2corse/blob/master/c_lab1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:


import pandas as pd


def clean_inheritance_data(input_path, output_path):
    """Clean the raw inheritance dataset and write the result to a CSV file.

    Cleaning steps, in order:
      1. Drop the last row of the input (the source file ends with an
         incomplete record).
      2. Coerce every column to numeric; unparsable cells become NaN.
      3. Drop rows whose heir indicator/count columns could not be parsed,
         then clamp indicator columns to {0, 1} and count columns to >= 0,
         storing both as integers.
      4. Clamp 'estate', 'debts', 'wills' and every 'share_*' column to >= 0.
      5. Zero the share of any heir that is absent from the case.
      6. Keep only rows where estate > debts + wills.

    Args:
        input_path: Local file path or http(s) URL of the raw CSV. When it is
            neither a URL nor an existing local file, the copy published in
            the project's GitHub repository is read instead.
        output_path: Destination path for the cleaned CSV (written without
            the index column).

    Returns:
        None. The cleaned data is written to ``output_path`` and a
        confirmation message is printed.

    Raises:
        KeyError: If the input lacks any of the required heir or financial
            columns.
    """
    import os  # function-scope import: only needed to probe input_path

    default_url = "https://raw.githubusercontent.com/Altaieb-Mohammed/lab_2corse/master/final_inheritanc.csv"

    # Honour the caller's input_path; fall back to the published dataset only
    # when the given path cannot be used (e.g. a fresh Colab runtime where the
    # file was never uploaded).
    source = str(input_path) if input_path is not None else ""
    is_remote = source.startswith(("http://", "https://"))
    if not (is_remote or os.path.exists(source)):
        source = default_url

    # Load the raw data
    df = pd.read_csv(source)

    binary_cols = ['husband', 'wives', 'father', 'mother', 'grandfather', 'grandmother']
    count_cols = ['sons', 'daughters', 'brothers_m', 'sisters_m']
    base_financial_cols = ['estate', 'debts', 'wills']

    # Fail early with a message that names every missing column.
    missing = [col for col in binary_cols + count_cols + base_financial_cols
               if col not in df.columns]
    if missing:
        raise KeyError(f"Input data is missing required columns: {missing}")

    # 1. Remove incomplete last row (explicit copy so later assignments never
    #    operate on a view of the raw frame)
    df = df.iloc[:-1].copy()

    # 2. Convert all columns to numeric (unparsable values become NaN)
    df = df.apply(pd.to_numeric, errors='coerce')

    # NaN cannot be cast to int, so rows with an unparsable heir flag or
    # heir count are discarded as corrupt before the integer conversion.
    df = df.dropna(subset=binary_cols + count_cols)

    # 3. Fix binary columns (0 or 1 only)
    for col in binary_cols:
        df[col] = df[col].clip(0, 1).astype(int)

    # 4. Fix count columns (non-negative integers)
    for col in count_cols:
        df[col] = df[col].clip(0).astype(int)

    # 5. Ensure financial columns are non-negative
    financial_cols = base_financial_cols + [col for col in df.columns if 'share_' in col]
    for col in financial_cols:
        df[col] = df[col].clip(0)

    # 6. Enforce logical consistency: an absent heir receives no share
    heir_to_share = {
        'husband': 'share_husband',
        'wives': 'share_wife',
        'father': 'share_father',
        'mother': 'share_mother',
        'sons': 'share_sons',
        'daughters': 'share_daughters',
    }
    for heir_col, share_col in heir_to_share.items():
        df.loc[df[heir_col] == 0, share_col] = 0

    # 7. Remove cases with nothing to distribute
    df = df[df['estate'] > (df['debts'] + df['wills'])]

    # 8. Save cleaned data
    df.to_csv(output_path, index=False)
    print(f"Successfully cleaned data saved to {output_path}")

if __name__ == "__main__":
    # Script entry point: run the cleaning pipeline with the notebook's
    # default file names (edit the first argument if the raw file is named
    # differently).
    clean_inheritance_data(
        "final_inheritanc.csv",
        "cleaned_inheritance.csv",
    )

Successfully cleaned data saved to cleaned_inheritance.csv


In [18]:
import os

# Resolve the cleaned CSV's name against the current working directory and
# show the resulting absolute path.
cleaned_csv_location = os.path.abspath("cleaned_inheritance.csv")
print(cleaned_csv_location)

/content/cleaned_inheritance.csv


In [19]:
from google.colab import files

# Hand the cleaned CSV to Colab's download helper.
cleaned_csv_path = "/content/cleaned_inheritance.csv"
files.download(cleaned_csv_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>