# In [None]:
import pandas as pd

# Load the raw datasets. The paths are relative to the current working
# directory; adjust them if the CSV files live elsewhere.
financial_ledger_df = pd.read_csv('FinLedger.csv')
useeio_df = pd.read_csv('useeio.csv')

# Display initial information about the datasets.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
print("Financial Ledger Dataset Info:")
financial_ledger_df.info()
print("\nUSEEIO Dataset Info:")
useeio_df.info()

# Data Cleaning for Financial Ledger Transactions

# 1. Remove rows that are exact duplicates across all columns.
financial_ledger_df = financial_ledger_df.drop_duplicates()

# 2. Remove irrelevant entries: keep only rows that fall inside the reporting
#    window and whose emission scope matches the one of interest.
date_column = 'transaction_date'
start_date = '2020-01-01'
end_date = '2023-12-31'
relevant_criteria = 'Scope 3'  # Value expected in the 'emission_scope' column

financial_ledger_df[date_column] = pd.to_datetime(financial_ledger_df[date_column])

# Compare on the calendar day rather than the raw timestamp. A bare date
# string is interpreted as midnight, so `timestamp <= end_date` would drop any
# transaction on the end date that carries a time-of-day, even though the
# window is meant to include that whole day. For date-only data this is
# equivalent to comparing the column directly.
transaction_day = financial_ledger_df[date_column].dt.normalize()
financial_ledger_df = financial_ledger_df[
    (transaction_day >= start_date) &
    (transaction_day <= end_date) &
    (financial_ledger_df['emission_scope'] == relevant_criteria)
]

# 3. Data quality: drop rows missing either value in the columns below.
financial_ledger_df = financial_ledger_df.dropna(subset=['amount', 'emission_factor'])

# Data Cleaning for USEEIO Dataset

# Column/value pair that marks a row as relevant (example criterion:
# relevance to scope 3 emissions).
relevant_column = 'emission_type'
relevant_value = 'Scope 3'

# The three cleaning steps, applied in order as one pipeline:
#   1. drop exact duplicate rows,
#   2. keep only rows whose `relevant_column` equals `relevant_value`,
#   3. drop rows with missing values in the critical columns.
useeio_df = (
    useeio_df
    .drop_duplicates()
    .loc[lambda frame: frame[relevant_column] == relevant_value]
    .dropna(subset=['emission_factor', 'sector'])
)

# Display cleaned datasets information.
# DataFrame.info() prints its own summary and returns None, so it is called
# directly; print(df.info()) would add a stray "None" line to the output.
print("\nCleaned Financial Ledger Dataset Info:")
financial_ledger_df.info()
print("\nCleaned USEEIO Dataset Info:")
useeio_df.info()

# Save the cleaned datasets; index=False keeps the row index out of the files.
financial_ledger_df.to_csv('cleaned_financial_ledger.csv', index=False)
useeio_df.to_csv('cleaned_useeio.csv', index=False)
