# In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import pdfplumber
import re

# Function to extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one newline-joined string.

    Args:
        pdf_path: Location of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        The extracted text of all pages in page order, separated by ``'\\n'``.
        Pages that yield no text contribute an empty string.

    Raises:
        Whatever ``pdfplumber.open`` raises for a missing or unreadable file.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can come back empty/None for pages without a text
        # layer (e.g. scanned images); coalesce so the join never sees None.
        page_texts = [page.extract_text() or '' for page in pdf.pages]
    # Join with a newline so a number ending one page cannot fuse with the
    # text that starts the next page.
    return '\n'.join(page_texts)

# Function to process extracted PDF text and extract relevant financial data
def process_pdf_text(text):
    """Pull the income, debt and savings figures out of statement text.

    This is a simplified approach: it looks for the literal labels
    ``Income:``, ``Debt:`` and ``Savings:`` followed by an amount, and must be
    tailored to the real layout of the PDFs being processed.

    Args:
        text: Text extracted from the PDF. ``None`` or ``''`` is treated as
            "nothing found".

    Returns:
        A tuple ``(income, debt, savings)``. Each value is an ``int`` for a
        whole amount, a ``float`` when the amount has a decimal part, and
        ``0`` when the label is not present.
    """
    text = text or ''
    return (
        _find_labeled_amount('Income', text),
        _find_labeled_amount('Debt', text),
        _find_labeled_amount('Savings', text),
    )


# Optional currency symbol, then either a comma-grouped number (1,234,567) or
# a plain digit run, with an optional decimal part.
_AMOUNT_PATTERN = r'[$€£]?\s*((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)'


def _find_labeled_amount(label, text):
    """Return the amount following ``"<label>:"`` in ``text``, or 0 if absent."""
    match = re.search(re.escape(label) + r':\s*' + _AMOUNT_PATTERN, text)
    if match is None:
        return 0
    # Thousands separators are presentation only; drop them before parsing.
    digits = match.group(1).replace(',', '')
    return float(digits) if '.' in digits else int(digits)

# Load dataset (Replace this with your actual CSV file path)
data = pd.read_csv('/content/combined_dataset.csv')  # Replace with the path to your CSV file

# Feature engineering (simplified)
# NOTE: rows with Income == 0 produce +/-inf (or NaN for 0/0) here; they are
# neutralised just before the feature matrix is built, further down.
data['DebtToIncomeRatio'] = data['LoanAmount'] / data['Income']
data['TotalDebt'] = data['MonthlyDebtPayments'] * 12  # Approx annual debt

# Create the financial_health_score column based on a simple formula:
# income and savings (in thousands) raise the score, and a higher
# debt-to-income ratio lowers it.
data['financial_health_score'] = (
    (data['Income'] / 1000) +  # Income contributes positively
    (100 - data['DebtToIncomeRatio'] * 100) +  # Debt-to-Income ratio, lower is better
    (data['SavingsAccountBalance'] / 1000)  # Savings balance contributes positively
)

# Bound the score to the range 0..100 (infinite scores saturate at a bound)
data['financial_health_score'] = np.clip(data['financial_health_score'], 0, 100)

# Replace infinities as well as missing values with 0. fillna() alone leaves
# +/-inf in DebtToIncomeRatio, and StandardScaler refuses infinite input.
# This runs after the score is computed, so the score column is unaffected.
data = data.replace([np.inf, -np.inf], np.nan).fillna(0)

# Features and target
X = data[['Income', 'DebtToIncomeRatio', 'LoanAmount', 'TotalDebt', 'SavingsAccountBalance']]
y = data['financial_health_score']  # Now it exists in the dataset

# Train-test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: statistics are fitted on the training split only and then
# re-used for the test split (and later for user input).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the neural network model
model = keras.Sequential([
    # keras.Input declares the feature width; it replaces
    # InputLayer(input_shape=...), whose `input_shape` argument is deprecated
    # in Keras 3.
    keras.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(64, activation='relu'),  # First hidden layer with 64 units
    layers.Dense(32, activation='relu'),  # Second hidden layer with 32 units
    layers.Dense(1)  # Output layer with a single unit (financial health score)
])

# Compile the model
model.compile(optimizer='adam', loss='mse')  # Using mean squared error loss for regression

# Train the model; the test split is passed as validation data so its loss is
# reported after every epoch.
model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_data=(X_test_scaled, y_test))

# Evaluate the model on the held-out split
predictions = model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, predictions))

print(f'Model RMSE: {rmse}')

# Function to get user input or process PDF
def get_user_input_or_pdf():
    """Collect one person's financial features, typed in or parsed from a PDF.

    The user first chooses between manual entry (``1``) and a bank-statement
    PDF (``2``).

    Returns:
        A one-row ``pandas.DataFrame`` with the columns ``Income``,
        ``DebtToIncomeRatio``, ``LoanAmount``, ``TotalDebt`` and
        ``SavingsAccountBalance``. An empty DataFrame is returned when the
        menu choice is not ``1`` or ``2``, when a typed value is not numeric,
        or when the PDF cannot be processed.
    """
    choice = input("Do you want to input your values manually (1) or upload a bank statement PDF (2)? Enter 1 or 2: ").strip()

    if choice == '1':
        try:
            income = float(input("Enter monthly income: "))
            debt = float(input("Enter monthly debt payments: "))
            debt_to_income = float(input("Enter debt-to-income ratio: "))
            loan_amount = float(input("Enter loan amount: "))
            savings = float(input("Enter savings account balance: "))
        except ValueError as e:
            # Same "no data" signal as a failed PDF, instead of a traceback.
            print(f"Invalid numeric input: {e}")
            return pd.DataFrame()

        return pd.DataFrame([{
            'Income': income,
            'DebtToIncomeRatio': debt_to_income,
            'LoanAmount': loan_amount,
            # The training data derives TotalDebt as MonthlyDebtPayments * 12,
            # so the monthly figure entered above is annualised to match.
            'TotalDebt': debt * 12,
            'SavingsAccountBalance': savings
        }])

    if choice == '2':
        pdf_path = input("Enter the path to your bank statement PDF: ")

        # Any failure while opening or parsing the file (missing path,
        # corrupt PDF, ...) is reported and turned into an empty result.
        try:
            text = extract_text_from_pdf(pdf_path)
            income, debt, savings = process_pdf_text(text)
            print(f"Extracted income: {income}, debt: {debt}, savings: {savings}")

            # NOTE(review): the single extracted "debt" figure is used for
            # both LoanAmount and TotalDebt -- confirm this matches the
            # statement layout and the units of the training data.
            return pd.DataFrame([{
                'Income': income,
                'DebtToIncomeRatio': debt / income if income > 0 else 0,
                'LoanAmount': debt,
                'TotalDebt': debt,
                'SavingsAccountBalance': savings
            }])
        except Exception as e:
            print(f"Error processing the PDF: {e}")
            return pd.DataFrame()

    # Unrecognised menu choice: return an empty frame rather than None so the
    # caller's `.empty` check keeps working.
    print("Invalid choice. Please enter 1 or 2.")
    return pd.DataFrame()

# Get user input or PDF data
user_data = get_user_input_or_pdf()

# Treat a missing (None) result the same as an empty frame so that an
# unrecognised menu choice cannot crash on `.empty`.
if user_data is not None and not user_data.empty:
    # Scale the user data with the scaler fitted on the training split, then
    # predict the financial health score.
    user_data_scaled = scaler.transform(user_data)
    raw_score = float(model.predict(user_data_scaled)[0][0])

    # The training target is bounded to 0..100, but the unconstrained
    # regression output can stray outside it; bound it the same way.
    user_score = float(np.clip(raw_score, 0, 100))

    print(f"Predicted Financial Health Score: {user_score}")

    # Provide advice based on the score band: <40 low, <70 average, else good
    if user_score < 40:
        advice = "Your financial health score is low. Focus on reducing unnecessary expenses and building savings."
    elif user_score < 70:
        advice = "Your financial health is average. Consider paying off debts and increasing savings."
    else:
        advice = "Your financial health is good. Keep up the good work, and consider diversifying investments."

    print(f"Advice: {advice}")
else:
    print("No valid data provided.")
