In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
# Load the raw transaction export; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="personal_transactions.csv")

In [None]:
# Peek at the first five rows to confirm the columns loaded as expected.
df.head(n=5)

Feature engineering

 Time Period Selection

In [None]:
# Parse the Date column into datetimes so it can be compared against timestamps below.
df = df.assign(Date=pd.to_datetime(df["Date"]))

In [None]:
def get_monthly_data(transactions, reference_date=None):
  """Return the transactions dated within the month leading up to a reference date.

  Parameters
  ----------
  transactions : pd.DataFrame
      Frame with a datetime ``Date`` column.
  reference_date : datetime-like, optional
      Exclusive upper bound of the window. When omitted, the current
      timestamp (``pd.Timestamp("today")``) is used.

  Returns
  -------
  pd.DataFrame
      A copy of the rows satisfying ``window_start <= Date < reference_date``,
      where ``window_start`` is one calendar month before ``reference_date``.
  """
  if reference_date is None:
    window_end = pd.Timestamp("today")
  else:
    window_end = pd.Timestamp(reference_date)
  window_start = window_end - pd.DateOffset(months=1)

  # Build the mask from the frame that was passed in rather than from the
  # notebook-level `df`, so the function works on any transactions frame.
  dates = transactions["Date"]
  in_window = (dates >= window_start) & (dates < window_end)
  return transactions.loc[in_window].copy()

# Work on a copy so the filtered result never aliases the full transaction table.
monthly_data = get_monthly_data(transactions=df.copy())


In [None]:
# Show the first ten rows of the last-month window.
monthly_data.head(n=10)

Past Spending Analysis

In [None]:
def calculate_avg_spend_per_category(data):
  """Compute the average transaction amount for each spending category.

  Parameters
  ----------
  data : pd.DataFrame
      Frame with ``Category``, ``Amount`` and ``Date`` columns.

  Returns
  -------
  pd.Series
      Named ``avg_spend`` and indexed by ``Category``: the summed ``Amount``
      divided by the number of non-null ``Date`` entries in that category.
  """
  # Aggregate the frame that was passed in, not the notebook-level `df`.
  per_category = data.groupby("Category").agg(
      total_amount=("Amount", "sum"),
      n_transactions=("Date", "count"),
  )
  avg_spend = per_category["total_amount"] / per_category["n_transactions"]
  return avg_spend.rename("avg_spend")

# Per-category average spend over the selected window.
avg_monthly_spend = calculate_avg_spend_per_category(data=monthly_data)


In [None]:
def prepare_data_for_model(df, test_size=0.2, random_state=42):
  """Build train/test partitions for a regression on transaction amounts.

  Parameters
  ----------
  df : pd.DataFrame
      Frame with ``Category`` and ``Amount`` columns.
  test_size : float, optional
      Fraction of rows held out for testing; forwarded to ``train_test_split``.
  random_state : int or None, optional
      Seed forwarded to ``train_test_split`` so the split is reproducible
      across notebook runs. Pass ``None`` for a different split on every call.

  Returns
  -------
  tuple
      ``(X_train, X_test, y_train, y_test)``. The feature frames hold one
      numeric indicator column per category and keep the row index of ``df``.
  """
  # Feature selection (consider including additional features based on your data)
  features = ["Category"]
  # Category is a text column, which a regressor cannot consume directly.
  # Encode before splitting so both partitions share the same columns.
  X = pd.get_dummies(df[features], columns=features, dtype=float)
  # Target variable: the amount of each individual transaction
  y = df["Amount"]
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state
  )
  return X_train, X_test, y_train, y_test

# Split the last-month transactions into training and held-out partitions
X_train, X_test, y_train, y_test = prepare_data_for_model(df=monthly_data)


In [None]:
# Baseline estimator: ordinary least-squares linear regression on the training partition
model = LinearRegression()
model.fit(X=X_train, y=y_train)


In [None]:
# Score the held-out partition: one predicted amount per row of X_test
predicted_spend = model.predict(X=X_test)


In [None]:
# Combine past spending analysis and model predictions (adjust based on your needs).
# `avg_monthly_spend` has one entry per category, while `predicted_spend` has one
# entry per held-out transaction, so the predictions are first averaged per
# category. The category of each held-out row is looked up through the row index
# that X_test shares with monthly_data.
test_categories = monthly_data.loc[X_test.index, "Category"]
predicted_by_category = (
    pd.Series(predicted_spend, index=X_test.index)
    .groupby(test_categories)
    .mean()
)
# Categories with no held-out rows fall back to their historical average.
predicted_by_category = (
    predicted_by_category
    .reindex(avg_monthly_spend.index)
    .fillna(avg_monthly_spend)
)
suggested_budget = 0.8 * avg_monthly_spend + 0.2 * predicted_by_category  # Example weighting

# Display or store suggested budget for each category
for category, budget in suggested_budget.items():
  print(f"Category: {category}, Suggested Budget: ${budget:.2f}")



In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor



# Define functions for data preparation and model training
def prepare_data(monthly_data):
  """Build an aligned feature matrix and target for the budget model.

  Parameters
  ----------
  monthly_data : pd.DataFrame
      Frame with ``Date``, ``Category`` and ``Amount`` columns. It is not
      modified; the derived ``Month`` column lives on an internal copy.

  Returns
  -------
  X : pd.DataFrame
      One row per transaction: the calendar ``Month`` (1-12) plus one numeric
      indicator column per category.
  y : pd.Series
      One value per row of ``X``: the average monthly spend of that row's
      category, i.e. the mean of the category's per-month totals.
  """
  dates = pd.to_datetime(monthly_data["Date"])
  # Derive Month before selecting features; assign() leaves the caller's frame untouched.
  data = monthly_data.assign(Month=dates.dt.month)

  # Total spend per category for every observed year-month. Grouping on the
  # period rather than the bare month number keeps e.g. January 2023 and
  # January 2024 as separate months instead of summing them together.
  monthly_totals = data.groupby(["Category", dates.dt.to_period("M")])["Amount"].sum()
  avg_by_category = monthly_totals.groupby(level="Category").mean()

  # One-hot encode the categorical feature so a regressor can consume it.
  features = ['Category', 'Month']  # You can add more features if needed
  X = pd.get_dummies(data[features], columns=['Category'], dtype=float)

  # Broadcast the per-category average onto the rows so X and y have equal length.
  y = data['Category'].map(avg_by_category).rename("avg_monthly_spend")

  return X, y

def train_model(X, y):
  """Fit a 100-tree random-forest regressor (seed 42) on ``X`` and ``y`` and return it."""
  forest = RandomForestRegressor(n_estimators=100, random_state=42)
  return forest.fit(X, y)

# Build features and target from a copy of the full table, leaving `df` untouched
X, y = prepare_data(monthly_data=df.copy())

# Fit the random-forest regressor on the prepared data
model = train_model(X=X, y=y)

# Function to predict budget suggestions
def predict_budget(model, features):
  """Predict a budget for one or more categories.

  Parameters
  ----------
  model : fitted estimator
      Object with a ``predict`` method. If it exposes ``feature_names_in_``
      (as scikit-learn estimators fitted on a DataFrame do), the encoded
      request is aligned to exactly those columns.
  features : dict
      Must contain ``'Category'`` (a single label or a list of labels) plus
      the remaining model inputs, e.g. ``'Month'``.

  Returns
  -------
  dict
      Maps each requested category label to its predicted value (``float``).
  """
  categories = features['Category']
  if pd.api.types.is_list_like(categories):
    categories = list(categories)
    X_pred = pd.DataFrame(features)
  else:
    # A lone label must become one row; zipping over a bare string would
    # otherwise pair predictions with its individual characters.
    categories = [categories]
    X_pred = pd.DataFrame([features])

  X_pred = pd.get_dummies(X_pred, dtype=float)  # One-hot encode categorical features

  # The request only yields indicator columns for the categories it mentions;
  # add the missing training columns as zeros and restore the training order.
  trained_columns = getattr(model, "feature_names_in_", None)
  if trained_columns is not None:
    X_pred = X_pred.reindex(columns=list(trained_columns), fill_value=0.0)

  predictions = model.predict(X_pred)
  return {label: float(value) for label, value in zip(categories, predictions)}

# Example usage: ask the model for one category in one calendar month
category = "Restaurants"
month = 5  # Assuming May 2024
features = dict(Category=category, Month=month)
budget_suggestions = predict_budget(model=model, features=features)
print(f"Predicted average spend for '{category}' in May 2024: ${budget_suggestions[category]:.2f}")
