In [4]:
import pandas as pd
import random

# Define expense categories and their corresponding rounding off amounts
expense_categories = ['luxury', 'basic', 'daily', 'commute']
rounding_off_factors = {'luxury': 50, 'basic': 10, 'daily': 15, 'commute': 20}

# Generate synthetic data
num_entries = 1000  # Adjust the number of entries as needed
expenses_data = {
    'expense_amount': [random.uniform(50, 500) for _ in range(num_entries)],
    'expense_category': [random.choice(expense_categories) for _ in range(num_entries)]
}

# Function to round off to nearest tens or hundreds strictly greater than the original value
def round_nearest(value):
    return ((value + 9) // 10) * 10  # To round off to nearest tens

def round_nearest_hundreds(value):
    return ((value + 99) // 100) * 100  # To round off to nearest hundreds

# Calculate rounded-off amount based on expense category
expenses_data['rounded_off_amount'] = [
    round_nearest(amount) if rounding_off_factors[category] != 50 else round_nearest_hundreds(amount)
    for amount, category in zip(expenses_data['expense_amount'], expenses_data['expense_category'])
]

# Create a DataFrame
df = pd.DataFrame(expenses_data)

# Display a sample of the generated dataset
print(df)


     expense_amount expense_category  rounded_off_amount
0        268.086151          commute               270.0
1        479.541131           luxury               500.0
2        195.390788            basic               200.0
3        121.966307            basic               130.0
4        432.892685            daily               440.0
..              ...              ...                 ...
995      132.291423           luxury               200.0
996      339.293748          commute               340.0
997      368.198223            basic               370.0
998      280.619894            daily               280.0
999      219.005059           luxury               300.0

[1000 rows x 3 columns]


In [5]:
df.to_csv('auto_round_off.csv')

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, accuracy_score

X = df[['expense_amount']]
y = df['rounded_off_amount']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)

rf_regressor.fit(X_train, y_train)

predictions = rf_regressor.predict(X_test)

new_expense_amount = 353
predicted_rounded_amount = rf_regressor.predict([[new_expense_amount]])
print(f"Predicted Rounded-off Amount: {predicted_rounded_amount[0]}")


Predicted Rounded-off Amount: 375.2


