In [1]:
# Step 1: Import the necessary libraries

import pandas as pd         # For data manipulation and CSV handling
from pymongo import MongoClient  # For connecting to MongoDB
import csv                 # For CSV operations
import numpy as np         # For numerical operations

In [2]:
# Step 2: Create a Python class named "User"
# This class will hold the structure for each user’s data.

class User:
    """One user record: demographics, income, and a mapping of expenses.

    Attributes:
        age: The user's age, stored as given.
        gender: The user's gender, stored as given.
        total_income: The user's total income, stored as given.
        expenses: Mapping of expense category name -> amount. May be
            ``None`` or lack some categories; ``to_dict`` fills gaps with 0.
    """

    # Expense categories exported by to_dict(), in output (CSV column) order.
    EXPENSE_CATEGORIES = (
        'utilities',
        'entertainment',
        'school_fees',
        'shopping',
        'healthcare',
    )

    def __init__(self, age, gender, total_income, expenses):
        self.age = age
        self.gender = gender
        self.total_income = total_income
        self.expenses = expenses

    def to_dict(self):
        """Flatten the user into a single-level dict suitable for CSV writing.

        Returns:
            dict: ``age``, ``gender``, ``total_income`` followed by one key per
            entry of ``EXPENSE_CATEGORIES``. A category missing from
            ``expenses`` (or ``expenses`` itself being ``None``/empty) yields 0.
        """
        # Treat a missing/None expenses mapping as "no expenses recorded"
        # instead of failing with AttributeError on None.get(...).
        expenses = self.expenses or {}

        row = {
            'age': self.age,
            'gender': self.gender,
            'total_income': self.total_income,
        }
        for category in self.EXPENSE_CATEGORIES:
            row[category] = expenses.get(category, 0)
        return row


In [3]:
# Step 3: Loop through collected data and store it in a CSV file
# (1) Connect to MongoDB, (2) retrieve the data, and (3) write it to a CSV file.

from pymongo import MongoClient
import csv

# Connect to MongoDB. Using the client as a context manager closes it (and its
# connection pool) when the export finishes, even if an error occurs part-way.
# NOTE: the client is closed after this cell; create a new MongoClient if a
# later step needs to query the database again.
with MongoClient('mongodb://localhost:27017/') as client:
    db = client['user_data_db']
    collection = db['user_data']

    # Fetch all user data from MongoDB. find() returns a lazy cursor, so it is
    # consumed below while the client is still open.
    users_data = collection.find()

    # Open a CSV file to write the data. newline='' is what the csv module
    # expects; the encoding is pinned to UTF-8 so the file does not depend on
    # the platform's locale encoding (pandas.read_csv assumes UTF-8 by default).
    with open('collected_data.csv', 'w', newline='', encoding='utf-8') as file:
        fieldnames = ['age', 'gender', 'total_income', 'utilities', 'entertainment', 'school_fees', 'shopping', 'healthcare']
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        # Write header
        writer.writeheader()

        # Loop through users data and write one CSV row per document.
        for user_data in users_data:
            # 'age', 'gender' and 'total_income' are required: a document
            # missing one of them still raises KeyError. A missing or null
            # 'expenses' field is treated as "no expenses", which
            # User.to_dict() exports as 0 for every category.
            user = User(
                user_data['age'],
                user_data['gender'],
                user_data['total_income'],
                user_data.get('expenses') or {},
            )
            writer.writerow(user.to_dict())


In [4]:
# Step 4: Load the CSV file into a Jupyter notebook
# Use pandas to load and process the CSV data

import pandas as pd

# Read the exported CSV back into memory as a pandas DataFrame
df = pd.read_csv(filepath_or_buffer='collected_data.csv')

# Preview the first five rows; as the cell's last expression it is rendered as output
df.head(n=5)


Unnamed: 0,age,gender,total_income,utilities,entertainment,school_fees,shopping,healthcare
0,49,male,250000.0,10000.0,5000.0,97000.0,20000.0,20000.0
1,49,male,200000.0,10000.0,12000.0,15000.0,13000.0,14000.0
2,49,male,13000.0,23000.0,12000.0,10000.0,12000.0,11000.0
3,49,male,150000.0,12000.0,15000.0,10000.0,16000.0,20000.0
4,49,female,16000.0,12000.0,15000.0,10000.0,15000.0,10000.0


In [5]:
# Print a concise summary of the DataFrame: index range, per-column non-null
# counts and dtypes, and approximate memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   age            7 non-null      int64  
 1   gender         7 non-null      object 
 2   total_income   7 non-null      float64
 3   utilities      7 non-null      float64
 4   entertainment  7 non-null      float64
 5   school_fees    7 non-null      float64
 6   shopping       7 non-null      float64
 7   healthcare     7 non-null      float64
dtypes: float64(6), int64(1), object(1)
memory usage: 576.0+ bytes
