In [1]:
import pymongo
import pandas as pd
import os
import dotenv
import logging
import time
from datetime import datetime


In [2]:
class DataIngestion:
    """Move tabular data between CSV files and a single MongoDB collection.

    The instance opens one ``pymongo.MongoClient`` in ``__init__`` and keeps
    handles to the selected database and collection.  Release the connection
    explicitly with :meth:`close` (or use the instance as a context manager);
    ``__del__`` remains only as a best-effort fallback because the moment an
    object is garbage-collected is not predictable.

    Args:
        mongo_uri: Connection string passed to ``pymongo.MongoClient``.
        db_name: Name of the database to use.
        collection_name: Name of the collection inside ``db_name``.
    """

    def __init__(self, mongo_uri, db_name, collection_name):
        self.mongo_uri = mongo_uri
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db_name = db_name
        self.collection_name = collection_name
        self.db = self.client[self.db_name]
        self.collection = self.db[self.collection_name]

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection on leaving the ``with`` block; never suppresses errors."""
        self.close()

    def upload_csv_to_mongodb(self, csv_file_path):
        """Read a CSV file and insert every row as one document.

        Args:
            csv_file_path: Path (or anything ``pandas.read_csv`` accepts) of
                the CSV file to load.

        Returns:
            None.  Progress is reported on stdout.
        """
        df = pd.read_csv(csv_file_path)
        print(f"Loaded {len(df)} records from {csv_file_path}")

        # One dict per row, keyed by column name.
        data = df.to_dict(orient='records')
        if data:
            self.collection.insert_many(data)
            print(f"Inserted {len(data)} records into {self.db_name}.{self.collection_name}")
        else:
            # insert_many is skipped for a header-only / empty CSV.
            print("No data to insert.")

    def download_data_from_mongodb(self):
        """Fetch every document of the collection as a DataFrame.

        MongoDB's ``_id`` field is excluded from the result.

        Returns:
            A ``pandas.DataFrame`` built from the documents, or ``None`` when
            the collection holds no documents (a message is printed instead).
        """
        # Exclude `_id` through the query projection instead of fetching it
        # and removing it from each document afterwards.
        data = list(self.collection.find({}, {'_id': False}))

        if not data:
            print("Không có dữ liệu trong collection.")
            return None

        # Convert the list of documents to a DataFrame.
        return pd.DataFrame(data)

    def delete_all_data(self):
        """Delete every document in the collection and print how many were removed."""
        result = self.collection.delete_many({})
        print(f"Đã xoá {result.deleted_count} documents khỏi {self.db_name}.{self.collection_name}")

    def close(self):
        """Close the MongoDB client once; further calls do nothing.

        Safe to call on an instance whose ``__init__`` did not get as far as
        creating ``self.client``.
        """
        client = getattr(self, "client", None)
        if client is None or getattr(self, "_closed", False):
            return
        # Mark as closed first so a failing close() is not retried by __del__.
        self._closed = True
        client.close()
        print("MongoDB connection closed.")

    # Fallback: close the connection when the object is garbage-collected.
    def __del__(self):
        self.close()



In [5]:
if __name__ == "__main__":
    # Load environment variables from the .env file; override=True lets the
    # file's values replace variables already set in the process environment.
    dotenv.load_dotenv(override=True)

    mongo_uri = os.getenv("MONGO_URI")
    db_name = os.getenv("DB_NAME")
    collection_name = os.getenv("COLLECTION_NAME")
    csv_file_path = os.getenv("CSV_FILE")

    # os.getenv returns None for unset variables; fail here with a clear
    # message rather than passing None on to MongoDB / pandas.
    missing_settings = [
        name
        for name, value in (
            ("MONGO_URI", mongo_uri),
            ("DB_NAME", db_name),
            ("COLLECTION_NAME", collection_name),
            ("CSV_FILE", csv_file_path),
        )
        if not value
    ]
    if missing_settings:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing_settings)
        )

    data_ingestion = DataIngestion(mongo_uri, db_name, collection_name)
    # Uncomment to remove previously stored documents first; otherwise every
    # run of this cell inserts the CSV rows again.
    # data_ingestion.delete_all_data()

    data_ingestion.upload_csv_to_mongodb(csv_file_path)

    # download_data_from_mongodb returns None when the collection is empty.
    df = data_ingestion.download_data_from_mongodb()
    if df is not None:
        print(df.head())

Loaded 37010 records from D:\Show_me_everything\midterm_mlops\data\web_churn.csv
MongoDB connection closed.
Inserted 37010 records into mlops.root_file
            user_id  age gender region_category membership_category  \
0  9f420209e7d129f3   29      F             NaN       No Membership   
1  ac6e97806267549e   50      M             NaN    Basic Membership   
2  a6aa19b1580eed4e   26      F            City   Silver Membership   
3  aeee343277211c2f   63      F         Village       No Membership   
4  82448b5c8ce6390c   64      M            Town   Silver Membership   

  joining_date joined_through_referral     preferred_offer_types  \
0   2017-04-05                     Yes            Without Offers   
1   2017-03-31                     Yes            Without Offers   
2   2017-02-11                     NaN  Credit/Debit Card Offers   
3   2015-12-23                      No  Credit/Debit Card Offers   
4   2015-03-20                     NaN     Gift Vouchers/Coupons   

  medium_of_

In [24]:
# Show `df` as this cell's output.
# NOTE(review): the output recorded under this cell has different columns
# (customerID, tenure, Churn, ...) than the frame printed by the ingestion
# cell (user_id, age, ...), and the execution counter jumps from 5 to 24, so
# `df` was presumably rebound by cells not shown here. Confirm this cell still
# shows the intended data after Restart Kernel -> Run All.
df

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes
