In [1]:
import pymongo
import pandas as pd
import os
import dotenv
import logging
import time
from datetime import datetime


In [17]:
class DataIngestion:
    """Move tabular data between CSV files and one MongoDB collection.

    A ``pymongo.MongoClient`` is created on construction and kept for the
    lifetime of the object. Call :meth:`close` (or use the instance in a
    ``with`` block) to release it deterministically; ``__del__`` is kept only
    as a best-effort fallback.
    """

    def __init__(self, mongo_uri, db_name, collection_name):
        """Create the client and bind the target database and collection.

        Args:
            mongo_uri: Connection string handed to ``pymongo.MongoClient``.
            db_name: Name of the database to use.
            collection_name: Name of the collection inside ``db_name``.
        """
        # Assigned first so close()/__del__ stay safe even if MongoClient raises below.
        self.client = None
        self._closed = False

        self.mongo_uri = mongo_uri
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db_name = db_name
        self.collection_name = collection_name
        self.db = self.client[self.db_name]
        self.collection = self.db[self.collection_name]

    def upload_csv_to_mongodb(self, csv_file_path):
        """Read a CSV file with pandas and insert each row as one document.

        Args:
            csv_file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV.

        Prints the number of rows loaded and inserted. Nothing is inserted
        when the CSV contains no data rows.
        """
        df = pd.read_csv(csv_file_path)
        print(f"Loaded {len(df)} records from {csv_file_path}")

        # One dict per row, keyed by column name.
        data = df.to_dict(orient='records')
        if data:
            self.collection.insert_many(data)
            print(f"Inserted {len(data)} records into {self.db_name}.{self.collection_name}")
        else:
            print("No data to insert.")

    def download_data_from_mongodb(self):
        """Fetch every document of the collection as a DataFrame.

        Returns:
            A ``pandas.DataFrame`` built from the documents without MongoDB's
            ``_id`` field, or ``None`` (after printing a message) when the
            collection holds no documents.
        """
        # Exclude `_id` through the projection so it is never transferred,
        # instead of popping it from every document afterwards.
        data = list(self.collection.find({}, {'_id': 0}))

        if not data:
            print("Không có dữ liệu trong collection.")
            return None

        # Convert the documents to a DataFrame.
        return pd.DataFrame(data)

    def delete_all_data(self):
        """Delete every document of the collection and print how many were removed."""
        result = self.collection.delete_many({})
        print(f"Đã xoá {result.deleted_count} documents khỏi {self.db_name}.{self.collection_name}")

    def close(self):
        """Close the MongoDB client once; later calls are no-ops."""
        client = getattr(self, "client", None)
        if client is None or getattr(self, "_closed", False):
            return
        client.close()
        self._closed = True
        print("MongoDB connection closed.")

    def __enter__(self):
        """Allow ``with DataIngestion(...) as ingestion:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the client when the ``with`` block ends; exceptions propagate."""
        self.close()
        return False

    # Close the connection as a last resort when the object is garbage-collected.
    def __del__(self):
        # close() tolerates a missing client, so a failed __init__ does not
        # turn into an AttributeError inside the finalizer.
        self.close()



In [16]:
if __name__ == "__main__":
    # Load environment variables from .env file
    dotenv.load_dotenv()

    # Fail fast with a clear message: an unset MONGO_URI would otherwise make
    # MongoClient fall back to its default host, and the other names would
    # only fail later with less obvious errors.
    required_names = ("MONGO_URI", "DB_NAME", "COLLECTION_NAME", "CSV_FILE")
    settings = {name: os.getenv(name) for name in required_names}
    missing_names = [name for name, value in settings.items() if not value]
    if missing_names:
        raise RuntimeError(
            f"Missing required environment variables: {', '.join(missing_names)}"
        )

    mongo_uri = settings["MONGO_URI"]
    db_name = settings["DB_NAME"]
    collection_name = settings["COLLECTION_NAME"]
    csv_file_path = settings["CSV_FILE"]

    # Verify a local CSV exists BEFORE wiping the collection, so a bad path
    # cannot leave the collection empty. URL-style sources are left for
    # pd.read_csv to resolve.
    if "://" not in csv_file_path and not os.path.isfile(csv_file_path):
        raise FileNotFoundError(f"CSV file not found: {csv_file_path}")

    data_ingestion = DataIngestion(mongo_uri, db_name, collection_name)
    data_ingestion.delete_all_data()  # Remove the old data before re-uploading

    data_ingestion.upload_csv_to_mongodb(csv_file_path)

    df = data_ingestion.download_data_from_mongodb()
    if df is not None:
        print(df.head())

Đã xoá 10000 documents khỏi mlops.root_file
Loaded 10000 records from D:\Show_me_everything\midterm_mlops\data\Telecom_customer_churn.csv


KeyboardInterrupt: 

In [12]:
# Display the DataFrame bound to `df` by the ingestion cell (the result of
# download_data_from_mongodb()). `df` is None when the collection was empty and
# is not refreshed if that cell did not run to completion.
df

Unnamed: 0,Customer_ID,rev_Mean,mou_Mean,totmrc_Mean,da_Mean,ovrmou_Mean,ovrrev_Mean,vceovr_Mean,datovr_Mean,roam_Mean,...,refurb_new,hnd_price,phones,models,marital,adults,income,numbcars,ethnic,creditcd
0,1867825,299.671415,286.173570,31.476885,0.804606,45.316933,8.829315,35.792128,6.534869,7.652628,...,R,74.592336,3,1,M,5,81992.93952,2,C,Y
1,3744854,92.584471,363.795294,4.555770,0.713896,31.324743,13.921041,13.851242,5.665785,3.096415,...,R,186.391656,1,2,M,2,47618.61279,1,A,Y
2,1458591,371.805044,261.054198,20.571977,0.575645,88.676796,10.404854,15.835436,6.715007,13.590641,...,N,96.455375,2,1,M,5,69649.44830,1,A,N
3,499914,237.586761,374.034082,20.475378,0.655410,70.911423,8.289291,10.739534,3.974069,13.550546,...,N,238.272712,1,1,M,1,68099.63422,3,A,N
4,8478454,463.464125,322.684882,41.554616,0.407415,51.731316,7.035586,13.199957,4.473188,8.596726,...,R,133.701343,1,2,D,5,125079.70250,2,B,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,7271733,157.025824,432.569779,20.847124,0.432566,13.393915,16.830843,15.784978,6.259984,10.794632,...,R,143.321136,1,2,M,3,19994.31289,1,B,N
19996,714120,369.972222,287.524120,27.361740,0.049464,72.983420,6.200724,21.492590,6.467971,8.973374,...,R,146.929253,3,2,M,4,68875.63353,2,C,N
19997,9115786,249.429742,317.976964,19.561120,0.414199,58.861880,9.780115,25.808462,6.524170,11.447157,...,N,193.081904,2,2,S,1,52935.01142,1,C,Y
19998,2844739,139.008133,197.331937,14.508734,0.613745,33.759629,17.946851,21.232987,2.884560,1.938345,...,N,123.943428,1,2,S,5,69613.21020,1,C,Y
