# In [None]:

# 1. **Set Up Project Structure**:
#    - Create the necessary folders and files.
#    - Initialize a Git repository.
#    - Create a virtual environment.
   
# 2. **Create and Configure Files**:
#    - Implement the logging and exception handling.
#    - Write utility functions.
#    - Create the necessary components for data ingestion, data transformation, model training, and pipelines.
#    - Create the setup script and requirements file.
   
# 3. **Implement Machine Learning Pipeline**:
#    - Perform EDA, feature engineering, and model training in a Jupyter notebook.
#    - Write a script to import the dataset into MongoDB and another to ingest data from MongoDB.
#    - Create and train the ML model.
   
# 4. **Deploy the Project Using Flask**:
#    - Set up a Flask application to serve the model.

# 5. **Push the Code to GitHub**:
#    - Commit and push the code to a GitHub repository.
#    - Ensure the repository is publicly accessible.


# ### 1. Set Up Project Structure

# ```bash
# mkdir ml_project
# cd ml_project
# git init
# python -m venv venv
# source venv/bin/activate
# ```

# ### 2. Create and Configure Files

# ```python
# # Create the folder structure
# import os

# folders = [
#     'src',
#     'src/components',
#     'src/pipeline',
#     'notebooks'
# ]

# files = [
#     'src/__init__.py',
#     'src/logger.py',
#     'src/exception.py',
#     'src/utils.py',
#     'src/components/__init__.py',
#     'src/components/data_ingestion.py',
#     'src/components/data_transformation.py',
#     'src/components/model_trainer.py',
#     'src/pipeline/__init__.py',
#     'src/pipeline/predict_pipeline.py',
#     'src/pipeline/train_pipeline.py',
#     'import_data.py',
#     'setup.py',
#     'requirements.txt',
#     'README.md',
#     'LICENSE',
#     '.gitignore'
# ]

# for folder in folders:
#     os.makedirs(folder, exist_ok=True)

# for file in files:
#     with open(file, 'w') as f:
#         pass
# ```

# ### logger.py

# ```python
# import logging
# import os
# from datetime import datetime

# LOG_FILE = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.log"
# LOG_PATH = os.path.join(os.getcwd(), "logs", LOG_FILE)
# os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)

# logging.basicConfig(
#     filename=LOG_PATH,
#     format='[%(asctime)s] %(name)s - %(levelname)s - %(message)s',
#     level=logging.INFO
# )

# def get_logger():
#     return logging.getLogger()
# ```

# ### exception.py

# ```python
# import sys

# def error_message_detail(error, error_detail: sys):
#     _, _, exc_tb = error_detail.exc_info()
#     file_name = exc_tb.tb_frame.f_code.co_filename
#     error_message = f"Error occurred in script: {file_name}, line number: {exc_tb.tb_lineno}, error message: {str(error)}"
#     return error_message

# class CustomException(Exception):
#     def __init__(self, error_message, error_detail: sys):
#         super().__init__(error_message)
#         self.error_message = error_message_detail(error_message, error_detail)
    
#     def __str__(self):
#         return self.error_message
# ```

# ### utils.py

# ```python
# import os
# import yaml

# def read_yaml(file_path):
#     with open(file_path, 'r') as file:
#         return yaml.safe_load(file)
    
# def create_directory(dir_path):
#     os.makedirs(dir_path, exist_ok=True)
# ```

# ### import_data.py

# ```python
# from sklearn.datasets import load_breast_cancer
# import pandas as pd
# from pymongo import MongoClient

# def load_and_save_data():
#     data = load_breast_cancer()
#     df = pd.DataFrame(data.data, columns=data.feature_names)
#     df['target'] = data.target
    
#     client = MongoClient("mongodb://localhost:27017/")
#     db = client["ml_project"]
#     collection = db["breast_cancer"]
#     collection.insert_many(df.to_dict('records'))
#     client.close()
    
# if __name__ == "__main__":
#     load_and_save_data()
# ```

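# ### data_ingestion.py

# Note: `collection.find()` also returns MongoDB's auto-generated `_id` field, so the resulting DataFrame gains an `_id` column. Drop it (or exclude it with a projection such as `collection.find({}, {"_id": 0})`) before calling `transform_data`, which scales every column except `target`.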

# ```python
# from pymongo import MongoClient
# import pandas as pd

# def load_data_from_mongodb():
#     client = MongoClient("mongodb://localhost:27017/")
#     db = client["ml_project"]
#     collection = db["breast_cancer"]
#     data = pd.DataFrame(list(collection.find()))
#     client.close()
#     return data
# ```

# ### data_transformation.py

# ```python
# from sklearn.preprocessing import StandardScaler

# def transform_data(df):
#     scaler = StandardScaler()
#     features = df.drop(columns=['target'])
#     scaled_features = scaler.fit_transform(features)
#     return scaled_features, df['target']
# ```

# ### model_trainer.py

# ```python
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score

# def train_model(X, y):
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
#     model = LogisticRegression()
#     model.fit(X_train, y_train)
#     y_pred = model.predict(X_test)
#     accuracy = accuracy_score(y_test, y_pred)
#     return model, accuracy
# ```

# ### train_pipeline.py

# ```python
# from src.components.data_ingestion import load_data_from_mongodb
# from src.components.data_transformation import transform_data
# from src.components.model_trainer import train_model
# from src.logger import get_logger

# logger = get_logger()

# if __name__ == "__main__":
#     logger.info("Loading data from MongoDB")
#     df = load_data_from_mongodb()
    
#     logger.info("Transforming data")
#     X, y = transform_data(df)
    
#     logger.info("Training model")
#     model, accuracy = train_model(X, y)
    
#     logger.info(f"Model accuracy: {accuracy}")
# ```

# ### setup.py

# ```python
# from setuptools import setup, find_packages

# setup(
#     name="ml_project",
#     version="0.1",
#     packages=find_packages(),
#     install_requires=[
#         "numpy",
#         "pandas",
#         "scikit-learn",
#         "pymongo",
#         "Flask",
#         "gunicorn"
#     ]
# )
# ```

# ### requirements.txt

# ```
# numpy
# pandas
# scikit-learn
# pymongo
# Flask
# gunicorn
# ```

# ### Flask Application

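# Create a file named `app.py` in the project root (next to the `src` folder, so the `src.*` imports resolve) for the Flask application. The scaffolding script above does not create this file: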

# ```python
# from flask import Flask, request, jsonify
# from src.components.data_ingestion import load_data_from_mongodb
# from src.components.data_transformation import transform_data
# from src.components.model_trainer import train_model

# app = Flask(__name__)

# @app.route('/train', methods=['POST'])
# def train():
#     df = load_data_from_mongodb()
#     X, y = transform_data(df)
#     model, accuracy = train_model(X, y)
#     return jsonify({"accuracy": accuracy})

# if __name__ == "__main__":
#     app.run(debug=True)
# ```

# ### 3. Perform EDA and Feature Engineering in Jupyter Notebook

# Create a Jupyter notebook in the `notebooks` folder and perform EDA, feature engineering, and model training. Ensure the notebook is well-documented with explanations of each step.

# ### 4. Push Code to GitHub

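# After implementing the above steps, use the following commands to push your code to GitHub (if your local branch is not named `master`, e.g. it is `main`, substitute that name in the last command):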

# ```bash
# git add .
# git commit -m "Initial commit with project structure and code"
# git remote add origin <your-github-repo-url>
# git push -u origin master
# ```

# #