Step 1: Create a Folder and Initialize a Git Repository
Open a terminal, create a new project folder, and initialize a Git repository in it:
mkdir ml_project
cd ml_project
git init

Step 2: Create a Virtual Environment
Create and activate a virtual environment (run only the activation command that matches your operating system):
# Create a virtual environment
python -m venv venv

# Activate the virtual environment (Windows)
venv\Scripts\activate

# Activate the virtual environment (macOS/Linux)
source venv/bin/activate

Step 3: Create Folder Structure
Create the folder structure (the commands below are for macOS/Linux, or Git Bash on Windows):
mkdir -p src/components src/pipeline
touch src/__init__.py src/logger.py src/exception.py src/utils.py
touch src/components/__init__.py src/components/data_ingestion.py src/components/data_transformation.py src/components/model_trainer.py
touch src/pipeline/__init__.py src/pipeline/predict_pipeline.py src/pipeline/train_pipeline.py
touch import_data.py setup.py requirements.txt
mkdir notebooks

Step 4: Add Basic Repository Files and Make the First Commit
Create .gitignore, README.md, and LICENSE files:

touch .gitignore README.md LICENSE

Add common Python ignores to .gitignore:

__pycache__/
*.pyc
*.pyo
.vscode/
.venv/
venv/
env/
.DS_Store

Add files and commit to git:

git add .
git commit -m "Initial project setup"

Step 5: Setup setup.py and requirements.txt
Open setup.py and add:
from setuptools import find_packages, setup

# Runtime dependencies; keep this list in sync with requirements.txt.
_INSTALL_REQUIRES = ["pandas", "scikit-learn", "numpy", "Flask"]

# Package metadata for the ml_project distribution. find_packages() is used
# so the package list does not have to be maintained by hand.
setup(
    name="ml_project",
    version="0.1",
    author="Your Name",
    author_email="your.email@example.com",
    packages=find_packages(),
    install_requires=_INSTALL_REQUIRES,
)

Open requirements.txt and add:

pandas
scikit-learn
numpy
Flask

Install the dependencies:
pip install -r requirements.txt

Step 6: Write Logging and Exception Modules
Open src/logger.py and add:

import logging
import os
from datetime import datetime

# Each run writes to its own timestamped file inside the "logs" directory
# (created relative to the current working directory if missing).
LOG_DIR = "logs"
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(
    LOG_DIR,
    datetime.now().strftime("log_%Y%m%d_%H%M%S.log"),
)

# Configure the root logger once, when this module is first imported.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename=LOG_FILE,
)

# The root logger; other modules import this object to log through it.
logger = logging.getLogger()

Open src/exception.py and add:

class CustomException(Exception):
    """Application error that pairs a readable message with its cause.

    Args:
        message: Human-readable description of what went wrong.
        error: The underlying error (or error detail) being wrapped.
    """

    def __init__(self, message, error):
        super().__init__(message)
        # Python 3's Exception does not provide a ``message`` attribute, so
        # store it explicitly; __str__ below depends on it.
        self.message = message
        self.error = error

    def __str__(self):
        """Return the error and message in a single readable line."""
        return f"Error: {self.error}, Message: {self.message}"

Step 7: Create Jupyter Notebook
In the notebooks directory, create a new Jupyter notebook called eda_and_model_training.ipynb.
Step 8: Write the Import Data Script
Open import_data.py and add:

from sklearn.datasets import load_breast_cancer
import pandas as pd

def load_data():
    """Return the scikit-learn breast cancer dataset as a DataFrame.

    Feature columns are named after the dataset's ``feature_names``; the
    labels are appended as a final ``target`` column.
    """
    dataset = load_breast_cancer()
    frame = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    return frame.assign(target=dataset.target)

if __name__ == "__main__":
    # Quick smoke check: print the first rows of the loaded dataset.
    print(load_data().head())

Step 9: Write the Data Ingestion Script
Open src/components/data_ingestion.py and add:
from import_data import load_data

class DataIngestion:
    """Load the raw dataset and keep a reference to it.

    Attributes:
        data: The most recently ingested dataset, or ``None`` until
            ``ingest_data`` has been called.
    """

    def __init__(self):
        self.data = None

    def ingest_data(self):
        """Load the dataset via ``load_data``, cache it on ``self.data``, and return it."""
        loaded = load_data()
        self.data = loaded
        return loaded

if __name__ == "__main__":
    # Smoke check: ingest the dataset and preview the first rows.
    # Run from the project root (e.g. ``python -m src.components.data_ingestion``)
    # so that the top-level ``import_data`` module is importable.
    ingestion = DataIngestion()
    print(ingestion.ingest_data().head())

Step 10: Data Transformation
Open src/components/data_transformation.py and add:

from sklearn.preprocessing import StandardScaler

class DataTransformation:
    """Split a dataset into standardized features and its target column.

    Args:
        data: DataFrame holding the feature columns plus a ``target`` column.

    Attributes:
        data: The DataFrame passed to the constructor.
        scaler: The ``StandardScaler`` fitted by the most recent
            ``transform_data`` call, or ``None`` before the first call.
    """

    def __init__(self, data):
        self.data = data
        self.scaler = None

    def transform_data(self):
        """Standardize the feature columns and return them with the target.

        Returns:
            A ``(features_scaled, target)`` tuple: the scaled features as
            returned by ``StandardScaler.fit_transform`` and the unmodified
            ``target`` column.
        """
        features = self.data.drop('target', axis=1)
        target = self.data['target']
        # Keep the fitted scaler on the instance instead of discarding it, so
        # the same scaling can be re-applied to new data at prediction time.
        self.scaler = StandardScaler()
        features_scaled = self.scaler.fit_transform(features)
        return features_scaled, target

if __name__ == "__main__":
    # Smoke check. Run from the project root with
    # ``python -m src.components.data_transformation``.
    # The package-qualified import (the same path app.py uses) is required:
    # a bare ``data_ingestion`` import only resolves when this file's own
    # directory is on sys.path, and in that case data_ingestion's
    # ``from import_data import load_data`` cannot be resolved.
    from src.components.data_ingestion import DataIngestion

    di = DataIngestion()
    data = di.ingest_data()
    dt = DataTransformation(data)
    features, target = dt.transform_data()
    print(features[:5], target[:5])

Step 11: Model Trainer
Open src/components/model_trainer.py and add:

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

class ModelTrainer:
    """Fit a logistic regression classifier and score it on a hold-out split.

    Args:
        features: Feature matrix to train on.
        target: Labels aligned with ``features``.
    """

    def __init__(self, features, target):
        self.features = features
        self.target = target

    def train_model(self):
        """Train on 80% of the data and evaluate on the remaining 20%.

        The split uses a fixed ``random_state`` of 42, so repeated runs on
        the same data use the same partition.

        Returns:
            A ``(model, accuracy)`` tuple: the fitted ``LogisticRegression``
            and its accuracy on the held-out portion.
        """
        split = train_test_split(
            self.features,
            self.target,
            test_size=0.2,
            random_state=42,
        )
        train_x, test_x, train_y, test_y = split
        classifier = LogisticRegression()
        classifier.fit(train_x, train_y)
        score = accuracy_score(test_y, classifier.predict(test_x))
        return classifier, score

if __name__ == "__main__":
    # Smoke check. Run from the project root with
    # ``python -m src.components.model_trainer``.
    # Package-qualified imports (the same paths app.py uses) are required:
    # bare ``data_ingestion`` / ``data_transformation`` imports only resolve
    # when this file's own directory is on sys.path, and in that case
    # data_ingestion's ``from import_data import load_data`` cannot be
    # resolved.
    from src.components.data_ingestion import DataIngestion
    from src.components.data_transformation import DataTransformation

    di = DataIngestion()
    data = di.ingest_data()
    dt = DataTransformation(data)
    features, target = dt.transform_data()

    mt = ModelTrainer(features, target)
    model, accuracy = mt.train_model()
    print(f"Model Accuracy: {accuracy}")

Step 12: Flask Application
Create app.py in the project root (next to import_data.py) and add:

from flask import Flask, request, jsonify
from src.components.data_ingestion import DataIngestion
from src.components.data_transformation import DataTransformation
from src.components.model_trainer import ModelTrainer

app = Flask(__name__)


@app.route('/train', methods=['GET'])
def train():
    """Run ingestion, transformation and training, then return the accuracy.

    Returns:
        A JSON response of the form ``{"accuracy": <float>}``.
    """
    raw = DataIngestion().ingest_data()
    features, target = DataTransformation(raw).transform_data()
    # Only the accuracy is reported; the fitted model itself is not kept.
    _, accuracy = ModelTrainer(features, target).train_model()
    return jsonify({"accuracy": accuracy})

if __name__ == '__main__':
    # Start Flask's built-in development server when this file is run
    # directly. debug=True is intended for local development only; turn it
    # off before exposing the app to other machines.
    app.run(debug=True)

Step 13: Update Git Repository
Add and commit the changes, then push them (this assumes a remote named origin is already configured and that your branch is named main):

git add .
git commit -m "Added ML project structure and initial code"
git push origin main
