<a href="https://colab.research.google.com/github/Anonymous143w/New-TP/blob/main/Mediacal_cost_Predication/Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
from google.colab import drive
drive.mount('/content/drive')

import os
import yaml
import logging
import warnings
warnings.filterwarnings('ignore')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
# Create project directory structure in Google Drive
BASE_DIR = '/content/drive/MyDrive/Medical Cost/'
DIRS = [
    '',
    'data/raw',
    'data/processed',
    'models',
    'config',
    'logs'
]

for dir_path in DIRS:
    full_path = os.path.join(BASE_DIR, dir_path)
    os.makedirs(full_path, exist_ok=True)

In [40]:
# Create config file
config = {
    'data': {
        'raw_data_path': os.path.join(BASE_DIR, 'data/raw/insurance.csv'),
        'processed_data_path': os.path.join(BASE_DIR, 'data/processed/insurance_processed.csv'),
        'test_size': 0.2,
        'random_state': 42
    },
    'model': {
        'type': 'polynomial',
        'degree': 2,
        'output_path': os.path.join(BASE_DIR, 'models/medical_cost_model.pkl')
    },
    'mlflow': {
        'tracking_uri': os.path.join(BASE_DIR, 'mlruns'),
        'experiment_name': 'medical_cost_prediction'
    }
}

In [None]:
# Save config
with open(os.path.join(BASE_DIR, 'config/config.yaml'), 'w') as f:
    yaml.dump(config, f)

print("Project structure created in Google Drive!")

Project structure created in Google Drive!


In [None]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.16.2-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.16.2 (from mlflow)
  Downloading mlflow_skinny-2.16.2-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.13.3-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.3-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.16.2->mlflow)
  Downloading databricks_sdk-0.34.0-py3-none-any.whl.metadata (37 kB)
Collecting gitpython<4,>=3.1.9 (from mlflow-skinny==2.16.2->mlflow)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting Mako (from alembic!=1.10.0,<2->mlflow)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
C

In [None]:
!pip install fastapi

Collecting fastapi
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting starlette<0.39.0,>=0.37.2 (from fastapi)
  Downloading starlette-0.38.6-py3-none-any.whl.metadata (6.0 kB)
Downloading fastapi-0.115.0-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.6/94.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading starlette-0.38.6-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.5/71.5 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: starlette, fastapi
Successfully installed fastapi-0.115.0 starlette-0.38.6


In [54]:
!pip install uvicorn



In [None]:
!pip install pyyaml



In [None]:
!pip install -U scikit-learn



In [75]:
#Data ingestion and preprocessing classes
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures

class DataIngestion:
    def __init__(self, config):
        self.config = config

    def initiate_data_ingestion(self):
        logging.info("Initiating data ingestion")
        try:
            from google.colab import files
            print("Please upload the insurance.csv file when prompted")
            uploaded = files.upload()

            # Save uploaded file to Google Drive
            with open(self.config['data']['raw_data_path'], 'wb') as f:
                f.write(uploaded[list(uploaded.keys())[0]])

            df = pd.read_csv(self.config['data']['raw_data_path'])

            train_set, test_set = train_test_split(
                df,
                test_size=self.config['data']['test_size'],
                random_state=self.config['data']['random_state']
            )

            return train_set, test_set

        except Exception as e:
            logging.error(f"Exception occurred in data ingestion: {e}")
            raise e

class DataPreprocessor:
    def __init__(self, config):
        self.config = config
        self.le = LabelEncoder()

    def transform_features(self, df):
        df_processed = df.copy()

        # Encode categorical variables
        categorical_cols = ['sex', 'smoker', 'region']
        for col in categorical_cols:
            df_processed[col] = self.le.fit_transform(df_processed[col])

        # Drop the 'charges' column only if it exists (during training)
        if 'charges' in df_processed.columns:
            X = df_processed.drop(['charges'], axis=1)
            y = df_processed['charges']
        else:
            X = df_processed
            y = None

        # Apply polynomial features if specified
        if self.config['model']['type'] == 'polynomial':
            poly = PolynomialFeatures(degree=self.config['model']['degree'])
            X = poly.fit_transform(X)

        return X, y


In [76]:
#Model training with MLflow
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
import joblib

class ModelTrainer:
    def __init__(self, config):
        self.config = config
        self.model = LinearRegression()

    def train(self, X_train, y_train, X_test, y_test):
        mlflow.set_tracking_uri(self.config['mlflow']['tracking_uri'])
        mlflow.set_experiment(self.config['mlflow']['experiment_name'])

        with mlflow.start_run():
            self.model.fit(X_train, y_train)

            # Make predictions
            y_train_pred = self.model.predict(X_train)
            y_test_pred = self.model.predict(X_test)

            # Calculate metrics
            train_r2 = r2_score(y_train, y_train_pred)
            test_r2 = r2_score(y_test, y_test_pred)
            test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred))

            # Log metrics
            mlflow.log_metric("train_r2", train_r2)
            mlflow.log_metric("test_r2", test_r2)
            mlflow.log_metric("test_rmse", test_rmse)

            # Log model
            mlflow.sklearn.log_model(self.model, "model")

            # Save model to Google Drive
            joblib.dump(self.model, self.config['model']['output_path'])

            print(f"Training Results:")
            print(f"Train R2 Score: {train_r2:.4f}")
            print(f"Test R2 Score: {test_r2:.4f}")
            print(f"Test RMSE: {test_rmse:.4f}")

        return self.model

In [77]:
#Training Pipeline
class TrainingPipeline:
    def __init__(self, config_path):
        with open(config_path) as f:
            self.config = yaml.safe_load(f)

    def run_pipeline(self):
        try:
            # Data Ingestion
            data_ingestion = DataIngestion(self.config)
            train_data, test_data = data_ingestion.initiate_data_ingestion()
            print("Data ingestion completed")

            # Data Preprocessing
            preprocessor = DataPreprocessor(self.config)
            X_train, y_train = preprocessor.transform_features(train_data)
            X_test, y_test = preprocessor.transform_features(test_data)
            print("Data preprocessing completed")

            # Model Training
            trainer = ModelTrainer(self.config)
            model = trainer.train(X_train, y_train, X_test, y_test)
            print("Model training completed")

            return model

        except Exception as e:
            logging.error(f"Error in training pipeline: {e}")
            raise e

In [79]:
#Run the pipeline
config_path = os.path.join(BASE_DIR, 'config/config.yaml')
pipeline = TrainingPipeline(config_path)
model = pipeline.run_pipeline()

Please upload the insurance.csv file when prompted


Saving insurance.csv to insurance (2).csv
Data ingestion completed
Data preprocessing completed




Training Results:
Train R2 Score: 0.8374
Test R2 Score: 0.8649
Test RMSE: 4579.3788
Model training completed


In [80]:
# FastAPI implementation
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import nest_asyncio
import uvicorn
from fastapi.middleware.cors import CORSMiddleware

In [81]:
class MedicalCostInput(BaseModel):
    age: int
    sex: str
    bmi: float
    children: int
    smoker: str
    region: str

app = FastAPI()

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

In [83]:
# Load the saved model
model = joblib.load(config['model']['output_path'])
preprocessor = DataPreprocessor(config)

@app.post("/predict")
async def predict_cost(input_data: MedicalCostInput):
    try:
        # Convert input data to DataFrame
        input_df = pd.DataFrame([input_data.dict()])

        # Preprocess the input
        X, _ = preprocessor.transform_features(input_df)

        # Make prediction
        prediction = model.predict(X)[0]

        return {"predicted_cost": float(prediction)}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

# Enable nested asyncio
nest_asyncio.apply()

In [72]:
'''
# Start the FastAPI server
port = 7000  # Or any other unused port
print(f"Starting server on port {port}...")
uvicorn.run(app, port=port)
'''

Starting server on port 7000...


INFO:     Started server process [4560]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:7000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [4560]


In [61]:
!pip install pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [84]:
!ngrok authtoken 2nFgDi15pDxpx4xZ5yrYEF3NNGy_3Y3UURBaSQgF3C8Xd1pvq


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [85]:
from pyngrok import ngrok

# Start ngrok to tunnel to port 7000
public_url = ngrok.connect(8000)
print("Public URL:", public_url)


Public URL: NgrokTunnel: "https://f47b-34-106-196-67.ngrok-free.app" -> "http://localhost:8000"
