In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

def train_model(data_path, param_grid=None, cv=3, n_jobs=None):
    """Train a life-expectancy regressor from a CSV file.

    The CSV is read with pandas, rows without a ``Life_Expectancy`` value are
    discarded, and a pipeline (one-hot encoding of ``Country`` followed by a
    ``RandomForestRegressor``) is tuned with ``GridSearchCV`` using negative
    mean squared error as the score.

    Parameters
    ----------
    data_path : str or path-like
        Location of the CSV file. It must contain the columns ``Country``,
        ``Age``, ``Cigarettes_Per_Day`` and ``Life_Expectancy``.
    param_grid : dict, optional
        Hyper-parameter grid handed to ``GridSearchCV``. Keys must use the
        ``regressor__`` prefix of the pipeline step. When omitted, the
        built-in 36-candidate grid is used.
    cv : int or cross-validation splitter, default 3
        Passed through to ``GridSearchCV``.
    n_jobs : int, optional
        Passed through to ``GridSearchCV``; ``-1`` uses every CPU core.

    Returns
    -------
    sklearn.pipeline.Pipeline
        ``best_estimator_`` of the fitted grid search.

    Raises
    ------
    KeyError
        If any required column is absent from the CSV.
    ValueError
        If no row has a ``Life_Expectancy`` value to train on.
    """
    feature_columns = ['Country', 'Age', 'Cigarettes_Per_Day']
    target_column = 'Life_Expectancy'
    categorical_features = ['Country']

    # Load the dataset
    raw_data = pd.read_csv(data_path)

    # Fail early, naming every absent column, instead of letting pandas raise
    # on whichever lookup happens to come first.
    missing_columns = [
        column
        for column in feature_columns + [target_column]
        if column not in raw_data.columns
    ]
    if missing_columns:
        raise KeyError(
            f"{data_path!r} is missing required column(s): {missing_columns}"
        )

    # Rows without a target cannot be used for supervised training. A new
    # frame is produced rather than mutating the loaded one in place.
    data = raw_data.dropna(subset=[target_column])
    if data.empty:
        raise ValueError(
            f"{data_path!r} has no rows with a non-missing "
            f"'{target_column}' value to train on"
        )
    X = data[feature_columns]
    y = data[target_column]

    # One-hot encode the categorical column; the remaining columns pass
    # through untouched. Categories unseen during fit are ignored at predict
    # time (handle_unknown='ignore').
    preprocessor = ColumnTransformer(
        transformers=[
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ],
        remainder='passthrough'
    )

    # Preprocessing and regressor live in one pipeline so the encoder is
    # refit inside each cross-validation fold.
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(random_state=42))
    ])

    if param_grid is None:
        param_grid = {
            'regressor__n_estimators': [50, 100, 200],
            'regressor__max_depth': [None, 10, 20, 30],
            'regressor__min_samples_split': [2, 5, 10]
        }
    grid_search = GridSearchCV(
        pipeline,
        param_grid,
        cv=cv,
        scoring='neg_mean_squared_error',
        verbose=1,
        n_jobs=n_jobs,
    )

    # Train the model (fits the entire pipeline for every candidate and fold)
    grid_search.fit(X, y)

    return grid_search.best_estimator_

def predict_life_expectancy(model, country, age, cigarettes_per_day):
    """Predict life expectancy for a single person.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``; it receives a one-row
        DataFrame with the columns ``Country``, ``Age`` and
        ``Cigarettes_Per_Day``.
    country : value stored in the ``Country`` column.
    age : value stored in the ``Age`` column.
    cigarettes_per_day : value stored in the ``Cigarettes_Per_Day`` column.

    Returns
    -------
    The first element of whatever ``model.predict`` returns.
    """
    feature_values = {
        'Country': country,
        'Age': age,
        'Cigarettes_Per_Day': cigarettes_per_day,
    }

    # Wrap each scalar in a one-element list so the frame has exactly one row
    single_row = pd.DataFrame(
        {name: [value] for name, value in feature_values.items()}
    )

    # The model returns one prediction per row; hand back the only one
    return model.predict(single_row)[0]

# The dataset location is defined once here and handed to the training routine
data_path = 'age_detection_model.csv'
model = train_model(data_path)

# Example usage: a single prediction from the tuned pipeline.
# NOTE: despite its name, `predicted_age` holds the predicted life expectancy.
predicted_age = predict_life_expectancy(
    model,
    country='USA',
    age=30,
    cigarettes_per_day=5,
)
print(f"Predicted Life Expectancy: {predicted_age}")


Fitting 3 folds for each of 36 candidates, totalling 108 fits
Predicted Life Expectancy: 71.42458672516807


In [4]:
pip install joblib


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import joblib

# Persist the trained pipeline held in the notebook-level `model` variable.
# The value returned by joblib.dump is the last expression of the cell, so it
# is what the cell displays as its output.
joblib.dump(value=model, filename='age.pkl')



['age.pkl']

: 