In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_classif
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Univariate score function for continuous targets. Imported here so the cell
# is runnable on its own without editing the shared import list.
from sklearn.feature_selection import f_regression

# Column the model is trained to predict (used as y for the XGBRegressor below).
TARGET_COLUMN = 'Время с момента смерти (часы)'

# Path is relative to the current working directory.
df = pd.read_csv('dataset.csv')

# Columns routed to the numeric and categorical sub-pipelines respectively.
# Any other column in the frame is not referenced by the ColumnTransformer.
numeric_features = ['Температура на момент обнаружения (°C)', 'Влажность (%)', 'Скорость ветра (км/ч)', 'Возраст']
categorical_features = ['Район', 'Причина смерти', 'Пол', 'Ранения', 'Место ранения']

# Numeric branch: mean-impute -> standardize -> PCA -> univariate scoring.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    # A float n_components asks PCA to keep enough components to reach that
    # fraction of explained variance.
    ('pca', PCA(n_components=0.95)),
    # The target is continuous, so score with f_regression. f_classif is an
    # ANOVA test for class labels and would treat each distinct target value as
    # its own class. With k='all' every component is kept, so this step only
    # records scores and does not change what the regressor sees.
    ('feature_selection', SelectKBest(f_regression, k='all'))
])

# Categorical branch: fill gaps with an explicit 'missing' level, then one-hot
# encode. Categories unseen during fit are ignored at predict time.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# `alpha` is the L1 regularization term in the XGBoost scikit-learn wrapper.
xg_reg = XGBRegressor(objective='reg:squarederror', colsample_bytree=0.5, learning_rate=0.05,
                      max_depth=10, alpha=1, n_estimators=100)

pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', xg_reg)])

# 80/20 split with a fixed seed so the partition is reproducible.
# NOTE(review): X_test / y_test are not used again in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=TARGET_COLUMN),
    df[TARGET_COLUMN],
    test_size=0.2,
    random_state=42,
)

pipeline.fit(X_train, y_train)

# One hand-written observation used to try out the fitted pipeline.
# Written as ordered (column, value) pairs; dict() keeps that insertion order.
# NOTE(review): 'Дата и время обнаружения' is not listed in numeric_features or
# categorical_features, so it presumably has no effect on the prediction.
# NOTE(review): 'Ранения' is 'нет' while 'Место ранения' is 'голова' — these
# look contradictory; confirm how the training data encodes "no wounds".
new_data = dict([
    ('Дата и время обнаружения', '2024-03-6 10:20:00'),
    ('Район', 'Алмалинский'),
    ('Температура на момент обнаружения (°C)', 31.5),
    ('Влажность (%)', 30),
    ('Скорость ветра (км/ч)', 10),
    ('Возраст', 50),
    ('Причина смерти', 'естественная'),
    ('Пол', 'мужской'),
    ('Ранения', 'нет'),
    ('Место ранения', 'голова'),
])

# Wrap the record in a list to get a one-row frame whose columns are the keys.
new_data_df = pd.DataFrame(data=[new_data])

# predict() returns one value per input row; only the first (and only) is shown.
predicted_death_time = pipeline.predict(new_data_df)

print("Predicted time of death:", predicted_death_time[0])

# Persist the whole fitted pipeline (preprocessing + regressor) to disk.
# Left as the last expression so the cell still displays the written path.
joblib.dump(pipeline, filename='death_time_predictor_model.pkl')


Predicted time of death: 32.181282


['death_time_predictor_model.pkl']

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_classif
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

# Univariate score function for continuous targets. Imported here so the cell
# is runnable on its own without editing the shared import list.
from sklearn.feature_selection import f_regression

# Column the model is trained to predict (used as y for the XGBRegressor below).
TARGET_COLUMN = 'Время с момента смерти (часы)'

# Path is relative to the current working directory.
df = pd.read_csv('dataset.csv')

# Feature matrix is every column except the target; y is the target itself.
X = df.drop(columns=TARGET_COLUMN)
y = df[TARGET_COLUMN]

# Numeric branch: mean-impute -> standardize -> PCA -> univariate scoring.
numeric_features = ['Температура на момент обнаружения (°C)', 'Влажность (%)', 'Скорость ветра (км/ч)', 'Возраст']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    # A float n_components asks PCA to keep enough components to reach that
    # fraction of explained variance.
    ('pca', PCA(n_components=0.95)),
    # The target is continuous, so score with f_regression. f_classif is an
    # ANOVA test for class labels and would treat each distinct target value as
    # its own class. With k='all' every component is kept, so this step only
    # records scores and does not change what the regressor sees.
    ('feature_selection', SelectKBest(f_regression, k='all'))
])

# Categorical branch: fill gaps with an explicit 'missing' level, then one-hot
# encode. Categories unseen during fit are ignored at predict time.
categorical_features = ['Район', 'Причина смерти', 'Пол', 'Ранения', 'Место ранения']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# `alpha` is the L1 regularization term in the XGBoost scikit-learn wrapper.
xg_reg = XGBRegressor(objective='reg:squarederror', colsample_bytree=0.5, learning_rate=0.05,
                      max_depth=10, alpha=1, n_estimators=100)

pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('model', xg_reg)])

# 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split.
y_pred = pipeline.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # same units as the target, unlike MSE
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')
# R^2 was computed above but never reported; print it with the other metrics
# so the error figures can be read against the variance of the target.
print(f'R^2 Score (R2): {r2}')

# Persist the whole fitted pipeline (preprocessing + regressor) to disk.
# Left as the last expression so the cell still displays the written path.
joblib.dump(pipeline, 'death_time_predictor_model.pkl')


Mean Squared Error (MSE): 430.1529158945713
Root Mean Squared Error (RMSE): 20.740128155210886
Mean Absolute Error (MAE): 17.904498950004577


['death_time_predictor_model.pkl']