In [None]:
# 1. Imports

In [None]:
import pickle
import warnings
# NOTE: the star-import also pulls tkinter's layout constants (e.g. `X`, `Y`)
# into the notebook namespace, which can mask a missing `X` variable.
from tkinter import *
from tkinter import ttk, mainloop, StringVar

warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.svm import SVR
from xgboost import XGBRegressor


In [None]:

# 2. Reading Data from CSV
def read_csv(file_path):
    """Load a CSV source into a pandas DataFrame.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``pandas.read_csv`` defaults.
    """
    frame = pd.read_csv(file_path)
    return frame

In [None]:
    
# 3. Data Overview and Checks
def dataset_info_statistics(data):
    """Print a structural summary and descriptive statistics for ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset to describe.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    print("Dataset Information:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    data.info()
    print("\n")
    print("Basic Statistics for Numerical Columns:")
    print(data.describe())
    print("\n")

def check_null(data):
    """Count missing values per column.

    Prints a one-line header to standard output and returns the counts;
    the counts themselves are not printed here.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset to inspect.

    Returns
    -------
    pandas.Series
        Number of null entries in each column, indexed by column name.
    """
    missing_per_column = data.isna().sum()
    print("Null Values in the Dataset:")
    return missing_per_column

In [None]:

def check_duplicates(data):
    """Report whether ``data`` contains at least one fully duplicated row.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset to inspect.

    Returns
    -------
    bool-like
        True when any row repeats an earlier row, otherwise False.
    """
    duplicate_mask = data.duplicated()
    return duplicate_mask.any()

In [None]:

# 4. Data Visualization
def plot_graph(data):
    """Draw one histogram with a KDE overlay per numerical column of ``data``.

    A new 5x3 inch figure is created for every numeric column. The figures
    are not shown by this function; call ``plt.show()`` afterwards (or rely
    on the notebook's inline backend) to render them.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset whose numeric columns are plotted.
    """
    numerical_columns = data.select_dtypes(include=np.number).columns
    for column in numerical_columns:
        plt.figure(figsize=(5, 3))
        # histplot replaces the deprecated distplot and plots counts, which
        # matches the "Frequency" axis label below.
        sns.histplot(data=data, x=column, kde=True)
        plt.title(f"Histogram for {column}")
        plt.xlabel(column)
        plt.ylabel("Frequency")

In [None]:
# Display the figures that are currently open.
plt.show()

In [None]:

# Count plots for every categorical (object-dtype) column.
# NOTE(review): `data` is not assigned at notebook level in the visible cells
# (it only appears as a function parameter) - load the DataFrame first,
# e.g. with read_csv(...), before running this cell.
categorical_columns = data.select_dtypes(include='object').columns

for column in categorical_columns:
    plt.figure(figsize=(5, 3))
    # Pass the column as `x` explicitly: recent seaborn releases treat the
    # first positional argument as `data`, not as the variable to count.
    sns.countplot(x=data[column])
    plt.title(f'Countplot for {column}')
    plt.xlabel(column)
    plt.ylabel('Count')
    plt.xticks(rotation=45)

In [None]:
# Display the count-plot figures that are currently open.
plt.show()

In [None]:

# 5. Feature and Target Separation
def separate_features_target(data, target_column):
    """Split a dataset into a feature frame and a target series.

    Parameters
    ----------
    data : pandas.DataFrame
        The full dataset; it is not modified.
    target_column : str
        Name of the column to predict.

    Returns
    -------
    tuple
        ``(features, target)`` where ``features`` is ``data`` without
        ``target_column`` and ``target`` is that column on its own.
    """
    features = data.drop(columns=[target_column])
    target = data[target_column]
    return features, target

In [None]:

# 6. Train-Test Split
def perform_train_test_split(X, y, test_size=0.20, random_state=42):
    """Split features and target into train and test partitions.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Target values aligned with ``X``.
    test_size : float, default 0.20
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Forwarded to ``train_test_split`` to make the split repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    partitions = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
    )
    return tuple(partitions)

In [None]:

# 7. Data Preprocessing and Pipeline Creation
# Columns handled by each preprocessing branch.
categorical_features = ['Gender']
numeric_features = ['Age', 'Height', 'Weight', 'Duration', 'Heart_Rate', 'Body_Temp']

# Ordinal-encode the categorical column, standardise the numeric ones and
# pass any remaining columns through untouched.
preprocessor = ColumnTransformer(
    transformers=[
        ('ordinal', OrdinalEncoder(), categorical_features),
        ('num', StandardScaler(), numeric_features),
    ],
    remainder='passthrough',
)

# Baseline pipeline: preprocessing followed by a linear regression.
baseline_steps = [
    ("preprocessor", preprocessor),
    ("model", LinearRegression()),
]
pipeline = Pipeline(steps=baseline_steps)

In [None]:

# 8. Model Training and Evaluation
# Fit the baseline pipeline on the training split, then report R^2 followed
# by mean absolute error on the test split.
# NOTE(review): X_train / X_test / y_train / y_test are not assigned at
# notebook level in the visible cells - presumably they come from
# perform_train_test_split(...); confirm before running.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
for metric in (r2_score, mean_absolute_error):
    print(metric(y_test, y_pred))

In [None]:

# 9. Model Scoring Function
def model_scorer(model_name, model):
    """Score ``model`` behind the shared preprocessing step.

    The estimator is wrapped in a pipeline together with a copy of the
    notebook-level ``preprocessor``. It is evaluated twice on the
    notebook-level ``X`` and ``y``: once on a 80/20 hold-out split and once
    with shuffled 5-fold cross-validation (both seeded with 42).

    NOTE(review): ``X`` and ``y`` are read as globals and are not assigned in
    the visible cells - make sure they exist before calling this.

    Parameters
    ----------
    model_name : str
        Label placed first in the returned row.
    model : estimator
        Regressor to evaluate; it is fitted in place on the training split.

    Returns
    -------
    list
        ``[model_name, holdout_r2, holdout_mae, mean_cv_r2]``.
    """
    # Clone so fitting here never overwrites the fitted state of the shared
    # notebook-level `preprocessor` that other pipelines also reference.
    scoring_pipeline = Pipeline([
        ('preprocessor', clone(preprocessor)),
        ('model', model),
    ])

    # Hold-out evaluation through the notebook's own split helper so the
    # split logic lives in one place.
    X_train, X_test, y_train, y_test = perform_train_test_split(
        X, y, test_size=0.20, random_state=42
    )
    scoring_pipeline.fit(X_train, y_train)
    y_pred = scoring_pipeline.predict(X_test)
    holdout_r2 = r2_score(y_test, y_pred)
    holdout_mae = mean_absolute_error(y_test, y_pred)

    # Cross-validated R^2 over the full data; cross_val_score refits clones
    # of the pipeline on each fold.
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_results = cross_val_score(scoring_pipeline, X, y, cv=kfold, scoring='r2')

    return [model_name, holdout_r2, holdout_mae, cv_results.mean()]

In [None]:

# 10. Model Comparison
# Candidate regressors, keyed by the short label used in the results.
model_dict = {
    'SVR': SVR(),
    'LR': LinearRegression(),
    # Seeded so the comparison is reproducible between runs; 42 matches the
    # seed used for the train/test split and the K-fold shuffling.
    'RF': RandomForestRegressor(random_state=42),
    'XGBR': XGBRegressor()
}

# One result row per model, as returned by model_scorer.
model_output = [
    model_scorer(model_name, model)
    for model_name, model in model_dict.items()
]

print(model_output)

In [None]:

# 11. Model Training with Best Model
# Rebuild the pipeline around XGBRegressor and fit it on the full X, y.
# NOTE(review): X and y are not assigned at notebook level in the visible
# cells - confirm they are defined before this cell runs.
best_model_steps = [
    ('preprocessor', preprocessor),
    ('model', XGBRegressor()),
]
pipeline = Pipeline(steps=best_model_steps)
pipeline.fit(X, y)

In [None]:

# 12. Sample Prediction
# A single hand-written observation, one value per feature column.
sample_record = {
    'Gender': 'male',
    'Age': 68,
    'Height': 190.0,
    'Weight': 94.0,
    'Duration': 29.0,
    'Heart_Rate': 105.0,
    'Body_Temp': 40.8,
}
# Wrap the record in a one-row DataFrame so the pipeline can select columns by name.
sample = pd.DataFrame([sample_record])
print(pipeline.predict(sample))

In [None]:

# 13. Model Saving and Loading
# Persist the fitted pipeline, then read it back to check the round trip.
model_path = 'pipeline.pkl'

with open(model_path, 'wb') as out_file:
    pickle.dump(pipeline, out_file)

# The file loaded here is the one written just above; do not unpickle files
# from untrusted sources, since pickle.load can execute arbitrary code.
with open(model_path, 'rb') as in_file:
    pipeline_saved = pickle.load(in_file)

# Predict with the reloaded pipeline on the same sample row.
result = pipeline_saved.predict(sample)
print(result)
