In [8]:
# No installation step is needed here: `warnings` is part of the Python
# standard library and is imported directly in the imports cell below.

[0m

# Import Required Libraries
Import the necessary libraries: pandas, numpy, scikit-learn, matplotlib, and seaborn.

In [10]:
# Importing the necessary libraries
import sys
# pandas for data manipulation and analysis
import pandas as pd

# numpy for numerical computing
import numpy as np

# sklearn for machine learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# matplotlib for data visualization
import matplotlib.pyplot as plt

# Seaborn for statistical data visualization
import seaborn as sns

# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides warnings raised by code that runs
# after this point (e.g. from pandas or scikit-learn). It is installed only after
# the imports above, so warnings emitted while importing are still shown.
import warnings
warnings.filterwarnings('ignore')

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


# Load and Preprocess the Data
Load the dataset and preprocess it by handling missing values and outliers.

In [None]:
# Load the dataset
df = pd.read_csv('insulin_data.csv')

# Display the first few rows of the dataframe
print(df.head())

# Check the info of the dataframe.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly rather than wrapped in print() (which would add a stray "None" line).
df.info()

# Check for missing values
print(df.isnull().sum())

# Fill missing values in numeric columns with the mean of the respective column.
# numeric_only=True keeps the mean computation from raising a TypeError when the
# file contains text columns; such columns are simply left untouched by fillna.
# Re-assigning (instead of inplace=True) keeps the cell free of in-place mutation.
df = df.fillna(df.mean(numeric_only=True))

# Columns screened for outliers with the 1.5 * IQR rule
outlier_columns = ['carb_intake', 'current_blood_sugar', 'current_weight']

# Quartiles and inter-quartile range of the screened columns
Q1 = df[outlier_columns].quantile(0.25)
Q3 = df[outlier_columns].quantile(0.75)
IQR = Q3 - Q1

# A row is an outlier when ANY screened column falls outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
is_outlier_row = (
    (df[outlier_columns] < lower_fence) | (df[outlier_columns] > upper_fence)
).any(axis=1)

# Remove outliers
df = df[~is_outlier_row]

# Display the first few rows of the cleaned dataframe
print(df.head())

: 

# Exploratory Data Analysis
Perform exploratory data analysis to understand the data better and find any patterns or correlations.

In [None]:
# Exploratory Data Analysis

# Summary statistics
print(df.describe())

# Correlation matrix.
# numeric_only=True restricts the computation to numeric columns; without it,
# pandas >= 2.0 raises if the frame contains any text column.
corr_matrix = df.corr(numeric_only=True)
print(corr_matrix)

# Heatmap of the correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Pairplot to visualize the relationship between features
sns.pairplot(df)
plt.show()

# Histograms for each variable in df
df.hist(bins=30, figsize=(15, 10))
plt.show()

# Boxplots for 'carb_intake', 'current_blood_sugar', 'current_weight':
# one panel per column on a 2x2 grid (the fourth slot stays empty).
boxplot_panels = [
    ('carb_intake', 'Carb Intake Boxplot'),
    ('current_blood_sugar', 'Current Blood Sugar Boxplot'),
    ('current_weight', 'Current Weight Boxplot'),
]
plt.figure(figsize=(15, 10))
for panel_index, (column_name, panel_title) in enumerate(boxplot_panels, start=1):
    plt.subplot(2, 2, panel_index)
    sns.boxplot(x=df[column_name])
    plt.title(panel_title)

plt.tight_layout()
plt.show()

: 

# Data Splitting
Split the dataset into training and testing sets.

In [None]:
# Predictor columns and the regression target
feature_columns = ['carb_intake', 'current_blood_sugar', 'current_weight']
X = df[feature_columns]
y = df['insulin_needs']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the size of every partition
partitions = (
    ("Shape of X_train: ", X_train),
    ("Shape of y_train: ", y_train),
    ("Shape of X_test: ", X_test),
    ("Shape of y_test: ", y_test),
)
for caption, partition in partitions:
    print(caption, partition.shape)

: 

# Model Building
Build a linear regression model (scikit-learn's `LinearRegression`) that predicts insulin needs from the selected features.

In [None]:
# Model Building

# Create an ordinary least squares regressor and fit it on the training split
# (fit() returns the estimator itself, so construction and fitting are chained)
model = LinearRegression().fit(X_train, y_train)

# Predictions for both splits
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

# Goodness of fit (R-squared) per split
r2_train = metrics.r2_score(y_train, y_train_pred)
r2_test = metrics.r2_score(y_test, y_test_pred)

# Mean squared error per split
mse_train = metrics.mean_squared_error(y_train, y_train_pred)
mse_test = metrics.mean_squared_error(y_test, y_test_pred)

# Root mean squared error, derived from the MSE values above
rmse_train, rmse_test = np.sqrt(mse_train), np.sqrt(mse_test)

# Print every score, training before testing for each metric
print("R-squared value for the training data: ", r2_train)
print("R-squared value for the testing data: ", r2_test)
print("Mean Squared Error for the training data: ", mse_train)
print("Mean Squared Error for the testing data: ", mse_test)
print("Root Mean Squared Error for the training data: ", rmse_train)
print("Root Mean Squared Error for the testing data: ", rmse_test)

: 

# Model Training
Train the model using the training set.

In [None]:
# Model Training
# NOTE(review): this cell performs the same fit-and-score steps as the
# "Model Building" cell; it creates a fresh estimator, so the results are identical.

# Fresh Linear Regression estimator, fitted on the training split
model = LinearRegression()
model.fit(X_train, y_train)

# --- Training split: predictions and scores ---
y_train_pred = model.predict(X_train)
r2_train = metrics.r2_score(y_train, y_train_pred)
mse_train = metrics.mean_squared_error(y_train, y_train_pred)
rmse_train = np.sqrt(mse_train)

# --- Testing split: predictions and scores ---
y_test_pred = model.predict(X_test)
r2_test = metrics.r2_score(y_test, y_test_pred)
mse_test = metrics.mean_squared_error(y_test, y_test_pred)
rmse_test = np.sqrt(mse_test)

# Print the scores metric by metric, training before testing
score_report = (
    ("R-squared value for the training data: ", r2_train),
    ("R-squared value for the testing data: ", r2_test),
    ("Mean Squared Error for the training data: ", mse_train),
    ("Mean Squared Error for the testing data: ", mse_test),
    ("Root Mean Squared Error for the training data: ", rmse_train),
    ("Root Mean Squared Error for the testing data: ", rmse_test),
)
for caption, score in score_report:
    print(caption, score)

: 

# Model Evaluation
Evaluate the model's performance on both the training and testing sets, and compare actual with predicted values.

In [None]:
# Model Evaluation


def _msle_or_nan(actual, predicted):
    """Return the mean squared log error, or NaN when it cannot be computed.

    scikit-learn raises ValueError when the targets or predictions contain
    values the logarithm cannot handle (e.g. negative predictions, which an
    unconstrained linear model can produce). Instead of aborting the whole
    evaluation cell, the reason is printed and NaN is returned.
    """
    try:
        return metrics.mean_squared_log_error(actual, predicted)
    except ValueError as error:
        print("Mean Squared Log Error could not be computed: ", error)
        return np.nan


# Calculate and print the Mean Absolute Error for the training data
mae_train = metrics.mean_absolute_error(y_train, y_train_pred)
print("Mean Absolute Error for the training data: ", mae_train)

# Calculate and print the Mean Absolute Error for the testing data
mae_test = metrics.mean_absolute_error(y_test, y_test_pred)
print("Mean Absolute Error for the testing data: ", mae_test)

# Calculate and print the Mean Squared Log Error for the training data
# (NaN when the metric is undefined for these values)
msle_train = _msle_or_nan(y_train, y_train_pred)
print("Mean Squared Log Error for the training data: ", msle_train)

# Calculate and print the Mean Squared Log Error for the testing data
# (NaN when the metric is undefined for these values)
msle_test = _msle_or_nan(y_test, y_test_pred)
print("Mean Squared Log Error for the testing data: ", msle_test)

# Calculate and print the Median Absolute Error for the training data
medae_train = metrics.median_absolute_error(y_train, y_train_pred)
print("Median Absolute Error for the training data: ", medae_train)

# Calculate and print the Median Absolute Error for the testing data
medae_test = metrics.median_absolute_error(y_test, y_test_pred)
print("Median Absolute Error for the testing data: ", medae_test)

# Plotting the actual vs predicted values for the training set
plt.scatter(y_train, y_train_pred)
plt.xlabel("Actual Insulin Needs")
plt.ylabel("Predicted Insulin Needs")
plt.title("Actual vs Predicted Insulin Needs for Training Set")
plt.show()

# Plotting the actual vs predicted values for the testing set
plt.scatter(y_test, y_test_pred)
plt.xlabel("Actual Insulin Needs")
plt.ylabel("Predicted Insulin Needs")
plt.title("Actual vs Predicted Insulin Needs for Testing Set")
plt.show()

: 

# Predict Insulin Needs
Use the trained model to predict insulin needs based on carb intake, current blood sugar, and current weight.

In [None]:
# Predict Insulin Needs

# Define the function to predict insulin needs
def predict_insulin_needs(carb_intake, current_blood_sugar, current_weight):
    """Predict insulin needs for a single observation with the fitted `model`.

    Parameters
    ----------
    carb_intake, current_blood_sugar, current_weight : numeric
        Feature values for one observation, in the same columns the model
        was trained on.

    Returns
    -------
    The first (and only) element of the model's prediction for that row.
    """
    # Build a one-row DataFrame carrying the training column names, in the
    # training order. The model was fitted on a named DataFrame, so passing a
    # bare array would skip scikit-learn's feature-name consistency check
    # (and trigger its "X does not have valid feature names" warning).
    inputs = pd.DataFrame(
        [[carb_intake, current_blood_sugar, current_weight]],
        columns=['carb_intake', 'current_blood_sugar', 'current_weight'],
    )

    # Use the trained module-level model to predict insulin needs
    prediction = model.predict(inputs)

    # A single row was supplied, so return its single prediction
    return prediction[0]

# Example inputs for a single prediction
carb_intake, current_blood_sugar, current_weight = 200, 150, 70

# Run the prediction helper on the example inputs
predicted_insulin_needs = predict_insulin_needs(
    carb_intake,
    current_blood_sugar,
    current_weight,
)

print("Predicted Insulin Needs: ", predicted_insulin_needs)

: 

# Update Weight
Update the weight: if no new weight is entered, fall back to the previously recorded weight.

In [None]:
# Update Weight

# Define the function to update weight
def update_weight(previous_weight, weight=None):
    """Return `weight`, falling back to `previous_weight` when `weight` is None.

    Only None triggers the fallback; any other value (including 0) is
    returned unchanged.
    """
    return previous_weight if weight is None else weight


# Demonstrate the fallback: no new weight is supplied,
# so the previous weight is carried over
previous_weight = 70
new_weight = update_weight(previous_weight=previous_weight)

print("Updated Weight: ", new_weight)

: 