In [2]:
# Install necessary libraries (if not already installed)
%pip install pandas numpy scikit-learn matplotlib seaborn joblib

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib



Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2



[notice] A new release of pip is available: 25.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# Load the dataset
file_path = "C:/flutter-projects/flutter-pregnancy-app/Machine_Learning/task 2/Birthweight_reduced_kg_R.csv"  # Update the path if needed
df = pd.read_csv(file_path)

# Display the first few rows
print(df.head())

     ID  Length  Birthweight  Headcirc  Gestation  smoker  mage  mnocig  \
0  1360      56         4.55        34         44       0    20       0   
1  1016      53         4.32        36         40       0    19       0   
2   462      58         4.10        39         41       0    35       0   
3  1187      53         4.07        38         44       0    20       0   
4   553      54         3.94        37         42       0    24       0   

   mheight  mppwt  fage  fedyrs  fnocig  fheight  lowbwt  mage35  
0      162     57    23      10      35      179       0       0  
1      171     62    19      12       0      183       0       0  
2      172     58    31      16      25      185       0       1  
3      174     68    26      14      25      189       0       0  
4      175     66    30      12       0      184       0       0  


In [4]:
# Define features (X) and target (y)
X = df.drop(columns=["Birthweight"])  # Assuming "Birthweight" is the target variable
y = df["Birthweight"]

# Display dataset dimensions
print("Feature Matrix Shape:", X.shape)
print("Target Variable Shape:", y.shape)


Feature Matrix Shape: (42, 15)
Target Variable Shape: (42,)


In [5]:
# Split into 80% training and 20% testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Confirm the split
print("Training set size:", X_train.shape[0])
print("Testing set size:", X_test.shape[0])


Training set size: 33
Testing set size: 9


In [6]:
# Initialize a scaler
scaler = StandardScaler()

# Fit and transform training data
X_train_scaled = scaler.fit_transform(X_train)

# Transform test data
X_test_scaled = scaler.transform(X_test)

# Save the scaler for later use
joblib.dump(scaler, "scaler.pkl")


['scaler.pkl']

In [8]:
# Initialize models
lr = LinearRegression()
dt = DecisionTreeRegressor(random_state=42)
rf = RandomForestRegressor(n_estimators=100, random_state=42)

# Train models
lr.fit(X_train_scaled, y_train)
dt.fit(X_train_scaled, y_train)
rf.fit(X_train_scaled, y_train)

# Predict on test data
y_pred_lr = lr.predict(X_test_scaled)
y_pred_dt = dt.predict(X_test_scaled)
y_pred_rf = rf.predict(X_test_scaled)

# Evaluate models using RMSE
rmse_lr = mean_squared_error(y_test, y_pred_lr)
rmse_dt = mean_squared_error(y_test, y_pred_dt)
rmse_rf = mean_squared_error(y_test, y_pred_rf)

print(f"Linear Regression RMSE: {rmse_lr:.2f}")
print(f"Decision Tree RMSE: {rmse_dt:.2f}")
print(f"Random Forest RMSE: {rmse_rf:.2f}")


Linear Regression RMSE: 0.16
Decision Tree RMSE: 0.16
Random Forest RMSE: 0.19


In [9]:
# Select the best model based on RMSE
best_model = None
best_model_name = ""
best_rmse = min(rmse_lr, rmse_dt, rmse_rf)

if best_rmse == rmse_lr:
    best_model = lr
    best_model_name = "Linear Regression"
elif best_rmse == rmse_dt:
    best_model = dt
    best_model_name = "Decision Tree"
else:
    best_model = rf
    best_model_name = "Random Forest"

# Save the best model
joblib.dump(best_model, "best_model.pkl")

print(f"Best Model: {best_model_name} saved as best_model.pkl")


Best Model: Linear Regression saved as best_model.pkl


In [10]:
# Load the best model
loaded_model = joblib.load("best_model.pkl")

# Example input (modify based on your dataset)
sample_input = np.array([X_test.iloc[0]])  # Using first test example
sample_input_scaled = scaler.transform(sample_input)

# Predict
predicted_bw = loaded_model.predict(sample_input_scaled)[0]

print(f"Predicted Birthweight: {predicted_bw:.2f} kg")


Predicted Birthweight: 1.90 kg


