In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Load the heart dataset and print a short preview plus the column list.
data = pd.read_csv("heart.csv")
print("Dataset preview:\n", data.head())
print("\nColumn names:", list(data.columns))

# Features are the `age` and `chol` columns; the label is the `target` column.
# NOTE(review): `target` is presumably a binary label (the preview shows only
# 1s) — if so, a classifier may suit this better than linear regression; confirm.
X = data[["age", "chol"]]
y = data["target"]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Instantiate the estimator in this cell. Previously `model` was never created
# here, so the cell only worked when a model object was left over in the kernel
# and raised NameError on a fresh Restart & Run All.
model = LinearRegression()
model.fit(X_train, y_train)

# Score the fitted model on the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Coefficients (weights for age & chol):", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

# Predict for a single new row; column names and order must match the training features.
new_data = pd.DataFrame({
    "age": [50],
    "chol": [250]
})
prediction = model.predict(new_data)
print("\nPredicted Heart Disease Value:", prediction[0])


Dataset preview:
    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  

Column names: ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']

Model Coefficients (weights for age & chol): [-1.26165264e-02 -3.48800680e-05]
Intercept: 1.2453281547068722
Mean Squared Error (MSE): 0.2408803731813779
R² Score: 0.034142382965617335

Predicted Heart Disease Value: 0.60578

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# --- Load the insurance dataset and show the first rows ---
data = pd.read_csv("insurance.csv")
print("Dataset preview:\n", data.head())

# --- One-hot encode the categorical columns ---
# drop_first=True removes one indicator per category, so each remaining
# indicator is measured against that dropped baseline level.
data_encoded = pd.get_dummies(
    data,
    columns=['sex', 'smoker', 'region'],
    drop_first=True,
)

# --- Separate predictors from the response ---
# Every encoded column except "expenses" is a feature; "expenses" is the target.
X, y = data_encoded.drop(columns="expenses"), data_encoded["expenses"]

# --- 80/20 train/test split with a fixed seed for repeatability ---
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Fit the multiple linear regression (fit() returns the estimator itself) ---
model = LinearRegression().fit(X_train, y_train)

# --- Predict on the held-out rows and compute the evaluation metrics ---
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print("\nModel Coefficients:", model.coef_)
print("Intercept:", model.intercept_)
print("Mean Squared Error (MSE):", mse)
print("R² Score:", r2)

# --- Estimate expenses for one new person ---
# A single record whose keys follow the encoded training columns, in the same order.
new_data = pd.DataFrame([
    {
        "age": 40,
        "bmi": 28.5,
        "children": 2,
        "sex_male": 1,            # 1 = male, 0 = female
        "smoker_yes": 0,          # 1 = smoker, 0 = non-smoker
        "region_northwest": 0,
        "region_southeast": 1,
        "region_southwest": 0,
    }
])

prediction = model.predict(new_data)
print("\nPredicted Medical Expenses for new person:", prediction[0])


Dataset preview:
    age     sex   bmi  children smoker     region  expenses
0   19  female  27.9         0    yes  southwest  16884.92
1   18    male  33.8         1     no  southeast   1725.55
2   28    male  33.0         3     no  southeast   4449.46
3   33    male  22.7         0     no  northwest  21984.47
4   32    male  28.9         0     no  northwest   3866.86

Model Coefficients: [ 2.56955959e+02  3.37271473e+02  4.25641376e+02 -1.85197407e+01
  2.36503123e+04 -3.70313511e+02 -6.58712382e+02 -8.09229878e+02]
Intercept: -11936.774427292205
Mean Squared Error (MSE): 33600065.35507782
R² Score: 0.7835726930039906

Predicted Medical Expenses for new person: 8127.751551115585
