In [5]:
# question 11

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the heart disease dataset
df = pd.read_csv('heart_disease.csv')
df = df.drop_duplicates()

# Fill missing values
for column in df.columns:
    if df[column].dtype in ['int64', 'float64']:
        df[column].fillna(df[column].mean(), inplace=True)
    else:
        df[column].fillna(df[column].mode()[0], inplace=True)

# Convert 'age' to numeric
df['age'] = pd.to_numeric(df['age'], errors='coerce')

# Select features and target variable
features = ['age', 'trestbps', 'thalch', 'oldpeak', 'ca', 'num']
target = 'chol'

# Split the data into training and testing sets
X = df[features]
y = df[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate performance metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print performance metrics
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

Mean Squared Error (MSE): 10796.089032439495
Root Mean Squared Error (RMSE): 103.90423009887276
R-squared (R2): 0.08647049464466738


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(df[column].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(df[column].mode()[0], inplace=True)
  df[column].fillna(df[column].mode()[0], inplace=True)


In [4]:
# question 12

import pandas as pd

df = pd.read_csv('heart_disease.csv')
df = df.drop_duplicates()

for column in df.columns:
    if df[column].dtype in ['int64', 'float64']:
        df[column].fillna(df[column].mean(), inplace=True)
    else:
        df[column].fillna(df[column].mode()[0], inplace=True)

df['age'] = pd.to_numeric(df['age'], errors='coerce')

df['age_group'] = pd.cut(df['age'], bins=[29, 40, 50, 60, 70, 80], labels=['30-40', '41-50', '51-60', '61-70', '71-80'])

df['bp_category'] = pd.cut(df['trestbps'], bins=[0, 120, 129, 139, 180, 300], labels=['Normal', 'Elevated', 'Hypertension Stage 1', 'Hypertension Stage 2', 'Hypertensive Crisis'])

df['st_depression'] = df['oldpeak'] > 1.0

df['age_trestbps'] = df['age'] * df['trestbps']
df['age_chol'] = df['age'] * df['chol']
df['age_thalach'] = df['age'] * df['thalch']

print(df.head())


   id  age     sex    dataset               cp  trestbps   chol    fbs  \
0   1   63    Male  Cleveland   typical angina     145.0  233.0   True   
1   2   67    Male  Cleveland     asymptomatic     160.0  286.0  False   
2   3   67    Male  Cleveland     asymptomatic     120.0  229.0  False   
3   4   37    Male  Cleveland      non-anginal     130.0  250.0  False   
4   5   41  Female  Cleveland  atypical angina     130.0  204.0  False   

          restecg  thalch  ...        slope   ca               thal  num  \
0  lv hypertrophy   150.0  ...  downsloping  0.0       fixed defect    0   
1  lv hypertrophy   108.0  ...         flat  3.0             normal    2   
2  lv hypertrophy   129.0  ...         flat  2.0  reversable defect    1   
3          normal   187.0  ...  downsloping  0.0             normal    0   
4  lv hypertrophy   172.0  ...    upsloping  0.0             normal    0   

  age_group           bp_category st_depression age_trestbps  age_chol  \
0     61-70  Hypertensio

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(df[column].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(df[column].mode()[0], inplace=True)
  df[column].fillna(df[column].mode()[0], inplace=True)
