In [2]:
import pandas as pd
df = pd.read_csv('./data/heart_rate.csv')  # or 'ISO-8859-1'


In [3]:
df.head

<bound method NDFrame.head of       Age  Resting_Heart_Rate  Systolic_BP  Cholesterol_Level Risk_Level
0    69.0                56.0        149.0              291.0        NaN
1    32.0                 NaN        137.0              278.0     Medium
2    89.0                 NaN        118.0              280.0        NaN
3    78.0               106.0         99.0              181.0        NaN
4    38.0               116.0        147.0                NaN        NaN
..    ...                 ...          ...                ...        ...
995  27.0                66.0        151.0              133.0     Medium
996  51.0                58.0        177.0              139.0       High
997  72.0                90.0        112.0              160.0     Medium
998  49.0                50.0        104.0              277.0        NaN
999  67.0                51.0        116.0              294.0        Low

[1000 rows x 5 columns]>

In [4]:
df.isnull().sum()

Age                   100
Resting_Heart_Rate    100
Systolic_BP           100
Cholesterol_Level     100
Risk_Level            100
dtype: int64

In [5]:
df.shape

(1000, 5)

In [6]:
df

Unnamed: 0,Age,Resting_Heart_Rate,Systolic_BP,Cholesterol_Level,Risk_Level
0,69.0,56.0,149.0,291.0,
1,32.0,,137.0,278.0,Medium
2,89.0,,118.0,280.0,
3,78.0,106.0,99.0,181.0,
4,38.0,116.0,147.0,,
...,...,...,...,...,...
995,27.0,66.0,151.0,133.0,Medium
996,51.0,58.0,177.0,139.0,High
997,72.0,90.0,112.0,160.0,Medium
998,49.0,50.0,104.0,277.0,


In [7]:
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Resting_Heart_Rate'] = df['Resting_Heart_Rate'].fillna(df['Resting_Heart_Rate'].mean())
df['Systolic_BP'] = df['Systolic_BP'].fillna(df['Systolic_BP'].mean())
df['Cholesterol_Level'] = df['Cholesterol_Level'].fillna(df['Cholesterol_Level'].mean())

In [8]:
df.isnull().sum()

Age                     0
Resting_Heart_Rate      0
Systolic_BP             0
Cholesterol_Level       0
Risk_Level            100
dtype: int64

In [9]:
df['Risk_Level'].value_counts()

Risk_Level
High      359
Medium    341
Low       200
Name: count, dtype: int64

In [10]:
df['Risk_Level'].fillna(df['Risk_Level'].mode()[0], inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Risk_Level'].fillna(df['Risk_Level'].mode()[0], inplace=True)


In [11]:
df.isnull().sum()

Age                   0
Resting_Heart_Rate    0
Systolic_BP           0
Cholesterol_Level     0
Risk_Level            0
dtype: int64

In [12]:
from imblearn.over_sampling import SMOTE
from collections import Counter
from sklearn.model_selection import train_test_split

# Split features and target
X = df.drop(columns=['Risk_Level'])  # Features
y = df['Risk_Level']  # Target

# Check original class distribution
print("Before SMOTE:", Counter(y))

# Apply SMOTE
smote = SMOTE(sampling_strategy='auto', random_state=42)  # 'auto' balances all classes
X_resampled, y_resampled = smote.fit_resample(X, y)

# Check new class distribution
print("After SMOTE:", Counter(y_resampled))


Before SMOTE: Counter({'High': 459, 'Medium': 341, 'Low': 200})
After SMOTE: Counter({'High': 459, 'Medium': 459, 'Low': 459})


In [13]:
risk_mapping = {'Low': 0, 'Medium': 1, 'High': 2}
df['Risk_Level'] = df['Risk_Level'].map(risk_mapping)

In [14]:
df['Risk_Level']

0      2
1      1
2      2
3      2
4      2
      ..
995    1
996    2
997    1
998    2
999    0
Name: Risk_Level, Length: 1000, dtype: int64

In [15]:
x_train, x_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)


In [16]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42)
rf.fit(x_train, y_train)


In [17]:
predcition = rf.predict(x_test)
print(predcition)

['Low' 'Low' 'High' 'Medium' 'Medium' 'Low' 'Low' 'Low' 'Low' 'High'
 'High' 'Low' 'Medium' 'Medium' 'Low' 'Low' 'Medium' 'Medium' 'Medium'
 'Medium' 'High' 'Medium' 'Low' 'Medium' 'High' 'Medium' 'Low' 'High'
 'Medium' 'Medium' 'Medium' 'Low' 'High' 'High' 'High' 'High' 'High'
 'Medium' 'High' 'Low' 'Medium' 'Medium' 'High' 'Low' 'Medium' 'Medium'
 'High' 'Low' 'High' 'High' 'Low' 'Low' 'Low' 'Low' 'Medium' 'High' 'High'
 'Medium' 'Low' 'Medium' 'Medium' 'Medium' 'High' 'Low' 'High' 'High'
 'Medium' 'Low' 'Low' 'High' 'Medium' 'Low' 'High' 'Medium' 'Low' 'Medium'
 'Low' 'Low' 'Low' 'Medium' 'Medium' 'Low' 'Medium' 'Low' 'Medium'
 'Medium' 'Medium' 'High' 'Medium' 'High' 'Low' 'Low' 'Medium' 'Low'
 'High' 'Low' 'Medium' 'Medium' 'Medium' 'Low' 'Medium' 'Medium' 'Low'
 'Medium' 'High' 'Low' 'Low' 'High' 'Medium' 'Medium' 'High' 'Medium'
 'Low' 'High' 'Medium' 'Low' 'Low' 'High' 'Medium' 'Medium' 'Low' 'High'
 'Medium' 'Medium' 'Medium' 'Medium' 'High' 'High' 'High' 'Medium'
 'Medium' 'H

In [18]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, predcition)
print('model accuracy:', accuracy)

model accuracy: 0.8659420289855072


In [19]:
rf.score(x_train, y_train)

1.0

In [20]:
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_test, predcition, labels=['Low', 'Medium', 'High'])
print(cm)


[[82  5  3]
 [ 6 91  2]
 [ 9 12 66]]


In [21]:
df.columns

Index(['Age', 'Resting_Heart_Rate', 'Systolic_BP', 'Cholesterol_Level',
       'Risk_Level'],
      dtype='object')

In [22]:
age = float(input('age'))
res = float(input('resting rate:'))
bp = float(input('bp'))
cho = float(input('cholestrol'))
import numpy as np
user = np.array([[age, res, bp, cho]])
predcition = rf.predict(user)
print('your heartrate disease risk:', predcition)

your heartrate disease risk: ['High']




In [28]:
import pickle


with open("model1.pkl", "wb") as file:
    pickle.dump(rf, file)

print("Model saved successfully as model.pkl")


Model saved successfully as model.pkl


In [29]:
with open("model1.pkl", "rb") as file:
    loaded_model = pickle.load(file)


sample_input = [[30, 70, 120, 200]]  # Example values
print("Predicted Risk Level:", loaded_model.predict(sample_input))




Predicted Risk Level: ['Low']


In [30]:
import os
print(os.listdir())  # Lists all files in the current directory



['.conda', '.git', '.gitignore', '.venv', '.vscode', 'data', 'DT.ipynb', 'HC.ipynb', 'heart_pred.ipynb', 'KNN.ipynb', 'KNN2.ipynb', 'linear2.ipynb', 'logistic.ipynb', 'main.py', 'ML.ipynb', 'ML2.ipynb', 'MLV.ipynb', 'model1.pkl', 'navbay.ipynb', 'navbay2.ipynb', 'RF.ipynb', 'RL.ipynb', 'STOCK PREDICTION MODEL', 'streamlit', 'svm.ipynb', 'Temp_data.ipynb', 'validation2.ipynb', 'validation4(regression).ipynb', 'vcodeztask2.ipynb', 'vcodeztask3.ipynb', 'venv']


In [31]:
import pickle

# Check if model.pkl exists
if os.path.exists("model1.pkl"):
    with open("model1.pkl", "rb") as file:
        model = pickle.load(file)
    print("Model loaded successfully!")
else:
    print("Error: model1.pkl not found!")


Model loaded successfully!
