## 1. Importing the Libraries

In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder , LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import joblib
from sklearn.model_selection import learning_curve


from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


ModuleNotFoundError: No module named 'seaborn'

## 2. Data Collection

In [None]:
df = pd.read_csv('data/patient_data1.csv')
df

In [None]:
df.rename(columns={"C":"Gender"},inplace=True)

In [None]:
df.info()

In [None]:
df.isnull().sum()

## 3. EDA

In [None]:
df.describe()

## 3.1 Univariate Analysis

In [None]:
gender_counts = df['Gender'].value_counts()


plt.pie(gender_counts, labels=gender_counts.index, autopct='%1.1f%%')
plt.title("Gender Distribution")
plt.figure(figsize=(8,4))
plt.show()

In [None]:
sns.countplot(x='Age', data=df, palette='Reds')
plt.title('Gender Distribution')
plt.xlabel('Age')
plt.ylabel('Count')
plt.grid(axis='y')
plt.show()


In [2]:
plt.figure(figsize=(12, 6))
sns.countplot(x='Stages', data=df, palette='Reds' ,order=['NORMAL','HYPERTENSION (Stage-1)','HYPERTENSION (Stage-2)','HYPERTENSIVE CRISIS'])

plt.title('Hypertension Stage Distribution')
plt.xlabel('Stage')
plt.ylabel('Frequency')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.xticks(rotation=45, ha='right')
plt.show()

NameError: name 'sns' is not defined

<Figure size 1200x600 with 0 Axes>

## 3.2 Bivariate anaylsis

In [3]:
plt.figure(figsize=(8,5))
sns.countplot(x='Gender', hue='Systolic', data=df, palette='Reds')
plt.title('Plot systolic blood pressure')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

NameError: name 'sns' is not defined

<Figure size 800x500 with 0 Axes>

In [4]:
sns.countplot(x='TakeMedication',hue='Severity',data=df,palette='Reds')
plt.title('Count of Take medication by severity')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

NameError: name 'sns' is not defined

## 4. Split the dataset


In [5]:
df

NameError: name 'df' is not defined

In [6]:
stage_corrections = {
    'HYPERTENSION (Stage-2).': 'HYPERTENSION (Stage-2)',
    'HYPERTENSIVE CRISI': 'HYPERTENSIVE CRISIS'
}
df['Stages'] = df['Stages'].replace(stage_corrections)

NameError: name 'df' is not defined

In [7]:
stages_order = [
    'NORMAL',
    'HYPERTENSION (Stage-1)',
    'HYPERTENSION (Stage-2)',
    'HYPERTENSIVE CRISIS'
]

In [8]:
od = OrdinalEncoder(categories=[stages_order])
df['Stages'] = od.fit_transform(df[['Stages']])

NameError: name 'OrdinalEncoder' is not defined

In [9]:
X=df.drop(columns=['Stages'])
y=df['Stages']

NameError: name 'df' is not defined

In [10]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

NameError: name 'train_test_split' is not defined

## 4. Data Preprocesing

In [11]:
num_features = ['Systolic', 'Diastolic']
cat_features = [
    'Gender', 'Age', 'History', 'Patient', 'TakeMedication',
    'Severity', 'BreathShortness', 'VisualChanges', 'NoseBleeding',
    'Whendiagnoused', 'ControlledDiet'
]


In [12]:
num_transformer =  Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ordinal" , OrdinalEncoder())
])

cat_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(sparse_output =False ,handle_unknown='ignore'))
])



NameError: name 'Pipeline' is not defined

In [13]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_transformer, num_features),
        ('cat', cat_transformer, cat_features)
    ]
)

NameError: name 'ColumnTransformer' is not defined

In [14]:
import sklearn
sklearn.set_config(transform_output="pandas")

In [15]:
preprocessor.fit_transform(X_train)

NameError: name 'preprocessor' is not defined

## 5. Model Selection

In [None]:
algorithms = {
    "Logistic Regression": LogisticRegression(multi_class='multinomial', max_iter=500),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=100),
    "Support Vector Machine": SVC(probability=True),
}

In [None]:
X_test.info()

In [None]:
def plot_curves(sizes, mean_scores, std_scores, label, ax):
	ax.plot(
		sizes,
		mean_scores,
		marker="o",
		label=label
	)

	ax.fill_between(
		x=sizes,
		y1=mean_scores - std_scores,
		y2=mean_scores + std_scores,
		alpha=0.5
	)

In [None]:
def plot_learning_curves(name, algorithm, figsize=(12, 4)):
	model = Pipeline(steps=[
		("pre", preprocessor),
		("alg", algorithm)
	])

	train_sizes, train_scores, test_scores = learning_curve(
		estimator=model,
		X=X_train,
		y=y_train,
		cv=3,
		scoring="accuracy",  # üîÅ classification metric
		n_jobs=-1,
		random_state=42
	)

	mean_train_scores = np.mean(train_scores, axis=1)
	std_train_scores = np.std(train_scores, axis=1)
	train_score = f"{mean_train_scores[-1]:.2f} +/- {std_train_scores[-1]:.2f}"

	mean_test_scores = np.mean(test_scores, axis=1)
	std_test_scores = np.std(test_scores, axis=1)
	test_score = f"{mean_test_scores[-1]:.2f} +/- {std_test_scores[-1]:.2f}"

	fig, ax = plt.subplots(figsize=figsize)

	# training curve
	plot_curves(
		train_sizes,
		mean_train_scores,
		std_train_scores,
		f"Train ({train_score})",
		ax
	)

	# test curve
	plot_curves(
		train_sizes,
		mean_test_scores,
		std_test_scores,
		f"Test ({test_score})",
		ax
	)

	ax.set(xlabel="Training Set Size", ylabel="Accuracy", title=name)
	ax.legend(loc="lower right")

	plt.show()

In [None]:
for name, alg in algorithms.items():
	plot_learning_curves(name, alg)

## 6. Model Training

In [None]:
model = Pipeline(steps=[
	("pre", preprocessor),
	("rf", LogisticRegression(multi_class='multinomial', max_iter=500))
])

In [None]:
model.fit(X_train, y_train)

## 7. Model Evaluation

In [None]:
for name, alg in algorithms.items():
    model = Pipeline(steps=[
        ("pre", preprocessor),
        ("alg", alg)
    ])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='macro')  # or 'weighted'
    rec = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')

    print(f"\n{name}:")
    print(f"  Accuracy : {acc:.3f}")
    print(f"  Precision: {prec:.3f}")
    print(f"  Recall   : {rec:.3f}")
    print(f"  F1-Score : {f1:.3f}")

In [None]:
joblib.dump(model, "model.joblib")

In [None]:
saved_model = joblib.load("model.joblib")
saved_model