# Heart Disease Data Analysis
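This notebook cleans the heart disease dataset, removes invalid records, scales selected numeric features, and trains a logistic regression model on it. An air quality dataset is also loaded and cleaned, but it is not used in the model.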

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Read the two input CSV files (paths are relative to the working directory)
heart = pd.read_csv(filepath_or_buffer="heart.csv")
air = pd.read_csv(filepath_or_buffer="airquality.csv")

## Step 1: Data Cleaning

In [None]:
# For each frame: replace missing values in numeric columns with that
# column's mean, then discard rows that are exact duplicates.
heart = (
    heart
    .fillna(heart.mean(numeric_only=True))
    .drop_duplicates()
)
air = (
    air
    .fillna(air.mean(numeric_only=True))
    .drop_duplicates()
)

## Step 2: Error Correction

In [None]:
# Keep only rows whose "trestbps" value is strictly positive;
# zero or negative readings are treated as invalid and dropped.
heart = heart.loc[heart["trestbps"].gt(0)]

## Step 3: Data Transformation

In [None]:
# Columns to rescale; every other column is left as-is
numeric_cols = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']

# Work on a copy so `heart` keeps its unscaled values
heart_scaled = heart.copy()

# Learn each column's min/max, then map the selected columns with MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(heart_scaled[numeric_cols])
heart_scaled[numeric_cols] = scaler.transform(heart_scaled[numeric_cols])

## Step 4: Data Model Building

In [None]:
# Separate features and label.
# Start from the cleaned but *unscaled* `heart` frame: `heart_scaled` was
# scaled with min/max values computed over every row, so using it here would
# leak information from the test rows into training and inflate the score.
X = heart.drop("target", axis=1)
y = heart["target"]

# Split into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Take explicit copies so the column assignments below modify independent
# frames rather than objects derived from X.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then apply the same mapping to
# the test rows (test values may fall slightly outside the training range).
split_scaler = MinMaxScaler()
X_train[numeric_cols] = split_scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = split_scaler.transform(X_test[numeric_cols])

# Train model (max_iter raised so the solver has room to converge)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the held-out rows and check accuracy
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)

print("Model Accuracy:", accuracy)