## **Applied Learning Assignments 2:**

Interactive 3D Scatter Plot of Heart Attack Risk Factors

In [1]:
!pip install plotly
from IPython.display import display, HTML

display(HTML("<style>.container { width:100% !important; }</style>"))

import pandas as pd

import plotly.graph_objs as go



Load Data

In [2]:
# Load dataset (replace with actual path)
df = pd.read_csv("/content/heart_attack_south_africa (4).csv")

Data Check

In [3]:
df.head()

Unnamed: 0,Patient_ID,Age,Gender,Cholesterol_Level,Blood_Pressure_Systolic,Blood_Pressure_Diastolic,Smoking_Status,Alcohol_Intake,Physical_Activity,Obesity_Index,Diabetes_Status,Family_History_Heart_Disease,Diet_Quality,Stress_Level,Heart_Attack_History,Medication_Usage,Triglycerides_Level,LDL_Level,HDL_Level,Heart_Attack_Outcome
0,1,76,Female,156,94,79,No,High,Sedentary,35.2,No,No,Good,High,No,No,264.0,141.0,65.0,0.0
1,2,39,Female,160,185,88,No,Low,Sedentary,21.3,No,No,Average,Medium,No,Yes,150.0,81.0,42.0,0.0
2,3,85,Male,254,173,113,Yes,Moderate,Highly Active,32.0,Yes,No,Good,Low,No,No,220.0,124.0,70.0,1.0
3,4,45,Female,261,187,65,No,High,Sedentary,36.4,No,Yes,Average,Low,No,Yes,141.0,56.0,38.0,1.0
4,5,48,Male,206,189,84,No,Low,Active,26.6,No,No,Good,High,No,No,221.0,186.0,26.0,0.0


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11874 entries, 0 to 11873
Data columns (total 20 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Patient_ID                    11874 non-null  int64  
 1   Age                           11874 non-null  int64  
 2   Gender                        11874 non-null  object 
 3   Cholesterol_Level             11874 non-null  int64  
 4   Blood_Pressure_Systolic       11874 non-null  int64  
 5   Blood_Pressure_Diastolic      11874 non-null  int64  
 6   Smoking_Status                11874 non-null  object 
 7   Alcohol_Intake                11874 non-null  object 
 8   Physical_Activity             11874 non-null  object 
 9   Obesity_Index                 11874 non-null  float64
 10  Diabetes_Status               11874 non-null  object 
 11  Family_History_Heart_Disease  11874 non-null  object 
 12  Diet_Quality                  11873 non-null  object 
 13  S

In [7]:
# Check for duplicate rows
# Counting the number of duplicate rows
num_duplicates = df.duplicated().sum()
print(f"Number of duplicate rows: {num_duplicates}")

Number of duplicate rows: 0


In [8]:
# Create the 3D scatter plot
fig = go.Figure(data=go.Scatter3d(
    x=df['Age'],
    y=df['Cholesterol_Level'],
    z=df['Blood_Pressure_Systolic'],
    mode='markers',
    marker=dict(
        size=5,
        color=df['Heart_Attack_Outcome'],  # binary outcome (e.g., 0 or 1)
        colorscale='Viridis',
        opacity=0.7
    ),
    text=[f"Gender: {g}, Smoking: {s}" for g, s in zip(df['Gender'], df['Smoking_Status'])]
))

fig.update_layout(
    title="3D Scatter Plot: Age vs. Cholesterol vs. Systolic BP",
    scene=dict(
        xaxis_title='Age',
        yaxis_title='Cholesterol Level',
        zaxis_title='Systolic Blood Pressure'
    )
)

fig.show()

# **3D Surface Plot of Predicted Heart Attack Risk**

In [9]:
#Required Libraries
import numpy as np
from sklearn.linear_model import LogisticRegression
import plotly.graph_objs as go

In [10]:
# Prepare features and target variable
# Remove rows with NaN in 'Heart_Attack_Outcome'
df_cleaned = df.dropna(subset=['Heart_Attack_Outcome'])
X = df_cleaned[['Age', 'Cholesterol_Level']]
y = df_cleaned['Heart_Attack_Outcome']

# Train the logistic regression model
model = LogisticRegression().fit(X, y)

# Create a meshgrid for Age and Cholesterol_Level
age_range = np.linspace(df_cleaned['Age'].min(), df_cleaned['Age'].max(), 50)
chol_range = np.linspace(df_cleaned['Cholesterol_Level'].min(), df_cleaned['Cholesterol_Level'].max(), 50)
age_grid, chol_grid = np.meshgrid(age_range, chol_range)

# Predict risk probability over the grid
grid_points = np.c_[age_grid.ravel(), chol_grid.ravel()]
risk_prob = model.predict_proba(grid_points)[:, 1].reshape(age_grid.shape)

# Create the 3D surface plot
fig_surface = go.Figure(data=go.Surface(
    x=age_grid,
    y=chol_grid,
    z=risk_prob,
    colorscale='Blues'
))

fig_surface.update_layout(
    title="3D Surface Plot: Predicted Heart Attack Risk",
    scene=dict(
        xaxis_title='Age',
        yaxis_title='Cholesterol Level',
        zaxis_title='Risk Probability'
    )
)

fig_surface.show()


X does not have valid feature names, but LogisticRegression was fitted with feature names



# **Clustering Analysis with 3D Visualization**

In [11]:
# Required Libraries
import pandas as pd
import plotly.graph_objs as go
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [12]:
# Select features for clustering
features = ['Age', 'Cholesterol_Level', 'Blood_Pressure_Systolic', 'Obesity_Index']
X = df[features].dropna()  # drop any missing values if necessary

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Perform k-means clustering (experiment with n_clusters, e.g., 3)
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Create a new DataFrame with cluster labels
df_cluster = X.copy()
df_cluster['Cluster'] = clusters
df_cluster['Patient_ID'] = df['Patient_ID']

# Create the 3D scatter plot with clusters
fig_cluster = go.Figure(data=go.Scatter3d(
    x=df_cluster['Age'],
    y=df_cluster['Cholesterol_Level'],
    z=df_cluster['Blood_Pressure_Systolic'],
    mode='markers',
    marker=dict(
        size=5,
        color=df_cluster['Cluster'],
        colorscale='cividis',
        opacity=0.7
    ),
    text=[f"Patient ID: {pid}<br>Obesity Index: {oi}" for pid, oi in zip(df_cluster['Patient_ID'], df_cluster['Obesity_Index'])]
))

fig_cluster.update_layout(
    title="3D Clustering of Heart Attack Risk Factors",
    scene=dict(
        xaxis_title='Age',
        yaxis_title='Cholesterol Level',
        zaxis_title='Systolic Blood Pressure'
    )
)

fig_cluster.show()

# **Data Filtering and Export Dashboard (Bar Chart)**

In [15]:
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output, State

app = dash.Dash(__name__)

app.layout = html.Div([
    html.Div([
        html.Label("Age Range:"),
        dcc.RangeSlider(
            id='filter-age-slider',
            min=int(df['Age'].min()),
            max=int(df['Age'].max()),
            value=[int(df['Age'].min()), int(df['Age'].max())],
            marks={str(age): str(age) for age in range(int(df['Age'].min()), int(df['Age'].max())+1, 10)}
        ),
        html.Label("Select Gender:"),
        dcc.Dropdown(
            id='filter-gender-dropdown',
            options=[{'label': gen, 'value': gen} for gen in df['Gender'].unique()],
            value=df['Gender'].unique()[0],
            clearable=False
        ),
        html.Label("Smoking Status:"),
        dcc.RadioItems(
            id='filter-smoking-radio',
            options=[{'label': status, 'value': status} for status in df['Smoking_Status'].unique()],
            value=df['Smoking_Status'].unique()[0]
        ),
        html.Button("Download Filtered Data", id='download-button'),
        dcc.Download(id='download-dataframe-csv')
    ], style={'width': '30%', 'display': 'inline-block', 'verticalAlign': 'top'}),

    html.Div([
        dcc.Graph(id='filter-bar')
    ], style={'width': '68%', 'display': 'inline-block', 'padding': '0 20px'})
])

@app.callback(
    Output('filter-bar', 'figure'),
    [Input('filter-age-slider', 'value'),
     Input('filter-gender-dropdown', 'value'),
     Input('filter-smoking-radio', 'value')]
)
def update_filter_bar(selected_age, selected_gender, selected_smoking):
    filtered_df = df[
        (df['Age'] >= selected_age[0]) &
        (df['Age'] <= selected_age[1]) &
        (df['Gender'] == selected_gender) &
        (df['Smoking_Status'] == selected_smoking)
    ]

    # Group by Heart_Attack_Outcome to count occurrences
    outcome_counts = filtered_df['Heart_Attack_Outcome'].value_counts().sort_index()

    # Create a bar chart of the outcome counts
    fig = go.Figure(data=go.Bar(
        x=outcome_counts.index.astype(str),  # converting to string for clear labels ("0" or "1")
        y=outcome_counts.values,
        marker_color='lightsalmon'
    ))

    fig.update_layout(
        title="Filtered Heart Attack Outcome Distribution",
        xaxis_title="Heart Attack Outcome (0 = No, 1 = Yes)",
        yaxis_title="Number of Patients"
    )
    return fig

@app.callback(
    Output('download-dataframe-csv', 'data'),
    [Input('download-button', 'n_clicks')],
    [State('filter-age-slider', 'value'),
     State('filter-gender-dropdown', 'value'),
     State('filter-smoking-radio', 'value')]
)
def download_filtered_data(n_clicks, selected_age, selected_gender, selected_smoking):
    if n_clicks is None:
        return dash.no_update
    filtered_df = df[
        (df['Age'] >= selected_age[0]) &
        (df['Age'] <= selected_age[1]) &
        (df['Gender'] == selected_gender) &
        (df['Smoking_Status'] == selected_smoking)
    ]
    return dcc.send_data_frame(filtered_df.to_csv, "filtered_heart_attack_data.csv", index=False)

if __name__ == '__main__':
    app.run(debug=True)

<IPython.core.display.Javascript object>