<a href="https://colab.research.google.com/github/Rohith-Potana/CodSoft-Internship/blob/main/CodSoft1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# Import necessary libraries
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Download and load the dataset with appropriate column names
!wget -O heart.csv https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data

column_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'
]

df = pd.read_csv('heart.csv', header=None, names=column_names, na_values='?')

# Handling missing values
df = df.dropna()

--2024-07-02 16:45:59--  https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘heart.csv’

heart.csv               [ <=>                ]  18.03K  --.-KB/s    in 0.06s   

2024-07-02 16:46:00 (295 KB/s) - ‘heart.csv’ saved [18461]



In [11]:
# Count plot of target variable
fig = px.histogram(df, x='target', title='Distribution of Heart Disease', labels={'target':'Heart Disease'})
fig.show()

In [12]:
# Pair plot of features colored by target variable
fig = px.scatter_matrix(df, dimensions=['age', 'trestbps', 'chol', 'thalach', 'oldpeak'], color='target', title='Pair Plot')
fig.show()

In [13]:
# Correlation heatmap
corr = df.corr()
fig = go.Figure(data=go.Heatmap(
                   z=corr.values,
                   x=corr.columns,
                   y=corr.columns,
                   colorscale='Viridis'))
fig.update_layout(title='Correlation Heatmap')
fig.show()

In [14]:
# Scatter plot of age vs. maximum heart rate achieved
fig = px.scatter(df, x='age', y='thalach', color='target', title='Age vs. Max Heart Rate Achieved', labels={'thalach':'Max Heart Rate'})
fig.show()

In [15]:
# Box plot of age by target
fig = px.box(df, x='target', y='age', title='Age Distribution by Heart Disease Status', labels={'target':'Heart Disease', 'age':'Age'})
fig.show()

In [16]:
# Data preprocessing
X = df.drop('target', axis=1)
y = df['target']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [17]:
# Model training
model = LogisticRegression()
model.fit(X_train, y_train)

In [18]:
# Model evaluation
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred, output_dict=True)

print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:')
print(conf_matrix)
print('Classification Report:')
print(class_report)

Accuracy: 0.62
Confusion Matrix:
[[34  1  0  1  0]
 [ 5  1  0  2  1]
 [ 2  1  1  1  0]
 [ 1  2  1  1  2]
 [ 1  1  0  1  0]]
Classification Report:
{'0': {'precision': 0.7906976744186046, 'recall': 0.9444444444444444, 'f1-score': 0.8607594936708861, 'support': 36}, '1': {'precision': 0.16666666666666666, 'recall': 0.1111111111111111, 'f1-score': 0.13333333333333333, 'support': 9}, '2': {'precision': 0.5, 'recall': 0.2, 'f1-score': 0.28571428571428575, 'support': 5}, '3': {'precision': 0.16666666666666666, 'recall': 0.14285714285714285, 'f1-score': 0.15384615384615383, 'support': 7}, '4': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 3}, 'accuracy': 0.6166666666666667, 'macro avg': {'precision': 0.3248062015503876, 'recall': 0.27968253968253964, 'f1-score': 0.2867306533129318, 'support': 60}, 'weighted avg': {'precision': 0.5605297157622737, 'recall': 0.6166666666666667, 'f1-score': 0.5782139379607736, 'support': 60}}


In [19]:
# Confusion matrix heatmap
fig = go.Figure(data=go.Heatmap(
                   z=conf_matrix,
                   x=['Predicted Negative', 'Predicted Positive'],
                   y=['Actual Negative', 'Actual Positive'],
                   colorscale='Viridis'))
fig.update_layout(title='Confusion Matrix')
fig.show()

In [20]:
# Classification report as a table
report_df = pd.DataFrame(class_report).transpose()
fig = go.Figure(data=[go.Table(
    header=dict(values=list(report_df.columns),
                fill_color='paleturquoise',
                align='left'),
    cells=dict(values=[report_df[col] for col in report_df.columns],
               fill_color='lavender',
               align='left'))
])
fig.update_layout(title='Classification Report')
fig.show()