<a href="https://colab.research.google.com/github/DeepsMaxi305/Data_Science/blob/main/M507Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [33]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score


# Load the Dataset

In [34]:
# Read the raw stroke records from the CSV file (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='stroke_dataset.csv')

# Drop Missing Values

In [35]:
# Keep only the rows that have no missing value in any column.
# The result is rebound instead of using `inplace=True`, so the frame object
# produced by `read_csv` is not mutated behind the reader's back and the call
# stays chainable; the resulting `data` is identical.
# NOTE(review): because incomplete rows are removed here, the SimpleImputer
# step later in the notebook appears to have nothing left to fill — confirm
# which of the two strategies (drop vs. impute) is actually intended.
data = data.dropna()

# One-Hot Encoding

In [36]:
# Categorical columns to expand into indicator (dummy) columns.
categorical_columns = [
    'gender',
    'ever_married',
    'work_type',
    'Residence_type',
    'smoking_status',
]
# drop_first=True omits the first level of each column, so a column with
# k categories becomes k-1 indicator columns.
data_encoded = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Min-Max Scaling (Normalization)

In [37]:
# Rescale the continuous features with MinMaxScaler (default range [0, 1]).
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split, so the test rows' min/max values influence the scaling.
numeric_columns = ['age', 'avg_glucose_level', 'bmi']
scaler = MinMaxScaler()
data_encoded[numeric_columns] = scaler.fit_transform(data_encoded[numeric_columns])

# Splitting the Dataset into Train and Test Sets

In [38]:
# Separate the target column from the predictors.
# NOTE(review): every column other than `stroke` is used as a feature —
# TODO confirm that no row-identifier column is among them.
y = data_encoded['stroke']
X = data_encoded.drop(columns='stroke')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified. The positive class is rare
# (53 of 982 test rows in the recorded run), so `stratify=y` would keep the
# class ratio equal in both parts — consider adding it.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Imputing Missing Values

In [39]:
# Learn the per-column means from the training rows only, then apply the same
# fill values to both splits so no test-set statistics are used.
# NOTE(review): rows with missing values were already removed by the earlier
# `dropna` call, so this step presumably has nothing left to fill — confirm.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)

# Gradient Boosting Classifier

In [40]:
# Histogram-based gradient boosting classifier with default hyperparameters.
# The seed is pinned to the same value used for the train/test split so that
# any randomised step inside the estimator (e.g. the validation split used for
# early stopping, or subsampling during binning on large data) gives the same
# result on every Restart & Run All.
model = HistGradientBoostingClassifier(random_state=42)

# Training the Model and Predicting on the Test Set

In [41]:
# `fit` returns the fitted estimator, so training and hard-label prediction
# on the held-out rows can be chained into one expression.
y_pred = model.fit(X_train, y_train).predict(X_test)


# Calculating Evaluation Metrics

In [42]:
# Compute both summaries first, then print each under its heading.
report_text = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)

# sep="\n" puts the heading and the value on separate lines, exactly as two
# consecutive print calls would.
print("Classification Report:", report_text, sep="\n")
print("Confusion Matrix:", confusion, sep="\n")


Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.99      0.97       929
           1       0.29      0.04      0.07        53

    accuracy                           0.94       982
   macro avg       0.62      0.52      0.52       982
weighted avg       0.91      0.94      0.92       982

Confusion Matrix:
[[924   5]
 [ 51   2]]


# Calculating the AUC-ROC

In [43]:
# predict_proba yields one column per class; column index 1 is taken as the
# score for the positive (stroke = 1) class.
class_probabilities = model.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]

# Area under the ROC curve, computed from the probability scores rather than
# from the hard labels in y_pred.
auc_roc = roc_auc_score(y_true=y_test, y_score=y_pred_prob)
print("AUC-ROC:", auc_roc)

AUC-ROC: 0.822877104616447
