<a href="https://colab.research.google.com/github/TarekHasan011/Pattern-Recognition/blob/main/Pattern_Laboratory_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reading Data

In [None]:
import pandas as pd
from pandas.api.types import is_string_dtype
# Load the stroke dataset; the literal 'N/A' is passed as an extra missing-value marker.
data = pd.read_csv('healthcare-dataset-stroke-data.csv', na_values='N/A')

# Trim surrounding whitespace from every text column so that otherwise equal
# category values are not treated as different strings later on.
text_columns = [name for name in data.columns if is_string_dtype(data[name].dtype)]
for column in text_columns:
    data[column] = data[column].str.strip()

# Splitting Data

In [None]:
from sklearn.model_selection import train_test_split
# Features: every column except the target ('stroke') and the row identifier ('id').
X = data.loc[:, ~data.columns.isin(['stroke', 'id'])]

# NOTE(review): XOR with 1 flips a 0/1 label, so from here on 1 means "no stroke"
# and 0 means "stroke" (assumes 'stroke' only holds 0/1 -- confirm). Metrics that
# use label 1 as the positive class (f1_score's default) are therefore reported
# for the "no stroke" class.
y = data['stroke'] ^ 1

# Stratify so both splits keep the same class ratio, and pin random_state so the
# split (and every metric computed below) is reproducible across runs.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Handling Missing Values

In [None]:
from sklearn.impute import SimpleImputer
from pandas.api.types import is_string_dtype

# Impute one column at a time: text columns get their most frequent value,
# all other columns get their median. Each imputer is fitted on the training
# split only and then applied to both splits.
imputed_train = {}
imputed_test = {}

for column in train_X.columns:
  strategy = 'most_frequent' if is_string_dtype(train_X[column].dtype) else 'median'
  si = SimpleImputer(strategy=strategy).fit(train_X[[column]])
  # transform() returns an (n, 1) array; flatten it to a plain 1-D column.
  imputed_train[column] = si.transform(train_X[[column]]).flatten()
  imputed_test[column] = si.transform(test_X[[column]]).flatten()

# Building from plain arrays gives both frames a fresh 0..n-1 index.
si_train_X = pd.DataFrame(imputed_train)
si_test_X = pd.DataFrame(imputed_test)

# Handling Text Features

In [None]:
from sklearn.preprocessing import LabelEncoder
# Encode the target. `le` is used for the target only, so it stays fitted on
# the labels instead of being overwritten by the feature columns below.
le = LabelEncoder()
le.fit(train_y)
train_y = le.transform(train_y)
test_y = le.transform(test_y)

categorical_feature = ['gender','ever_married','work_type','Residence_type','smoking_status']
l_train_X = pd.DataFrame()
l_test_X = pd.DataFrame()

for column in train_X.columns:
  if column in categorical_feature:
    # Integer codes follow the sorted order of the categories seen in training,
    # which is the same numbering LabelEncoder would assign.
    categories = sorted(si_train_X[column].unique())
    codes = {category: code for code, category in enumerate(categories)}
    l_train_X[column] = si_train_X[column].map(codes).astype('int64')
    # LabelEncoder.transform raises ValueError on a category that only occurs
    # in the test split. Map such values to one extra code (len(categories))
    # instead; it is non-negative, so MultinomialNB can still consume it.
    l_test_X[column] = (
        si_test_X[column].map(codes).fillna(len(categories)).astype('int64')
    )
  else:
    # Non-categorical columns are carried over unchanged.
    l_train_X[column] = si_train_X[column].copy()
    l_test_X[column] = si_test_X[column].copy()

# Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the per-column mean and standard deviation from the training split only,
# then apply the same scaling to both splits.
ss = StandardScaler().fit(l_train_X)
ss_train_X, ss_test_X = ss.transform(l_train_X), ss.transform(l_test_X)

# SVM 

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score,recall_score, precision_score

# Support vector classifier with default settings, trained on the scaled features.
svc = SVC().fit(ss_train_X, train_y)
predictions = svc.predict(ss_test_X)

# Score the held-out split.
accuracy, f1 = accuracy_score(test_y, predictions), f1_score(test_y, predictions)
print(f'Accuracy: {accuracy} F1 Score: {f1}')

Accuracy: 0.9510763209393346 F1 Score: 0.9749247743229689


# K Nearest Neighbor

In [None]:
from sklearn import neighbors
# k-nearest-neighbours vote over the 30 closest training points, all weighted equally.
knn = neighbors.KNeighborsClassifier(n_neighbors=30, weights='uniform').fit(ss_train_X, train_y)
predictions = knn.predict(ss_test_X)

# Score the held-out split.
accuracy, f1 = accuracy_score(test_y, predictions), f1_score(test_y, predictions)
print(f'Accuracy: {accuracy} F1 Score: {f1}')

Accuracy: 0.9510763209393346 F1 Score: 0.9749247743229689


# Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree that chooses splits by information gain (entropy).
# random_state is pinned because the tree permutes features while searching for
# splits, so an unseeded tree can give different metrics on every run.
dt = DecisionTreeClassifier(criterion='entropy', random_state=42)
dt.fit(ss_train_X,train_y)
predictions = dt.predict(ss_test_X)

# Score the held-out split.
accuracy = accuracy_score(test_y,predictions)
f1 = f1_score(test_y,predictions)
print(f'Accuracy: {accuracy} F1 Score: {f1}')

Accuracy: 0.9060665362035225 F1 Score: 0.9503619441571872


# Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Forest of 1000 trees trained on all available cores (n_jobs=-1).
# random_state is pinned because bootstrap sampling and per-split feature
# sampling are random, so an unseeded forest gives different metrics on every run.
rf = RandomForestClassifier(n_estimators=1000, n_jobs=-1, verbose=0, random_state=42)
rf.fit(ss_train_X,train_y)
predictions = rf.predict(ss_test_X)

# Score the held-out split.
accuracy = accuracy_score(test_y,predictions)
f1 = f1_score(test_y,predictions)
print(f'Accuracy: {accuracy} F1 Score: {f1}')

Accuracy: 0.9510763209393346 F1 Score: 0.9748995983935742


# Gaussian Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with default settings, trained on the scaled features.
gnb = GaussianNB().fit(ss_train_X, train_y)
predictions = gnb.predict(ss_test_X)

# Score the held-out split.
accuracy, f1 = accuracy_score(test_y, predictions), f1_score(test_y, predictions)
print(f'Accuracy: {accuracy} F1 Score: {f1}')

Accuracy: 0.87279843444227 F1 Score: 0.9303322615219721


# Multinomial Naive Bayes

In [None]:
from sklearn.naive_bayes import MultinomialNB
# Multinomial naive Bayes is trained on the unscaled, label-encoded features
# (l_train_X) rather than the standardized ones used by the other models.
# NOTE(review): presumably because standardized values can be negative, which
# MultinomialNB rejects -- confirm.
clf = MultinomialNB().fit(l_train_X, train_y)
predictions = clf.predict(l_test_X)

# Score the held-out split.
accuracy, f1 = accuracy_score(test_y, predictions), f1_score(test_y, predictions)
print(f'Accuracy: {accuracy} F1 Score: {f1}')

Accuracy: 0.812133072407045 F1 Score: 0.893687707641196


# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression with default settings, trained on the scaled features.
lo_reg = LogisticRegression()
# fit() returns the estimator itself, so `model` and `lo_reg` are the same object.
model = lo_reg.fit(ss_train_X, train_y)
predictions = model.predict(ss_test_X)

# Score the held-out split.
accuracy, f1 = accuracy_score(test_y, predictions), f1_score(test_y, predictions)
print(f'Accuracy: {accuracy} F1 Score: {f1}')

Accuracy: 0.9510763209393346 F1 Score: 0.9749247743229689
